Merge branch 'for-5.5/logitech' into for-linus
author    Jiri Kosina <jkosina@suse.cz>
          Fri, 29 Nov 2019 19:37:55 +0000 (20:37 +0100)
committer Jiri Kosina <jkosina@suse.cz>
          Fri, 29 Nov 2019 19:37:55 +0000 (20:37 +0100)
- Support for Logitech G15 (Hans de Goede)
- silencing of non-informative error flow in dmesg from
  logitech-hidpp (Hans de Goede)

2852 files changed:
.mailmap
CREDITS
Documentation/ABI/testing/ima_policy
Documentation/ABI/testing/sysfs-class-backlight [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-watchdog
Documentation/ABI/testing/sysfs-kernel-slab
Documentation/admin-guide/cgroup-v1/memory.rst
Documentation/admin-guide/cgroup-v2.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/arm64/memory.rst
Documentation/arm64/silicon-errata.rst
Documentation/core-api/index.rst
Documentation/core-api/kernel-api.rst
Documentation/core-api/memory-allocation.rst
Documentation/core-api/symbol-namespaces.rst [new file with mode: 0644]
Documentation/dev-tools/kasan.rst
Documentation/dev-tools/kselftest.rst
Documentation/devicetree/bindings/arm/rockchip.yaml
Documentation/devicetree/bindings/dsp/fsl,dsp.yaml
Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
Documentation/devicetree/bindings/i2c/i2c-emev2.txt [deleted file]
Documentation/devicetree/bindings/i2c/i2c-rcar.txt [deleted file]
Documentation/devicetree/bindings/i2c/i2c-riic.txt [deleted file]
Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt [deleted file]
Documentation/devicetree/bindings/i2c/renesas,i2c.txt [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/renesas,iic.txt [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/renesas,riic.txt [new file with mode: 0644]
Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml
Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt
Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
Documentation/devicetree/bindings/media/rc.yaml
Documentation/devicetree/bindings/mfd/mt6397.txt
Documentation/devicetree/bindings/mfd/rn5t618.txt
Documentation/devicetree/bindings/net/adi,adin.yaml
Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
Documentation/devicetree/bindings/net/renesas,ravb.txt
Documentation/devicetree/bindings/net/snps,dwmac.yaml
Documentation/devicetree/bindings/pci/designware-pcie.txt
Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
Documentation/devicetree/bindings/pci/mediatek-pcie.txt
Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt [new file with mode: 0644]
Documentation/devicetree/bindings/pci/pci-armada8k.txt
Documentation/devicetree/bindings/pci/pci.txt
Documentation/devicetree/bindings/pci/pcie-al.txt [new file with mode: 0644]
Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml
Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt [new file with mode: 0644]
Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
Documentation/devicetree/bindings/pwm/pwm-sprd.txt [new file with mode: 0644]
Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
Documentation/devicetree/bindings/riscv/cpus.yaml
Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
Documentation/devicetree/bindings/thermal/qoriq-thermal.txt
Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
Documentation/devicetree/bindings/usb/generic-ehci.yaml
Documentation/devicetree/bindings/usb/generic-ohci.yaml
Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt
Documentation/devicetree/bindings/usb/mediatek,mtu3.txt
Documentation/devicetree/bindings/usb/usb-hcd.yaml
Documentation/devicetree/bindings/usb/usb-uhci.txt
Documentation/devicetree/bindings/usb/usb-xhci.txt
Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt
Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt [deleted file]
Documentation/devicetree/bindings/watchdog/watchdog.yaml [new file with mode: 0644]
Documentation/filesystems/ceph.txt
Documentation/filesystems/index.rst
Documentation/filesystems/virtiofs.rst [new file with mode: 0644]
Documentation/hwmon/index.rst
Documentation/hwmon/inspur-ipsps1.rst
Documentation/hwmon/k10temp.rst
Documentation/kbuild/makefiles.rst
Documentation/kbuild/modules.rst
Documentation/kbuild/namespaces.rst [deleted file]
Documentation/kbuild/reproducible-builds.rst
Documentation/networking/device_drivers/index.rst
Documentation/networking/device_drivers/intel/e100.rst
Documentation/networking/device_drivers/intel/e1000.rst
Documentation/networking/device_drivers/intel/e1000e.rst
Documentation/networking/device_drivers/intel/fm10k.rst
Documentation/networking/device_drivers/intel/i40e.rst
Documentation/networking/device_drivers/intel/iavf.rst
Documentation/networking/device_drivers/intel/ice.rst
Documentation/networking/device_drivers/intel/igb.rst
Documentation/networking/device_drivers/intel/igbvf.rst
Documentation/networking/device_drivers/intel/ixgbe.rst
Documentation/networking/device_drivers/intel/ixgbevf.rst
Documentation/networking/device_drivers/pensando/ionic.rst
Documentation/networking/devlink-trap.rst
Documentation/networking/ip-sysctl.txt
Documentation/networking/j1939.rst
Documentation/networking/net_dim.txt
Documentation/process/coding-style.rst
Documentation/process/deprecated.rst
Documentation/process/embargoed-hardware-issues.rst
Documentation/security/IMA-templates.rst
Documentation/usb/rio.rst [deleted file]
Documentation/virt/kvm/api.txt
Documentation/vm/split_page_table_lock.rst
Documentation/watchdog/watchdog-parameters.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/pgalloc.h
arch/alpha/include/asm/pgtable.h
arch/alpha/include/uapi/asm/mman.h
arch/arc/boot/dts/hsdk.dts
arch/arc/configs/hsdk_defconfig
arch/arc/include/asm/pgalloc.h
arch/arc/include/asm/pgtable.h
arch/arc/kernel/perf_event.c
arch/arm/Kconfig
arch/arm/boot/dts/am335x-icev2.dts
arch/arm/boot/dts/am33xx-l4.dtsi
arch/arm/boot/dts/am3517-evm.dts
arch/arm/boot/dts/am3874-iceboard.dts
arch/arm/boot/dts/am4372.dtsi
arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi
arch/arm/boot/dts/dra7-l4.dtsi
arch/arm/boot/dts/imx6-logicpd-som.dtsi
arch/arm/boot/dts/imx7s.dtsi
arch/arm/boot/dts/logicpd-som-lv.dtsi
arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/ls1021a.dtsi
arch/arm/boot/dts/mt7629-rfb.dts
arch/arm/boot/dts/mt7629.dtsi
arch/arm/boot/dts/omap3-gta04.dtsi
arch/arm/boot/dts/omap4-droid4-xt894.dts
arch/arm/boot/dts/omap4-panda-common.dtsi
arch/arm/boot/dts/omap4-sdp.dts
arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi
arch/arm/boot/dts/omap5-board-common.dtsi
arch/arm/boot/dts/omap54xx-clocks.dtsi
arch/arm/boot/dts/ste-dbx5x0.dtsi
arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
arch/arm/boot/dts/sun4i-a10.dtsi
arch/arm/boot/dts/sun5i.dtsi
arch/arm/boot/dts/sun6i-a31.dtsi
arch/arm/boot/dts/sun7i-a20.dtsi
arch/arm/boot/dts/sun8i-a23-a33.dtsi
arch/arm/boot/dts/sun8i-a83t.dtsi
arch/arm/boot/dts/sun8i-r40.dtsi
arch/arm/boot/dts/sun9i-a80.dtsi
arch/arm/boot/dts/sunxi-h3-h5.dtsi
arch/arm/boot/dts/vf610-zii-scu4-aib.dts
arch/arm/configs/badge4_defconfig
arch/arm/configs/corgi_defconfig
arch/arm/configs/davinci_all_defconfig
arch/arm/configs/imx_v6_v7_defconfig
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/configs/pxa_defconfig
arch/arm/configs/s3c2410_defconfig
arch/arm/configs/spitz_defconfig
arch/arm/crypto/Kconfig
arch/arm/crypto/aes-ce-core.S
arch/arm/include/asm/domain.h
arch/arm/include/asm/pgalloc.h
arch/arm/include/asm/pgtable-nommu.h
arch/arm/include/asm/pgtable.h
arch/arm/include/asm/processor.h
arch/arm/include/asm/tlb.h
arch/arm/include/asm/uaccess.h
arch/arm/include/asm/xen/xen-ops.h [deleted file]
arch/arm/kernel/head-common.S
arch/arm/kernel/head-nommu.S
arch/arm/kernel/process.c
arch/arm/mach-aspeed/Kconfig
arch/arm/mach-davinci/dm365.c
arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
arch/arm/mach-omap2/omap_hwmod_33xx_data.c
arch/arm/mach-omap2/pdata-quirks.c
arch/arm/mach-omap2/pm.c
arch/arm/mm/alignment.c
arch/arm/mm/flush.c
arch/arm/mm/mmap.c
arch/arm/mm/mmu.c
arch/arm/mm/proc-v7m.S
arch/arm/xen/Makefile
arch/arm/xen/efi.c [deleted file]
arch/arm/xen/enlighten.c
arch/arm/xen/mm.c
arch/arm64/Kconfig
arch/arm64/Makefile
arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi
arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
arch/arm64/boot/dts/freescale/imx8mm.dtsi
arch/arm64/boot/dts/freescale/imx8mn.dtsi
arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
arch/arm64/boot/dts/freescale/imx8mq.dtsi
arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
arch/arm64/boot/dts/nvidia/tegra194.dtsi
arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts
arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts
arch/arm64/configs/defconfig
arch/arm64/include/asm/asm-uaccess.h
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/pgtable-prot.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/tlb.h
arch/arm64/include/asm/vdso/compat_barrier.h
arch/arm64/include/asm/vdso_datapage.h [deleted file]
arch/arm64/include/asm/xen/xen-ops.h [deleted file]
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/ftrace.c
arch/arm64/kernel/hibernate.c
arch/arm64/kernel/process.c
arch/arm64/kernel/vdso/gettimeofday.S [deleted file]
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/hyp/tlb.c
arch/arm64/kvm/sys_regs.c
arch/arm64/mm/fault.c
arch/arm64/mm/flush.c
arch/arm64/mm/mmap.c
arch/arm64/mm/mmu.c
arch/arm64/mm/pgd.c
arch/arm64/xen/Makefile
arch/c6x/include/asm/pgtable.h
arch/csky/abiv1/alignment.c
arch/csky/abiv1/cacheflush.c
arch/csky/abiv1/inc/abi/cacheflush.h
arch/csky/abiv1/inc/abi/page.h
arch/csky/abiv1/mmap.c
arch/csky/include/asm/barrier.h
arch/csky/include/asm/cache.h
arch/csky/include/asm/io.h
arch/csky/include/asm/pgalloc.h
arch/csky/include/asm/pgtable.h
arch/csky/kernel/entry.S
arch/csky/kernel/perf_event.c
arch/csky/kernel/process.c
arch/csky/mm/cachev1.c
arch/csky/mm/cachev2.c
arch/csky/mm/dma-mapping.c
arch/csky/mm/init.c
arch/csky/mm/ioremap.c
arch/h8300/include/asm/pgtable.h
arch/hexagon/include/asm/pgalloc.h
arch/hexagon/include/asm/pgtable.h
arch/hexagon/mm/Makefile
arch/hexagon/mm/init.c
arch/hexagon/mm/pgalloc.c [deleted file]
arch/ia64/Kconfig
arch/ia64/include/asm/pgalloc.h
arch/ia64/include/asm/pgtable.h
arch/ia64/kernel/irq_ia64.c
arch/ia64/mm/contig.c
arch/ia64/mm/discontig.c
arch/ia64/mm/init.c
arch/m68k/include/asm/mcf_pgalloc.h
arch/m68k/include/asm/motorola_pgalloc.h
arch/m68k/include/asm/pgtable_mm.h
arch/m68k/include/asm/pgtable_no.h
arch/m68k/include/asm/sun3_pgalloc.h
arch/microblaze/Kconfig
arch/microblaze/boot/dts/system.dts
arch/microblaze/configs/mmu_defconfig
arch/microblaze/configs/nommu_defconfig
arch/microblaze/include/asm/io.h
arch/microblaze/include/asm/pci.h
arch/microblaze/include/asm/pgalloc.h
arch/microblaze/include/asm/pgtable.h
arch/microblaze/include/asm/uaccess.h
arch/microblaze/kernel/reset.c
arch/microblaze/mm/consistent.c
arch/microblaze/mm/pgtable.c
arch/mips/Kconfig
arch/mips/bcm63xx/prom.c
arch/mips/boot/dts/qca/ar9331.dtsi
arch/mips/configs/mtx1_defconfig
arch/mips/configs/rm200_defconfig
arch/mips/fw/arc/memory.c
arch/mips/fw/sni/sniprom.c
arch/mips/include/asm/bmips.h
arch/mips/include/asm/cmpxchg.h
arch/mips/include/asm/octeon/cvmx-ipd.h
arch/mips/include/asm/pci.h
arch/mips/include/asm/pgalloc.h
arch/mips/include/asm/pgtable.h
arch/mips/include/asm/processor.h
arch/mips/include/asm/unistd.h
arch/mips/include/asm/vdso/gettimeofday.h
arch/mips/include/uapi/asm/hwcap.h
arch/mips/include/uapi/asm/mman.h
arch/mips/kernel/cpu-bugs64.c
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp-bmips.c
arch/mips/kernel/syscall.c
arch/mips/kernel/syscalls/syscall_n32.tbl
arch/mips/kernel/syscalls/syscall_n64.tbl
arch/mips/kernel/syscalls/syscall_o32.tbl
arch/mips/loongson64/Platform
arch/mips/loongson64/common/mem.c
arch/mips/loongson64/common/serial.c
arch/mips/loongson64/loongson-3/numa.c
arch/mips/mm/mmap.c
arch/mips/mm/tlbex.c
arch/mips/pmcs-msp71xx/msp_prom.c
arch/mips/vdso/Makefile
arch/mips/vdso/gettimeofday.c [deleted file]
arch/nds32/include/asm/pgalloc.h
arch/nds32/include/asm/pgtable.h
arch/nios2/include/asm/pgalloc.h
arch/nios2/include/asm/pgtable.h
arch/nios2/kernel/setup.c
arch/openrisc/include/asm/pgalloc.h
arch/openrisc/include/asm/pgtable.h
arch/parisc/include/asm/cache.h
arch/parisc/include/asm/ldcw.h
arch/parisc/include/asm/pgalloc.h
arch/parisc/include/asm/pgtable.h
arch/parisc/include/uapi/asm/mman.h
arch/parisc/kernel/entry.S
arch/parisc/mm/ioremap.c
arch/powerpc/boot/Makefile
arch/powerpc/include/asm/book3s/32/kup.h
arch/powerpc/include/asm/book3s/64/radix.h
arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
arch/powerpc/include/asm/cputable.h
arch/powerpc/include/asm/elf.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/pci.h
arch/powerpc/include/asm/pgalloc.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kernel/cpu_setup_power.S
arch/powerpc/kernel/dbell.c
arch/powerpc/kernel/dt_cpu_ftrs.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/prom_init_check.sh
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/book3s_xive.h
arch/powerpc/kvm/book3s_xive_native.c
arch/powerpc/mm/book3s64/hash_native.c
arch/powerpc/mm/book3s64/hash_pgtable.c
arch/powerpc/mm/book3s64/hash_utils.c
arch/powerpc/mm/book3s64/iommu_api.c
arch/powerpc/mm/book3s64/mmu_context.c
arch/powerpc/mm/book3s64/radix_pgtable.c
arch/powerpc/mm/book3s64/radix_tlb.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/kasan/kasan_init_32.c
arch/powerpc/mm/pgtable-frag.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/powernv/eeh-powernv.c
arch/powerpc/platforms/powernv/smp.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/papr_scm.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/sysdev/xics/icp-native.c
arch/powerpc/sysdev/xics/icp-opal.c
arch/riscv/Kconfig
arch/riscv/boot/dts/sifive/fu540-c000.dtsi
arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
arch/riscv/configs/defconfig
arch/riscv/configs/rv32_defconfig
arch/riscv/include/asm/asm.h
arch/riscv/include/asm/bug.h
arch/riscv/include/asm/io.h
arch/riscv/include/asm/irq.h
arch/riscv/include/asm/pgalloc.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/switch_to.h
arch/riscv/include/asm/tlbflush.h
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/head.S
arch/riscv/kernel/head.h [new file with mode: 0644]
arch/riscv/kernel/irq.c
arch/riscv/kernel/module-sections.c
arch/riscv/kernel/process.c
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/reset.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/signal.c
arch/riscv/kernel/smp.c
arch/riscv/kernel/smpboot.c
arch/riscv/kernel/syscall_table.c
arch/riscv/kernel/time.c
arch/riscv/kernel/traps.c
arch/riscv/kernel/vdso.c
arch/riscv/mm/context.c
arch/riscv/mm/fault.c
arch/riscv/mm/init.c
arch/riscv/mm/sifive_l2_cache.c
arch/s390/Kconfig
arch/s390/boot/startup.c
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/hypfs/inode.c
arch/s390/include/asm/atomic_ops.h
arch/s390/include/asm/bitops.h
arch/s390/include/asm/cpacf.h
arch/s390/include/asm/cpu_mf.h
arch/s390/include/asm/hugetlb.h
arch/s390/include/asm/jump_label.h
arch/s390/include/asm/perf_event.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/qdio.h
arch/s390/include/asm/uaccess.h
arch/s390/include/asm/unwind.h
arch/s390/include/uapi/asm/zcrypt.h
arch/s390/kernel/idle.c
arch/s390/kernel/kexec_elf.c
arch/s390/kernel/kexec_image.c
arch/s390/kernel/machine_kexec_file.c
arch/s390/kernel/machine_kexec_reloc.c
arch/s390/kernel/perf_cpum_cf_diag.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/topology.c
arch/s390/kernel/unwind_bc.c
arch/s390/kvm/kvm-s390.c
arch/s390/mm/cmm.c
arch/s390/mm/pgalloc.c
arch/s390/pci/pci_clp.c
arch/sh/include/asm/pgalloc.h
arch/sh/include/asm/pgtable.h
arch/sh/mm/Kconfig
arch/sh/mm/nommu.c
arch/sparc/Kconfig
arch/sparc/include/asm/pci.h
arch/sparc/include/asm/pgalloc_32.h
arch/sparc/include/asm/pgalloc_64.h
arch/sparc/include/asm/pgtable_32.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/mm/init_32.c
arch/sparc/mm/init_64.c
arch/sparc/mm/srmmu.c
arch/um/drivers/ubd_kern.c
arch/um/include/asm/pgalloc.h
arch/um/include/asm/pgtable.h
arch/unicore32/include/asm/pgalloc.h
arch/unicore32/include/asm/pgtable.h
arch/unicore32/include/asm/tlb.h
arch/x86/Kconfig
arch/x86/boot/compressed/acpi.c
arch/x86/boot/compressed/eboot.c
arch/x86/boot/compressed/misc.c
arch/x86/events/amd/core.c
arch/x86/events/amd/ibs.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/pt.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore.h
arch/x86/events/msr.c
arch/x86/hyperv/hv_apic.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/cpu_entry_area.h
arch/x86/include/asm/hyperv-tlfs.h
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/mwait.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pti.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/vmware.h
arch/x86/include/asm/vmx.h
arch/x86/include/asm/x86_init.h
arch/x86/include/uapi/asm/svm.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/cpu/umwait.c
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/head64.c
arch/x86/kernel/ima_arch.c
arch/x86/kernel/ioport.c
arch/x86/kernel/kexec-bzimage64.c
arch/x86/kernel/msr.c
arch/x86/kernel/process.h
arch/x86/kernel/x86_init.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/mmutrace.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/evmcs.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/nested.h
arch/x86/kvm/vmx/ops.h
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/lib/delay.c
arch/x86/mm/pat_rbtree.c
arch/x86/mm/pgtable.c
arch/x86/mm/testmmiotrace.c
arch/x86/platform/efi/efi.c
arch/x86/purgatory/Makefile
arch/x86/xen/efi.c
arch/x86/xen/enlighten.c
arch/x86/xen/enlighten_pv.c
arch/xtensa/boot/dts/virt.dts
arch/xtensa/include/asm/bitops.h
arch/xtensa/include/asm/pgalloc.h
arch/xtensa/include/asm/pgtable.h
arch/xtensa/include/asm/tlbflush.h
arch/xtensa/include/asm/uaccess.h
arch/xtensa/include/uapi/asm/mman.h
arch/xtensa/kernel/xtensa_ksyms.c
block/bfq-iosched.c
block/blk-cgroup.c
block/blk-core.c
block/blk-flush.c
block/blk-integrity.c
block/blk-iocost.c
block/blk-mq-sched.c
block/blk-mq.c
block/blk-rq-qos.c
block/blk-rq-qos.h
block/blk-sysfs.c
block/blk-wbt.c
block/blk.h
block/bsg-lib.c
block/elevator.c
block/sed-opal.c
block/t10-pi.c
certs/system_keyring.c
crypto/asymmetric_keys/pkcs7_verify.c
crypto/asymmetric_keys/verify_pefile.c
drivers/acpi/acpi_apd.c
drivers/acpi/cppc_acpi.c
drivers/acpi/custom_method.c
drivers/acpi/hmat/hmat.c
drivers/acpi/nfit/core.c
drivers/acpi/osl.c
drivers/acpi/pci_root.c
drivers/acpi/processor_driver.c
drivers/acpi/processor_perflib.c
drivers/acpi/processor_thermal.c
drivers/acpi/sleep.c
drivers/acpi/tables.c
drivers/amba/bus.c
drivers/android/binder.c
drivers/android/binder_alloc.c
drivers/android/binder_internal.h
drivers/ata/ahci.c
drivers/ata/libahci_platform.c
drivers/ata/libata-scsi.c
drivers/atm/he.c
drivers/base/core.c
drivers/base/memory.c
drivers/base/node.c
drivers/base/platform.c
drivers/base/power/qos.c
drivers/block/drbd/drbd_interval.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/block/null_blk_zoned.c
drivers/block/pktcdvd.c
drivers/block/rbd.c
drivers/block/zram/zram_drv.c
drivers/bus/ti-sysc.c
drivers/char/mem.c
drivers/char/random.c
drivers/char/tpm/tpm-interface.c
drivers/char/xillybus/xillybus_pcie.c
drivers/clk/ti/clk-7xx.c
drivers/clocksource/timer-of.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
drivers/cpuidle/cpuidle-haltpoll.c
drivers/crypto/chelsio/chtls/chtls_cm.c
drivers/crypto/chelsio/chtls/chtls_io.c
drivers/dma-buf/dma-resv.c
drivers/dma/imx-sdma.c
drivers/dma/qcom/bam_dma.c
drivers/dma/sprd-dma.c
drivers/dma/tegra210-adma.c
drivers/dma/ti/cppi41.c
drivers/dma/xilinx/xilinx_dma.c
drivers/edac/ghes_edac.c
drivers/firmware/arm_scmi/reset.c
drivers/firmware/dmi_scan.c
drivers/firmware/efi/Kconfig
drivers/firmware/efi/cper.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/Makefile
drivers/firmware/efi/libstub/arm32-stub.c
drivers/firmware/efi/libstub/efi-stub-helper.c
drivers/firmware/efi/rci2-table.c
drivers/firmware/efi/test/efi_test.c
drivers/firmware/efi/tpm.c
drivers/firmware/google/vpd_decode.c
drivers/gpio/gpio-eic-sprd.c
drivers/gpio/gpio-intel-mid.c
drivers/gpio/gpio-lynxpoint.c
drivers/gpio/gpio-max77620.c
drivers/gpio/gpio-merrifield.c
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/calcs/Makefile
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
drivers/gpu/drm/amd/display/dc/dcn20/Makefile
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dcn21/Makefile
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
drivers/gpu/drm/amd/display/dc/dml/Makefile
drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
drivers/gpu/drm/amd/display/dc/dsc/Makefile
drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c
drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
drivers/gpu/drm/amd/display/dc/inc/resource.h
drivers/gpu/drm/amd/display/include/dal_asic_id.h
drivers/gpu/drm/amd/include/renoir_ip_offset.h
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
drivers/gpu/drm/amd/powerplay/navi10_ppt.c
drivers/gpu/drm/amd/powerplay/renoir_ppt.c
drivers/gpu/drm/amd/powerplay/renoir_ppt.h
drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
drivers/gpu/drm/amd/powerplay/vega20_ppt.c
drivers/gpu/drm/arm/display/komeda/komeda_kms.c
drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
drivers/gpu/drm/arm/malidp_mw.c
drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
drivers/gpu/drm/bridge/tc358767.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_atomic_state_helper.c
drivers/gpu/drm/drm_atomic_uapi.c
drivers/gpu/drm/drm_drv.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_ioctl.c
drivers/gpu/drm/drm_mode_object.c
drivers/gpu/drm/drm_self_refresh_helper.c
drivers/gpu/drm/drm_writeback.c
drivers/gpu/drm/etnaviv/etnaviv_dump.c
drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
drivers/gpu/drm/etnaviv/etnaviv_mmu.c
drivers/gpu/drm/i915/display/intel_bios.c
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/display/intel_display.h
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/display/intel_dp.h
drivers/gpu/drm/i915/display/intel_dp_mst.c
drivers/gpu/drm/i915/display/intel_dpll_mgr.c
drivers/gpu/drm/i915/display/intel_dpll_mgr.h
drivers/gpu/drm/i915/display/intel_sprite.c
drivers/gpu/drm/i915/gem/i915_gem_mman.c
drivers/gpu/drm/i915/gem/i915_gem_object.h
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
drivers/gpu/drm/i915/gem/i915_gem_pm.c
drivers/gpu/drm/i915/gem/i915_gem_userptr.c
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_reset.h
drivers/gpu/drm/i915/gt/intel_ringbuffer.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/intel_pch.c
drivers/gpu/drm/i915/intel_pch.h
drivers/gpu/drm/i915/selftests/i915_gem.c
drivers/gpu/drm/msm/dsi/dsi_host.c
drivers/gpu/drm/nouveau/dispnv50/wndw.c
drivers/gpu/drm/omapdrm/dss/dss.c
drivers/gpu/drm/panel/panel-lg-lb035q02.c
drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
drivers/gpu/drm/panel/panel-sony-acx565akm.c
drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
drivers/gpu/drm/panfrost/panfrost_devfreq.c
drivers/gpu/drm/panfrost/panfrost_device.c
drivers/gpu/drm/panfrost/panfrost_drv.c
drivers/gpu/drm/panfrost/panfrost_gpu.c
drivers/gpu/drm/panfrost/panfrost_job.c
drivers/gpu/drm/panfrost/panfrost_mmu.c
drivers/gpu/drm/panfrost/panfrost_perfcnt.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/rcar-du/rcar_du_writeback.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/scheduler/sched_main.c
drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
drivers/gpu/drm/tiny/Kconfig
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_vm.c
drivers/gpu/drm/v3d/v3d_gem.c
drivers/gpu/drm/vc4/vc4_txp.c
drivers/gpu/drm/via/via_dmablit.c
drivers/gpu/drm/xen/xen_drm_front.c
drivers/hid/hid-axff.c
drivers/hid/hid-core.c
drivers/hid/hid-dr.c
drivers/hid/hid-emsff.c
drivers/hid/hid-gaff.c
drivers/hid/hid-google-hammer.c
drivers/hid/hid-holtekff.c
drivers/hid/hid-hyperv.c
drivers/hid/hid-ids.h
drivers/hid/hid-lg2ff.c
drivers/hid/hid-lg3ff.c
drivers/hid/hid-lg4ff.c
drivers/hid/hid-lgff.c
drivers/hid/hid-logitech-hidpp.c
drivers/hid/hid-microsoft.c
drivers/hid/hid-prodikeys.c
drivers/hid/hid-quirks.c
drivers/hid/hid-rmi.c
drivers/hid/hid-sony.c
drivers/hid/hid-tmff.c
drivers/hid/hid-zpff.c
drivers/hid/hidraw.c
drivers/hid/i2c-hid/i2c-hid-core.c
drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
drivers/hid/intel-ish-hid/ishtp/client-buffers.c
drivers/hid/intel-ish-hid/ishtp/hbm.c
drivers/hid/wacom.h
drivers/hid/wacom_wac.c
drivers/hv/channel_mgmt.c
drivers/hv/connection.c
drivers/hv/hv.c
drivers/hv/hv_balloon.c
drivers/hv/hyperv_vmbus.h
drivers/hv/vmbus_drv.c
drivers/hwmon/ina3221.c
drivers/hwmon/nct7904.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-aspeed.c
drivers/i2c/busses/i2c-axxia.c
drivers/i2c/busses/i2c-bcm-iproc.c
drivers/i2c/busses/i2c-bcm2835.c
drivers/i2c/busses/i2c-cht-wc.c
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-designware-pcidrv.c
drivers/i2c/busses/i2c-designware-platdrv.c
drivers/i2c/busses/i2c-exynos5.c
drivers/i2c/busses/i2c-fsi.c
drivers/i2c/busses/i2c-hix5hd2.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-icy.c [new file with mode: 0644]
drivers/i2c/busses/i2c-imx-lpi2c.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-ismt.c
drivers/i2c/busses/i2c-mt65xx.c
drivers/i2c/busses/i2c-mxs.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/busses/i2c-piix4.c
drivers/i2c/busses/i2c-qcom-geni.c
drivers/i2c/busses/i2c-riic.c
drivers/i2c/busses/i2c-sprd.c
drivers/i2c/busses/i2c-stm32f7.c
drivers/i2c/busses/i2c-synquacer.c
drivers/i2c/busses/i2c-taos-evm.c
drivers/i2c/busses/i2c-tegra.c
drivers/i2c/busses/i2c-uniphier-f.c
drivers/i2c/busses/i2c-uniphier.c
drivers/i2c/i2c-core-base.c
drivers/i2c/i2c-slave-eeprom.c
drivers/iio/accel/adxl372.c
drivers/iio/accel/bmc150-accel-core.c
drivers/iio/adc/ad799x.c
drivers/iio/adc/axp288_adc.c
drivers/iio/adc/hx711.c
drivers/iio/adc/meson_saradc.c
drivers/iio/adc/stm32-adc-core.c
drivers/iio/adc/stm32-adc-core.h
drivers/iio/adc/stm32-adc.c
drivers/iio/imu/adis_buffer.c
drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
drivers/iio/light/Kconfig
drivers/iio/light/opt3001.c
drivers/iio/light/vcnl4000.c
drivers/infiniband/core/addr.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/iwcm.c
drivers/infiniband/core/netlink.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/rw.c
drivers/infiniband/core/security.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/user_pages.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/qib/qib_user_pages.c
drivers/infiniband/hw/usnic/usnic_uiom.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
drivers/infiniband/sw/siw/siw_mem.c
drivers/infiniband/sw/siw/siw_qp.c
drivers/infiniband/sw/siw/siw_verbs.c
drivers/input/misc/da9063_onkey.c
drivers/input/misc/soc_button_array.c
drivers/input/mouse/elantech.c
drivers/input/rmi4/rmi_driver.c
drivers/input/touchscreen/goodix.c
drivers/input/touchscreen/st1232.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_quirks.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/arm-smmu.c
drivers/iommu/intel-iommu.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/rockchip-iommu.c
drivers/irqchip/irq-al-fic.c
drivers/irqchip/irq-atmel-aic5.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-sifive-plic.c
drivers/isdn/capi/capi.c
drivers/isdn/mISDN/socket.c
drivers/macintosh/windfarm_cpufreq_clamp.c
drivers/md/dm-cache-target.c
drivers/md/dm-clone-target.c
drivers/md/dm-integrity.c
drivers/md/dm-snap.c
drivers/md/raid0.c
drivers/media/i2c/adv748x/adv748x-core.c
drivers/media/i2c/adv7604.c
drivers/media/usb/stkwebcam/stk-webcam.c
drivers/media/v4l2-core/videobuf-dma-contig.c
drivers/memstick/host/jmb38x_ms.c
drivers/mfd/88pm800.c
drivers/mfd/88pm860x-core.c
drivers/mfd/Kconfig
drivers/mfd/Makefile
drivers/mfd/ab3100-core.c
drivers/mfd/ab8500-debugfs.c
drivers/mfd/asic3.c
drivers/mfd/bcm590xx.c
drivers/mfd/da9150-core.c
drivers/mfd/davinci_voicecodec.c
drivers/mfd/db8500-prcmu.c
drivers/mfd/ezx-pcap.c
drivers/mfd/fsl-imx25-tsadc.c
drivers/mfd/htc-i2cpld.c
drivers/mfd/intel-lpss-acpi.c
drivers/mfd/intel-lpss-pci.c
drivers/mfd/intel-lpss.c
drivers/mfd/intel_soc_pmic_bxtwc.c
drivers/mfd/intel_soc_pmic_mrfld.c [new file with mode: 0644]
drivers/mfd/jz4740-adc.c [deleted file]
drivers/mfd/max14577.c
drivers/mfd/max77620.c
drivers/mfd/max77693.c
drivers/mfd/max77843.c
drivers/mfd/max8907.c
drivers/mfd/max8925-i2c.c
drivers/mfd/max8997.c
drivers/mfd/max8998.c
drivers/mfd/mt6397-core.c
drivers/mfd/mt6397-irq.c [new file with mode: 0644]
drivers/mfd/palmas.c
drivers/mfd/qcom_rpm.c
drivers/mfd/sm501.c
drivers/mfd/timberdale.c
drivers/mfd/tps80031.c
drivers/mfd/twl-core.c
drivers/misc/eeprom/at24.c
drivers/misc/fastrpc.c
drivers/misc/mei/bus-fixup.c
drivers/misc/mei/hw-me-regs.h
drivers/misc/mei/hw-me.c
drivers/misc/mei/hw-me.h
drivers/misc/mei/mei_dev.h
drivers/misc/mei/pci-me.c
drivers/mmc/host/Kconfig
drivers/mmc/host/Makefile
drivers/mmc/host/cqhci.c
drivers/mmc/host/mxs-mmc.c
drivers/mmc/host/renesas_sdhi_core.c
drivers/mmc/host/sdhci-iproc.c
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mmc/host/sdhci-omap.c
drivers/mmc/host/sdhci-pci-core.c
drivers/mmc/host/sdhci-pci-gli.c [new file with mode: 0644]
drivers/mmc/host/sdhci-pci.h
drivers/mmc/host/sdhci-tegra.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/mmc/host/sh_mmcif.c
drivers/mtd/nand/raw/au1550nd.c
drivers/mtd/spi-nor/spi-nor.c
drivers/net/Kconfig
drivers/net/arcnet/Kconfig
drivers/net/arcnet/arcnet.c
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/can/usb/Kconfig
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_serdes.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/lantiq_pce.h
drivers/net/dsa/microchip/ksz8795.c
drivers/net/dsa/microchip/ksz8795_spi.c
drivers/net/dsa/microchip/ksz9477_i2c.c
drivers/net/dsa/microchip/ksz9477_reg.h
drivers/net/dsa/microchip/ksz9477_spi.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/microchip/ksz_common.h
drivers/net/dsa/qca8k.c
drivers/net/dsa/rtl8366.c
drivers/net/dsa/rtl8366rb.c
drivers/net/dsa/sja1105/Kconfig
drivers/net/dsa/sja1105/sja1105.h
drivers/net/dsa/sja1105/sja1105_dynamic_config.h
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/dsa/sja1105/sja1105_ptp.h
drivers/net/dsa/sja1105/sja1105_spi.c
drivers/net/dsa/sja1105/sja1105_static_config.h
drivers/net/dsa/sja1105/sja1105_tas.h
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/allwinner/Kconfig
drivers/net/ethernet/amazon/Kconfig
drivers/net/ethernet/amazon/ena/ena_eth_com.c
drivers/net/ethernet/aquantia/atlantic/aq_main.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/aquantia/atlantic/aq_vec.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
drivers/net/ethernet/arc/emac_rockchip.c
drivers/net/ethernet/atheros/ag71xx.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/genet/bcmgenet.h
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/common/cavium_ptp.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/cortina/gemini.h
drivers/net/ethernet/emulex/benet/Kconfig
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h
drivers/net/ethernet/freescale/dpaa2/dpni.h
drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
drivers/net/ethernet/freescale/dpaa2/dprtc.h
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/google/gve/gve_rx.c
drivers/net/ethernet/google/gve/gve_tx.c
drivers/net/ethernet/hisilicon/hip04_eth.c
drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/hisilicon/hns_mdio.c
drivers/net/ethernet/i825xx/lasi_82596.c
drivers/net/ethernet/i825xx/lib82596.c
drivers/net/ethernet/i825xx/sni_82596.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/e1000/e1000_ethtool.c
drivers/net/ethernet/intel/e1000e/e1000.h
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/igb/e1000_82575.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/jme.c
drivers/net/ethernet/marvell/mvneta_bm.h
drivers/net/ethernet/marvell/skge.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_board.c
drivers/net/ethernet/netronome/nfp/abm/cls.c
drivers/net/ethernet/netronome/nfp/flower/main.c
drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
drivers/net/ethernet/netx-eth.c [deleted file]
drivers/net/ethernet/ni/nixge.c
drivers/net/ethernet/nxp/Kconfig
drivers/net/ethernet/nxp/lpc_eth.c
drivers/net/ethernet/pensando/Kconfig
drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_main.c
drivers/net/ethernet/pensando/ionic/ionic_stats.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_sriov.c
drivers/net/ethernet/qlogic/qede/qede_fp.c
drivers/net/ethernet/qlogic/qla3xxx.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/socionext/netsec.c
drivers/net/ethernet/socionext/sni_ave.c
drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac5.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/fjes/fjes_main.c
drivers/net/hamradio/bpqether.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/ieee802154/atusb.c
drivers/net/ieee802154/ca8210.c
drivers/net/ieee802154/mcr20a.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/macsec.c
drivers/net/macvlan.c
drivers/net/netdevsim/dev.c
drivers/net/netdevsim/fib.c
drivers/net/phy/Kconfig
drivers/net/phy/at803x.c
drivers/net/phy/bcm7xxx.c
drivers/net/phy/mdio_device.c
drivers/net/phy/micrel.c
drivers/net/phy/national.c
drivers/net/phy/phy-c45.c
drivers/net/phy/phy-core.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/smsc.c
drivers/net/ppp/ppp_generic.c
drivers/net/ppp/pptp.c
drivers/net/tap.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/cdc_ether.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/hso.c
drivers/net/usb/lan78xx.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/r8152.c
drivers/net/usb/sr9800.c
drivers/net/usb/usbnet.c
drivers/net/virtio_net.c
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wimax/i2400m/op-rfkill.c
drivers/net/wimax/i2400m/tx.c
drivers/net/wireless/ath/Kconfig
drivers/net/wireless/ath/ar5523/Kconfig
drivers/net/wireless/ath/ath10k/core.c
drivers/net/wireless/ath/ath5k/pci.c
drivers/net/wireless/ath/ath6kl/Kconfig
drivers/net/wireless/ath/ath9k/Kconfig
drivers/net/wireless/ath/carl9170/Kconfig
drivers/net/wireless/ath/wil6210/txrx.c
drivers/net/wireless/atmel/Kconfig
drivers/net/wireless/intel/ipw2x00/Kconfig
drivers/net/wireless/intel/iwlegacy/3945-mac.c
drivers/net/wireless/intel/iwlegacy/4965-mac.c
drivers/net/wireless/intel/iwlegacy/Kconfig
drivers/net/wireless/intel/iwlwifi/Kconfig
drivers/net/wireless/intel/iwlwifi/fw/acpi.c
drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
drivers/net/wireless/intel/iwlwifi/fw/dbg.c
drivers/net/wireless/intel/iwlwifi/fw/file.h
drivers/net/wireless/intel/iwlwifi/iwl-csr.h
drivers/net/wireless/intel/iwlwifi/iwl-io.h
drivers/net/wireless/intel/iwlwifi/iwl-prph.h
drivers/net/wireless/intel/iwlwifi/mvm/fw.c
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
drivers/net/wireless/intel/iwlwifi/mvm/scan.c
drivers/net/wireless/intel/iwlwifi/mvm/sta.c
drivers/net/wireless/intel/iwlwifi/mvm/tt.c
drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/net/wireless/intersil/hostap/hostap_hw.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/mediatek/mt76/Makefile
drivers/net/wireless/mediatek/mt76/dma.c
drivers/net/wireless/mediatek/mt76/mt76.h
drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
drivers/net/wireless/mediatek/mt76/pci.c [new file with mode: 0644]
drivers/net/wireless/ralink/rt2x00/Kconfig
drivers/net/wireless/ralink/rt2x00/rt2x00.h
drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
drivers/net/wireless/realtek/rtlwifi/pci.c
drivers/net/wireless/realtek/rtlwifi/ps.c
drivers/net/wireless/realtek/rtw88/mac.c
drivers/net/wireless/realtek/rtw88/main.c
drivers/net/wireless/realtek/rtw88/pci.c
drivers/net/wireless/virt_wifi.c
drivers/net/wireless/zydas/zd1211rw/zd_usb.c
drivers/net/xen-netback/interface.c
drivers/net/xen-netfront.c
drivers/nfc/pn533/usb.c
drivers/nfc/st95hf/core.c
drivers/ntb/hw/amd/ntb_hw_amd.c
drivers/ntb/hw/amd/ntb_hw_amd.h
drivers/ntb/hw/idt/Kconfig
drivers/ntb/hw/mscc/ntb_hw_switchtec.c
drivers/ntb/ntb_transport.c
drivers/ntb/test/ntb_perf.c
drivers/nvdimm/btt.c
drivers/nvdimm/bus.c
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/nd.h
drivers/nvdimm/pfn_devs.c
drivers/nvdimm/region.c
drivers/nvdimm/region_devs.c
drivers/nvdimm/security.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/io-cmd-bdev.c
drivers/nvme/target/loop.c
drivers/nvme/target/tcp.c
drivers/of/of_mdio.c
drivers/of/of_reserved_mem.c
drivers/of/unittest.c
drivers/opp/core.c
drivers/opp/of.c
drivers/parisc/sba_iommu.c
drivers/pci/Kconfig
drivers/pci/access.c
drivers/pci/bus.c
drivers/pci/controller/dwc/Kconfig
drivers/pci/controller/dwc/Makefile
drivers/pci/controller/dwc/pci-exynos.c
drivers/pci/controller/dwc/pci-imx6.c
drivers/pci/controller/dwc/pci-layerscape-ep.c
drivers/pci/controller/dwc/pcie-al.c
drivers/pci/controller/dwc/pcie-armada8k.c
drivers/pci/controller/dwc/pcie-designware-ep.c
drivers/pci/controller/dwc/pcie-designware-host.c
drivers/pci/controller/dwc/pcie-designware.c
drivers/pci/controller/dwc/pcie-designware.h
drivers/pci/controller/dwc/pcie-histb.c
drivers/pci/controller/dwc/pcie-kirin.c
drivers/pci/controller/dwc/pcie-tegra194.c [new file with mode: 0644]
drivers/pci/controller/pci-host-common.c
drivers/pci/controller/pci-hyperv.c
drivers/pci/controller/pci-tegra.c
drivers/pci/controller/pcie-iproc-platform.c
drivers/pci/controller/pcie-mediatek.c
drivers/pci/controller/pcie-mobiveil.c
drivers/pci/controller/pcie-rockchip-host.c
drivers/pci/controller/vmd.c
drivers/pci/hotplug/cpci_hotplug_core.c
drivers/pci/hotplug/cpqphp_core.c
drivers/pci/hotplug/cpqphp_ctrl.c
drivers/pci/hotplug/cpqphp_nvram.h
drivers/pci/hotplug/ibmphp_res.c
drivers/pci/hotplug/pciehp.h
drivers/pci/hotplug/pciehp_core.c
drivers/pci/hotplug/pciehp_ctrl.c
drivers/pci/hotplug/pciehp_hpc.c
drivers/pci/hotplug/rpadlpar_core.c
drivers/pci/hotplug/rpaphp_core.c
drivers/pci/iov.c
drivers/pci/of.c
drivers/pci/p2pdma.c
drivers/pci/pci-acpi.c
drivers/pci/pci-bridge-emul.c
drivers/pci/pci-sysfs.c
drivers/pci/pci.c
drivers/pci/pci.h
drivers/pci/pcie/aspm.c
drivers/pci/pcie/err.c
drivers/pci/probe.c
drivers/pci/proc.c
drivers/pci/quirks.c
drivers/pci/search.c
drivers/pci/setup-bus.c
drivers/pci/syscall.c
drivers/pci/vc.c
drivers/pci/vpd.c
drivers/pcmcia/cistpl.c
drivers/phy/tegra/Kconfig
drivers/phy/tegra/Makefile
drivers/phy/tegra/phy-tegra194-p2u.c [new file with mode: 0644]
drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
drivers/pinctrl/aspeed/pinmux-aspeed.h
drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
drivers/pinctrl/bcm/pinctrl-ns2-mux.c
drivers/pinctrl/berlin/pinctrl-as370.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/intel/pinctrl-intel.c
drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
drivers/pinctrl/pinctrl-stmfx.c
drivers/platform/x86/Kconfig
drivers/platform/x86/classmate-laptop.c
drivers/platform/x86/i2c-multi-instantiate.c
drivers/platform/x86/intel_punit_ipc.c
drivers/platform/x86/pmc_atom.c
drivers/ptp/Kconfig
drivers/ptp/ptp_chardev.c
drivers/ptp/ptp_qoriq.c
drivers/pwm/Kconfig
drivers/pwm/Makefile
drivers/pwm/core.c
drivers/pwm/pwm-atmel-hlcdc.c
drivers/pwm/pwm-atmel.c
drivers/pwm/pwm-bcm-iproc.c
drivers/pwm/pwm-bcm2835.c
drivers/pwm/pwm-cros-ec.c
drivers/pwm/pwm-fsl-ftm.c
drivers/pwm/pwm-hibvt.c
drivers/pwm/pwm-imx-tpm.c
drivers/pwm/pwm-imx27.c
drivers/pwm/pwm-jz4740.c
drivers/pwm/pwm-lpss.c
drivers/pwm/pwm-mediatek.c
drivers/pwm/pwm-meson.c
drivers/pwm/pwm-mxs.c
drivers/pwm/pwm-rcar.c
drivers/pwm/pwm-rockchip.c
drivers/pwm/pwm-sifive.c
drivers/pwm/pwm-sprd.c [new file with mode: 0644]
drivers/pwm/pwm-sti.c
drivers/pwm/pwm-stm32-lp.c
drivers/pwm/pwm-stm32.c
drivers/pwm/pwm-sun4i.c
drivers/pwm/pwm-zx.c
drivers/regulator/core.c
drivers/regulator/da9062-regulator.c
drivers/regulator/fixed.c
drivers/regulator/lochnagar-regulator.c
drivers/regulator/of_regulator.c
drivers/regulator/pfuze100-regulator.c
drivers/regulator/qcom-rpmh-regulator.c
drivers/regulator/ti-abb-regulator.c
drivers/reset/reset-scmi.c
drivers/s390/block/dasd_eckd.c
drivers/s390/cio/ccwgroup.c
drivers/s390/cio/cio.h
drivers/s390/cio/css.c
drivers/s390/cio/device.c
drivers/s390/cio/device_ops.c
drivers/s390/cio/qdio_setup.c
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/pkey_api.c
drivers/s390/crypto/vfio_ap_drv.c
drivers/s390/crypto/zcrypt_api.c
drivers/s390/crypto/zcrypt_api.h
drivers/s390/crypto/zcrypt_cex4.c
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l2_main.c
drivers/s390/scsi/zfcp_fsf.c
drivers/scsi/Kconfig
drivers/scsi/aacraid/linit.c
drivers/scsi/bnx2fc/bnx2fc_io.c
drivers/scsi/ch.c
drivers/scsi/device_handler/scsi_dh_alua.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hpsa.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nportdisc.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/megaraid.c
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/qedf/qedf_main.c
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_bsg.c
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_gs.c
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/scsi/qla2xxx/qla_mid.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/sd.c
drivers/scsi/sni_53c710.c
drivers/scsi/storvsc_drv.c
drivers/scsi/ufs/ufs_bsg.c
drivers/scsi/ufs/ufshcd.c
drivers/soc/imx/soc-imx-scu.c
drivers/staging/android/ion/ion_system_heap.c
drivers/staging/exfat/Kconfig
drivers/staging/exfat/Makefile
drivers/staging/exfat/exfat.h
drivers/staging/exfat/exfat_blkdev.c
drivers/staging/exfat/exfat_cache.c
drivers/staging/exfat/exfat_core.c
drivers/staging/exfat/exfat_nls.c
drivers/staging/exfat/exfat_super.c
drivers/staging/exfat/exfat_upcase.c
drivers/staging/fbtft/Kconfig
drivers/staging/fbtft/Makefile
drivers/staging/fbtft/fbtft-core.c
drivers/staging/fbtft/fbtft_device.c [deleted file]
drivers/staging/fbtft/flexfb.c [deleted file]
drivers/staging/octeon/ethernet-tx.c
drivers/staging/octeon/octeon-stubs.h
drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c
drivers/staging/rtl8188eu/os_dep/usb_intf.c
drivers/staging/speakup/sysfs-driver-speakup [new file with mode: 0644]
drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c
drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c
drivers/staging/vt6655/device_main.c
drivers/staging/wlan-ng/cfg80211.c
drivers/target/iscsi/cxgbit/cxgbit_cm.c
drivers/target/target_core_device.c
drivers/target/tcm_fc/tfc_io.c
drivers/tee/tee_shm.c
drivers/thermal/Kconfig
drivers/thermal/armada_thermal.c
drivers/thermal/cpu_cooling.c
drivers/thermal/db8500_thermal.c
drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
drivers/thermal/intel/int340x_thermal/int3403_thermal.c
drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
drivers/thermal/intel/intel_pch_thermal.c
drivers/thermal/qcom/tsens-8960.c
drivers/thermal/qcom/tsens-v0_1.c
drivers/thermal/qcom/tsens-v1.c
drivers/thermal/qcom/tsens.h
drivers/thermal/qoriq_thermal.c
drivers/thermal/rcar_gen3_thermal.c
drivers/thermal/tegra/soctherm.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_hwmon.c
drivers/thermal/thermal_mmio.c
drivers/tty/n_hdlc.c
drivers/tty/serial/8250/8250_men_mcb.c
drivers/tty/serial/8250/8250_omap.c
drivers/tty/serial/Kconfig
drivers/tty/serial/fsl_linflexuart.c
drivers/tty/serial/fsl_lpuart.c
drivers/tty/serial/imx.c
drivers/tty/serial/owl-uart.c
drivers/tty/serial/rda-uart.c
drivers/tty/serial/serial_core.c
drivers/tty/serial/serial_mctrl_gpio.c
drivers/tty/serial/sh-sci.c
drivers/tty/serial/uartlite.c
drivers/tty/serial/xilinx_uartps.c
drivers/usb/cdns3/cdns3-pci-wrap.c
drivers/usb/cdns3/core.c
drivers/usb/cdns3/ep0.c
drivers/usb/cdns3/gadget.c
drivers/usb/cdns3/host-export.h
drivers/usb/cdns3/host.c
drivers/usb/class/usblp.c
drivers/usb/core/config.c
drivers/usb/dwc3/Kconfig
drivers/usb/dwc3/core.c
drivers/usb/dwc3/drd.c
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/dwc3/gadget.c
drivers/usb/dwc3/host.c
drivers/usb/gadget/composite.c
drivers/usb/gadget/configfs.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/udc/Kconfig
drivers/usb/gadget/udc/atmel_usba_udc.c
drivers/usb/gadget/udc/core.c
drivers/usb/gadget/udc/dummy_hcd.c
drivers/usb/gadget/udc/fsl_udc_core.c
drivers/usb/gadget/udc/lpc32xx_udc.c
drivers/usb/gadget/udc/renesas_usb3.c
drivers/usb/host/xhci-debugfs.c
drivers/usb/host/xhci-ext-caps.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/image/microtek.c
drivers/usb/misc/Kconfig
drivers/usb/misc/Makefile
drivers/usb/misc/adutux.c
drivers/usb/misc/chaoskey.c
drivers/usb/misc/iowarrior.c
drivers/usb/misc/ldusb.c
drivers/usb/misc/legousbtower.c
drivers/usb/misc/rio500.c [deleted file]
drivers/usb/misc/rio500_usb.h [deleted file]
drivers/usb/misc/usblcd.c
drivers/usb/misc/yurex.c
drivers/usb/mtu3/mtu3_core.c
drivers/usb/renesas_usbhs/common.c
drivers/usb/renesas_usbhs/common.h
drivers/usb/renesas_usbhs/fifo.c
drivers/usb/renesas_usbhs/fifo.h
drivers/usb/renesas_usbhs/mod_gadget.c
drivers/usb/renesas_usbhs/pipe.c
drivers/usb/renesas_usbhs/pipe.h
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/keyspan.c
drivers/usb/serial/option.c
drivers/usb/serial/ti_usb_3410_5052.c
drivers/usb/serial/usb-serial.c
drivers/usb/serial/whiteheat.c
drivers/usb/serial/whiteheat.h
drivers/usb/storage/scsiglue.c
drivers/usb/storage/uas.c
drivers/usb/typec/tcpm/tcpm.c
drivers/usb/typec/ucsi/displayport.c
drivers/usb/typec/ucsi/ucsi_ccg.c
drivers/usb/usb-skeleton.c
drivers/usb/usbip/vhci_hcd.c
drivers/usb/usbip/vhci_tx.c
drivers/vfio/vfio_iommu_spapr_tce.c
drivers/vfio/vfio_iommu_type1.c
drivers/vhost/test.c
drivers/vhost/vringh.c
drivers/video/backlight/Kconfig
drivers/video/backlight/backlight.c
drivers/video/backlight/gpio_backlight.c
drivers/video/backlight/lm3630a_bl.c
drivers/video/backlight/lms283gf05.c
drivers/video/backlight/pwm_bl.c
drivers/video/backlight/rave-sp-backlight.c
drivers/video/backlight/tosa_lcd.c
drivers/video/logo/Makefile
drivers/virt/vboxguest/vboxguest_utils.c
drivers/virtio/virtio_ring.c
drivers/w1/slaves/Kconfig
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/aspeed_wdt.c
drivers/watchdog/ath79_wdt.c
drivers/watchdog/cpwd.c
drivers/watchdog/diag288_wdt.c
drivers/watchdog/f71808e_wdt.c
drivers/watchdog/iTCO_wdt.c
drivers/watchdog/imx2_wdt.c
drivers/watchdog/imx7ulp_wdt.c [new file with mode: 0644]
drivers/watchdog/imx_sc_wdt.c
drivers/watchdog/jz4740_wdt.c
drivers/watchdog/ks8695_wdt.c [deleted file]
drivers/watchdog/nuc900_wdt.c [deleted file]
drivers/watchdog/orion_wdt.c
drivers/watchdog/qcom-wdt.c
drivers/watchdog/sprd_wdt.c
drivers/watchdog/ziirave_wdt.c
drivers/xen/balloon.c
drivers/xen/efi.c
drivers/xen/events/events_base.c
drivers/xen/gntdev.c
drivers/xen/grant-table.c
drivers/xen/pci.c
drivers/xen/pvcalls-back.c
drivers/xen/swiotlb-xen.c
drivers/xen/xenbus/xenbus_dev_frontend.c
fs/9p/cache.c
fs/9p/vfs_file.c
fs/9p/vfs_super.c
fs/afs/dynroot.c
fs/afs/inode.c
fs/afs/internal.h
fs/binfmt_elf.c
fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/delalloc-space.c
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/ref-verify.c
fs/btrfs/relocation.c
fs/btrfs/send.c
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/ceph/Makefile
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/io.c [new file with mode: 0644]
fs/ceph/io.h [new file with mode: 0644]
fs/ceph/locks.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/cifs/cifs_ioctl.h
fs/cifs/cifsacl.h
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/ioctl.c
fs/cifs/netmisc.c
fs/cifs/sess.c
fs/cifs/smb1ops.c
fs/cifs/smb2file.c
fs/cifs/smb2inode.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2proto.h
fs/cifs/smbfsctl.h
fs/cifs/transport.c
fs/cifs/xattr.c
fs/dax.c
fs/debugfs/file.c
fs/debugfs/inode.c
fs/direct-io.c
fs/erofs/data.c
fs/erofs/super.c
fs/erofs/zdata.c
fs/exec.c
fs/ext4/inode.c
fs/fat/dir.c
fs/fat/fatent.c
fs/fhandle.c
fs/file_table.c
fs/fs-writeback.c
fs/fs_context.c
fs/fuse/Kconfig
fs/fuse/Makefile
fs/fuse/cuse.c
fs/fuse/dev.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/fuse/readdir.c
fs/fuse/virtio_fs.c [new file with mode: 0644]
fs/fuse/xattr.c
fs/gfs2/incore.h
fs/gfs2/ops_fstype.c
fs/gfs2/super.c
fs/gfs2/super.h
fs/inode.c
fs/io_uring.c
fs/iomap/direct-io.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/jffs2/super.c
fs/libfs.c
fs/locks.c
fs/namespace.c
fs/nfs/delegation.c
fs/nfs/delegation.h
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/filelayout/filelayout.c
fs/nfs/internal.h
fs/nfs/nfs3proc.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/super.c
fs/nfs/write.c
fs/nfsd/Kconfig
fs/nfsd/Makefile
fs/nfsd/acl.h
fs/nfsd/blocklayout.c
fs/nfsd/export.c
fs/nfsd/filecache.c [new file with mode: 0644]
fs/nfsd/filecache.h [new file with mode: 0644]
fs/nfsd/netns.h
fs/nfsd/nfs3proc.c
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4layouts.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsproc.c
fs/nfsd/nfssvc.c
fs/nfsd/state.h
fs/nfsd/trace.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nfsd/xdr3.h
fs/nfsd/xdr4.h
fs/notify/fsnotify.h
fs/notify/group.c
fs/notify/mark.c
fs/ntfs/mft.c
fs/ntfs/namei.c
fs/ntfs/runlist.c
fs/ntfs/super.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/blockcheck.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmcommon.h
fs/ocfs2/dlm/dlmdebug.c
fs/ocfs2/dlm/dlmdebug.h
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmunlock.c
fs/ocfs2/dlmglue.c
fs/ocfs2/extent_map.c
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/ioctl.c
fs/ocfs2/journal.c
fs/ocfs2/journal.h
fs/ocfs2/localalloc.c
fs/ocfs2/namei.c
fs/ocfs2/ocfs2.h
fs/ocfs2/super.c
fs/ocfs2/xattr.c
fs/open.c
fs/proc/kcore.c
fs/proc/meminfo.c
fs/proc/page.c
fs/proc/task_mmu.c
fs/proc_namespace.c
fs/readdir.c
fs/reiserfs/do_balan.c
fs/reiserfs/fix_node.c
fs/reiserfs/journal.c
fs/reiserfs/lbalance.c
fs/reiserfs/objectid.c
fs/reiserfs/prints.c
fs/reiserfs/stree.c
fs/statfs.c
fs/super.c
fs/tracefs/inode.c
fs/userfaultfd.c
fs/xfs/libxfs/xfs_ag.c
fs/xfs/libxfs/xfs_alloc.h
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap.h
fs/xfs/libxfs/xfs_dir2_block.c
fs/xfs/libxfs/xfs_fs.h
fs/xfs/libxfs/xfs_sb.c
fs/xfs/scrub/alloc.c
fs/xfs/scrub/refcount.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_sysfs.c
include/Kbuild
include/acpi/processor.h
include/asm-generic/bug.h
include/asm-generic/pgalloc.h
include/asm-generic/pgtable.h
include/asm-generic/vmlinux.lds.h
include/crypto/pkcs7.h
include/drm/drm_crtc.h
include/drm/drm_self_refresh_helper.h
include/linux/acpi.h
include/linux/backlight.h
include/linux/bitmap.h
include/linux/bitops.h
include/linux/blkdev.h
include/linux/ceph/libceph.h
include/linux/ceph/messenger.h
include/linux/ceph/mon_client.h
include/linux/ceph/osd_client.h
include/linux/compaction.h
include/linux/compiler_attributes.h
include/linux/cpu.h
include/linux/cpufreq.h
include/linux/cpumask.h
include/linux/dsa/sja1105.h
include/linux/dynamic_debug.h
include/linux/efi.h
include/linux/export.h
include/linux/filter.h
include/linux/fs.h
include/linux/fs_context.h
include/linux/fsnotify_backend.h
include/linux/gfp.h
include/linux/gpio/driver.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/hwmon.h
include/linux/hyperv.h
include/linux/i2c.h
include/linux/if_macvlan.h
include/linux/if_team.h
include/linux/if_vlan.h
include/linux/ima.h
include/linux/interval_tree_generic.h
include/linux/iomap.h
include/linux/jbd2.h
include/linux/kexec.h
include/linux/kgdb.h
include/linux/khugepaged.h
include/linux/kvm_host.h
include/linux/leds.h
include/linux/lsm_hooks.h
include/linux/memcontrol.h
include/linux/memory.h
include/linux/mempolicy.h
include/linux/memremap.h
include/linux/mfd/da9063/pdata.h [deleted file]
include/linux/mfd/intel_soc_pmic_mrfld.h [new file with mode: 0644]
include/linux/mfd/mt6397/core.h
include/linux/micrel_phy.h
include/linux/mii.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mm_types_task.h
include/linux/mmzone.h
include/linux/module.h
include/linux/module_signature.h [new file with mode: 0644]
include/linux/netdevice.h
include/linux/nfs_fs.h
include/linux/page-flags.h
include/linux/page_ext.h
include/linux/pagemap.h
include/linux/pci-aspm.h [deleted file]
include/linux/pci-p2pdma.h
include/linux/pci.h
include/linux/pci_hotplug.h
include/linux/pci_ids.h
include/linux/perf_event.h
include/linux/phy.h
include/linux/platform_data/cros_ec_commands.h
include/linux/platform_data/db8500_thermal.h [deleted file]
include/linux/platform_data/dma-imx-sdma.h
include/linux/platform_data/eth-netx.h [deleted file]
include/linux/platform_device.h
include/linux/pm_qos.h
include/linux/printk.h
include/linux/pwm.h
include/linux/quicklist.h [deleted file]
include/linux/rbtree_augmented.h
include/linux/rcuwait.h
include/linux/sched.h
include/linux/sched/mm.h
include/linux/sched/task.h
include/linux/security.h
include/linux/shrinker.h
include/linux/skbuff.h
include/linux/slab.h
include/linux/socket.h
include/linux/string.h
include/linux/sunrpc/bc_xprt.h
include/linux/sunrpc/cache.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/svc_rdma.h
include/linux/sunrpc/xdr.h
include/linux/sunrpc/xprt.h
include/linux/sunrpc/xprtrdma.h
include/linux/sunrpc/xprtsock.h
include/linux/swap.h
include/linux/sysfs.h
include/linux/t10-pi.h
include/linux/tcp.h
include/linux/thread_info.h
include/linux/tpm_eventlog.h
include/linux/uaccess.h
include/linux/verification.h
include/linux/virtio_vsock.h
include/linux/vmalloc.h
include/linux/xarray.h
include/linux/zpool.h
include/net/bonding.h
include/net/busy_poll.h
include/net/cfg80211.h
include/net/flow_dissector.h
include/net/fq.h
include/net/fq_impl.h
include/net/hwbm.h
include/net/inet_timewait_sock.h
include/net/ip.h
include/net/ip_vs.h
include/net/ipv6.h
include/net/llc_conn.h
include/net/net_namespace.h
include/net/netfilter/nf_tables.h
include/net/request_sock.h
include/net/route.h
include/net/sch_generic.h
include/net/sctp/sctp.h
include/net/sock.h
include/net/tcp.h
include/net/vxlan.h
include/rdma/ib_verbs.h
include/scsi/scsi_eh.h
include/sound/hda_register.h
include/sound/simple_card_utils.h
include/trace/events/btrfs.h
include/trace/events/kmem.h
include/trace/events/rpcrdma.h
include/trace/events/rxrpc.h
include/trace/events/sock.h
include/trace/events/writeback.h
include/uapi/asm-generic/mman-common.h
include/uapi/drm/amdgpu_drm.h
include/uapi/linux/btf.h
include/uapi/linux/coff.h
include/uapi/linux/fuse.h
include/uapi/linux/io_uring.h
include/uapi/linux/kvm.h
include/uapi/linux/netfilter_bridge/ebtables.h
include/uapi/linux/nfsd/cld.h
include/uapi/linux/nvme_ioctl.h
include/uapi/linux/pci_regs.h
include/uapi/linux/pg.h
include/uapi/linux/ptp_clock.h
include/uapi/linux/sched.h
include/uapi/linux/serial_core.h
include/uapi/linux/virtio_fs.h [new file with mode: 0644]
include/uapi/linux/virtio_ids.h
include/xen/xen-ops.h
init/Kconfig
init/main.c
ipc/mqueue.c
ipc/sem.c
kernel/Makefile
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/devmap.c
kernel/bpf/inode.c
kernel/bpf/syscall.c
kernel/bpf/xskmap.c
kernel/cgroup/cpuset.c
kernel/cpu.c
kernel/debug/debug_core.c
kernel/dma/remap.c
kernel/elfcore.c
kernel/events/core.c
kernel/events/uprobes.c
kernel/exit.c
kernel/fork.c
kernel/freezer.c
kernel/gen_kheaders.sh
kernel/kexec.c
kernel/kexec_core.c
kernel/kexec_file.c
kernel/kthread.c
kernel/livepatch/core.c
kernel/locking/qspinlock_paravirt.h
kernel/module.c
kernel/module_signature.c [new file with mode: 0644]
kernel/module_signing.c
kernel/panic.c
kernel/params.c
kernel/power/hibernate.c
kernel/power/main.c
kernel/power/qos.c
kernel/resource.c
kernel/sched/core.c
kernel/sched/cputime.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/membarrier.c
kernel/sched/sched.h
kernel/sched/topology.c
kernel/stop_machine.c
kernel/sysctl.c
kernel/time/hrtimer.c
kernel/time/posix-cpu-timers.c
kernel/time/sched_clock.c
kernel/time/tick-broadcast-hrtimer.c
kernel/time/timer.c
kernel/trace/bpf_trace.c
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_dynevent.c
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_events_trigger.c
kernel/trace/trace_hwlat.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_printk.c
kernel/trace/trace_probe.c
kernel/trace/trace_stack.c
kernel/trace/trace_stat.c
kernel/trace/trace_uprobe.c
lib/Kconfig
lib/Kconfig.debug
lib/Kconfig.kasan
lib/bug.c
lib/dump_stack.c
lib/extable.c
lib/generic-radix-tree.c
lib/hexdump.c
lib/iov_iter.c
lib/lzo/lzo1x_compress.c
lib/rbtree_test.c
lib/show_mem.c
lib/string.c
lib/strncpy_from_user.c
lib/strnlen_user.c
lib/test_kasan.c
lib/test_meminit.c
lib/test_user_copy.c
lib/textsearch.c
lib/usercopy.c
lib/vdso/Kconfig
lib/vdso/gettimeofday.c
mm/Kconfig
mm/Kconfig.debug
mm/Makefile
mm/backing-dev.c
mm/compaction.c
mm/filemap.c
mm/frame_vector.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/hugetlb_cgroup.c
mm/init-mm.c
mm/internal.h
mm/kasan/common.c
mm/kasan/kasan.h
mm/kasan/report.c
mm/kasan/tags_report.c
mm/khugepaged.c
mm/kmemleak.c
mm/ksm.c
mm/madvise.c
mm/memblock.c
mm/memcontrol.c
mm/memfd.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/memremap.c
mm/migrate.c
mm/mincore.c
mm/mlock.c
mm/mmap.c
mm/mmu_gather.c
mm/mmu_notifier.c
mm/mprotect.c
mm/mremap.c
mm/msync.c
mm/nommu.c
mm/oom_kill.c
mm/page_alloc.c
mm/page_ext.c
mm/page_owner.c
mm/page_poison.c
mm/page_vma_mapped.c
mm/quicklist.c [deleted file]
mm/rmap.c
mm/shmem.c
mm/shuffle.c
mm/slab.c
mm/slab.h
mm/slab_common.c
mm/slob.c
mm/slub.c
mm/sparse.c
mm/swap.c
mm/swap_state.c
mm/truncate.c
mm/usercopy.c
mm/util.c
mm/vmalloc.c
mm/vmpressure.c
mm/vmscan.c
mm/vmstat.c
mm/z3fold.c
mm/zpool.c
mm/zsmalloc.c
mm/zswap.c
net/8021q/vlan.c
net/8021q/vlan_dev.c
net/9p/client.c
net/appletalk/ddp.c
net/atm/common.c
net/ax25/af_ax25.c
net/batman-adv/Kconfig
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v_ogm.c
net/batman-adv/hard-interface.c
net/batman-adv/soft-interface.c
net/batman-adv/types.h
net/bluetooth/6lowpan.c
net/bluetooth/af_bluetooth.c
net/bridge/br_device.c
net/bridge/netfilter/nf_conntrack_bridge.c
net/caif/caif_socket.c
net/ceph/ceph_common.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/core/datagram.c
net/core/dev.c
net/core/dev_addr_lists.c
net/core/devlink.c
net/core/dst.c
net/core/ethtool.c
net/core/filter.c
net/core/flow_dissector.c
net/core/lwt_bpf.c
net/core/net_namespace.c
net/core/request_sock.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/decnet/af_decnet.c
net/dsa/dsa2.c
net/dsa/master.c
net/dsa/slave.c
net/dsa/tag_sja1105.c
net/ieee802154/6lowpan/core.c
net/ieee802154/socket.c
net/ife/Kconfig
net/ipv4/Kconfig
net/ipv4/datagram.c
net/ipv4/fib_frontend.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_diag.c
net/ipv4/inet_hashtables.c
net/ipv4/ip_forward.c
net/ipv4/ip_gre.c
net/ipv4/ip_input.c
net/ipv4/ip_output.c
net/ipv4/ipmr.c
net/ipv4/netfilter/nf_dup_ipv4.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_diag.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c
net/ipv4/udp.c
net/ipv4/xfrm4_policy.c
net/ipv6/addrconf.c
net/ipv6/addrconf_core.c
net/ipv6/fib6_rules.c
net/ipv6/inet6_connection_sock.c
net/ipv6/inet6_hashtables.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_input.c
net/ipv6/ip6_output.c
net/ipv6/netfilter.c
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/nf_dup_ipv6.c
net/ipv6/raw.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/kcm/kcmsock.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_eth.c
net/l2tp/l2tp_ip.c
net/l2tp/l2tp_ip6.c
net/llc/af_llc.c
net/llc/llc_c_ac.c
net/llc/llc_conn.c
net/llc/llc_if.c
net/llc/llc_s_ac.c
net/llc/llc_sap.c
net/mac80211/debugfs_netdev.c
net/mac80211/mlme.c
net/mac80211/rx.c
net/mac80211/scan.c
net/mac80211/util.c
net/ncsi/internal.h
net/ncsi/ncsi-manage.c
net/netfilter/Kconfig
net/netfilter/ipvs/Kconfig
net/netfilter/ipvs/ip_vs_app.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_pe.c
net/netfilter/ipvs/ip_vs_sched.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_offload.c
net/netfilter/nft_connlimit.c
net/netfilter/nft_flow_offload.c
net/netfilter/nft_lookup.c
net/netfilter/nft_payload.c
net/netrom/af_netrom.c
net/nfc/llcp_sock.c
net/openvswitch/actions.c
net/openvswitch/datapath.c
net/openvswitch/vport-internal_dev.c
net/packet/af_packet.c
net/phonet/socket.c
net/qrtr/qrtr.c
net/rds/Kconfig
net/rds/bind.c
net/rds/ib.c
net/rose/af_rose.c
net/rxrpc/ar-internal.h
net/rxrpc/call_accept.c
net/rxrpc/call_object.c
net/rxrpc/conn_client.c
net/rxrpc/conn_object.c
net/rxrpc/conn_service.c
net/rxrpc/peer_event.c
net/rxrpc/peer_object.c
net/rxrpc/recvmsg.c
net/rxrpc/sendmsg.c
net/sched/Kconfig
net/sched/act_api.c
net/sched/act_mirred.c
net/sched/act_mpls.c
net/sched/act_sample.c
net/sched/cls_api.c
net/sched/cls_bpf.c
net/sched/em_meta.c
net/sched/sch_cbq.c
net/sched/sch_cbs.c
net/sched/sch_dsmark.c
net/sched/sch_etf.c
net/sched/sch_generic.c
net/sched/sch_hhf.c
net/sched/sch_htb.c
net/sched/sch_multiq.c
net/sched/sch_netem.c
net/sched/sch_sfb.c
net/sched/sch_sfq.c
net/sched/sch_taprio.c
net/sctp/diag.c
net/sctp/input.c
net/sctp/ipv6.c
net/sctp/sm_make_chunk.c
net/sctp/socket.c
net/smc/af_smc.c
net/smc/smc_core.c
net/smc/smc_pnet.c
net/smc/smc_rx.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/backchannel_rqst.c
net/sunrpc/cache.c
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/svc.c
net/sunrpc/xdr.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/backchannel.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c
net/tipc/link.c
net/tipc/msg.c
net/tipc/socket.c
net/unix/af_unix.c
net/vmw_vsock/af_vsock.c
net/vmw_vsock/hyperv_transport.c
net/vmw_vsock/virtio_transport_common.c
net/wireless/chan.c
net/wireless/nl80211.c
net/wireless/reg.c
net/wireless/reg.h
net/wireless/scan.c
net/wireless/util.c
net/wireless/wext-compat.c
net/wireless/wext-sme.c
net/x25/x25_dev.c
net/xdp/xdp_umem.c
net/xdp/xsk.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_interface.c
net/xfrm/xfrm_output.c
net/xfrm/xfrm_policy.c
samples/bpf/asm_goto_workaround.h
samples/bpf/task_fd_query_user.c
scripts/Kbuild.include
scripts/Makefile
scripts/Makefile.build
scripts/Makefile.lib
scripts/checkpatch.pl
scripts/coccinelle/api/devm_platform_ioremap_resource.cocci [deleted file]
scripts/coccinelle/misc/add_namespace.cocci
scripts/gdb/linux/dmesg.py
scripts/gdb/linux/symbols.py
scripts/gdb/linux/utils.py
scripts/mod/modpost.c
scripts/mod/modpost.h
scripts/namespace.pl
scripts/nsdeps
scripts/recordmcount.h
scripts/setlocalversion
security/Kconfig
security/Makefile
security/integrity/Kconfig
security/integrity/Makefile
security/integrity/digsig.c
security/integrity/ima/Kconfig
security/integrity/ima/Makefile
security/integrity/ima/ima.h
security/integrity/ima/ima_api.c
security/integrity/ima/ima_appraise.c
security/integrity/ima/ima_crypto.c
security/integrity/ima/ima_main.c
security/integrity/ima/ima_modsig.c [new file with mode: 0644]
security/integrity/ima/ima_policy.c
security/integrity/ima/ima_template.c
security/integrity/ima/ima_template_lib.c
security/integrity/ima/ima_template_lib.h
security/integrity/integrity.h
security/keys/trusted.c
security/lockdown/Kconfig [new file with mode: 0644]
security/lockdown/Makefile [new file with mode: 0644]
security/lockdown/lockdown.c [new file with mode: 0644]
security/security.c
security/selinux/ss/services.c
security/smack/smack_access.c
security/smack/smack_lsm.c
sound/core/timer.c
sound/firewire/bebob/bebob_stream.c
sound/firewire/dice/dice-alesis.c
sound/hda/ext/hdac_ext_controller.c
sound/hda/hdac_controller.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_analog.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/atmel/atmel_ssc_dai.c
sound/soc/atmel/atmel_ssc_dai.h
sound/soc/codecs/max98373.c
sound/soc/codecs/msm8916-wcd-digital.c
sound/soc/codecs/pcm3168a.c
sound/soc/codecs/rt5651.c
sound/soc/codecs/rt5682.c
sound/soc/codecs/wm8994.c
sound/soc/codecs/wm_adsp.c
sound/soc/fsl/fsl_sai.c
sound/soc/fsl/fsl_sai.h
sound/soc/intel/boards/sof_rt5682.c
sound/soc/rockchip/rockchip_i2s.c
sound/soc/samsung/arndale_rt5631.c
sound/soc/sh/rcar/core.c
sound/soc/sh/rcar/ssi.c
sound/soc/soc-core.c
sound/soc/soc-pcm.c
sound/soc/soc-topology.c
sound/soc/sof/control.c
sound/soc/sof/intel/Kconfig
sound/soc/sof/intel/bdw.c
sound/soc/sof/intel/byt.c
sound/soc/sof/intel/hda-ctrl.c
sound/soc/sof/intel/hda-loader.c
sound/soc/sof/intel/hda-stream.c
sound/soc/sof/intel/hda.c
sound/soc/sof/intel/hda.h
sound/soc/sof/loader.c
sound/soc/sof/pcm.c
sound/soc/sof/topology.c
sound/soc/stm/stm32_sai_sub.c
sound/soc/ti/Kconfig
sound/usb/pcm.c
sound/usb/quirks.c
sound/usb/validate.c
tools/arch/arm/include/uapi/asm/kvm.h
tools/arch/arm64/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/kvm.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/uapi/asm/svm.h
tools/arch/x86/include/uapi/asm/unistd.h
tools/arch/x86/include/uapi/asm/vmx.h
tools/bpf/Makefile
tools/gpio/Makefile
tools/hv/Build [new file with mode: 0644]
tools/hv/Makefile
tools/include/asm/bug.h
tools/include/linux/rbtree.h
tools/include/linux/rbtree_augmented.h
tools/include/uapi/asm-generic/mman-common.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/fs.h
tools/include/uapi/linux/fscrypt.h [new file with mode: 0644]
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/prctl.h
tools/include/uapi/linux/sched.h
tools/include/uapi/linux/usbdevice_fs.h
tools/lib/bpf/Makefile
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/xsk.c
tools/lib/rbtree.c
tools/lib/subcmd/Makefile
tools/lib/traceevent/Build
tools/lib/traceevent/Documentation/libtraceevent-event_print.txt [new file with mode: 0644]
tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
tools/lib/traceevent/Documentation/libtraceevent-handle.txt
tools/lib/traceevent/Documentation/libtraceevent-plugins.txt [new file with mode: 0644]
tools/lib/traceevent/Documentation/libtraceevent.txt
tools/lib/traceevent/Makefile
tools/lib/traceevent/event-parse.c
tools/lib/traceevent/event-parse.h
tools/lib/traceevent/plugin_cfg80211.c [deleted file]
tools/lib/traceevent/plugin_function.c [deleted file]
tools/lib/traceevent/plugin_hrtimer.c [deleted file]
tools/lib/traceevent/plugin_jbd2.c [deleted file]
tools/lib/traceevent/plugin_kmem.c [deleted file]
tools/lib/traceevent/plugin_kvm.c [deleted file]
tools/lib/traceevent/plugin_mac80211.c [deleted file]
tools/lib/traceevent/plugin_sched_switch.c [deleted file]
tools/lib/traceevent/plugin_scsi.c [deleted file]
tools/lib/traceevent/plugin_xen.c [deleted file]
tools/lib/traceevent/plugins/Build [new file with mode: 0644]
tools/lib/traceevent/plugins/Makefile [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_cfg80211.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_function.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_hrtimer.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_jbd2.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_kmem.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_kvm.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_mac80211.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_sched_switch.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_scsi.c [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_xen.c [new file with mode: 0644]
tools/objtool/check.c
tools/perf/Documentation/asciidoc.conf
tools/perf/Documentation/jitdump-specification.txt
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/arm/annotate/instructions.c
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm64/annotate/instructions.c
tools/perf/arch/arm64/util/arm-spe.c
tools/perf/arch/arm64/util/dwarf-regs.c
tools/perf/arch/arm64/util/header.c
tools/perf/arch/arm64/util/unwind-libunwind.c
tools/perf/arch/powerpc/util/dwarf-regs.c
tools/perf/arch/powerpc/util/header.c
tools/perf/arch/powerpc/util/kvm-stat.c
tools/perf/arch/powerpc/util/skip-callchain-idx.c
tools/perf/arch/powerpc/util/sym-handling.c
tools/perf/arch/s390/Makefile
tools/perf/arch/s390/annotate/instructions.c
tools/perf/arch/s390/util/auxtrace.c
tools/perf/arch/s390/util/header.c
tools/perf/arch/s390/util/machine.c
tools/perf/arch/x86/annotate/instructions.c
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/arch/x86/tests/perf-time-to-tsc.c
tools/perf/arch/x86/tests/rdpmc.c
tools/perf/arch/x86/util/archinsn.c
tools/perf/arch/x86/util/event.c
tools/perf/arch/x86/util/header.c
tools/perf/arch/x86/util/intel-bts.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/arch/x86/util/machine.c
tools/perf/arch/x86/util/tsc.c
tools/perf/arch/x86/util/unwind-libunwind.c
tools/perf/bench/epoll-ctl.c
tools/perf/bench/epoll-wait.c
tools/perf/bench/futex-hash.c
tools/perf/bench/futex-lock-pi.c
tools/perf/bench/futex-requeue.c
tools/perf/bench/futex-wake-parallel.c
tools/perf/bench/futex-wake.c
tools/perf/bench/numa.c
tools/perf/bench/sched-messaging.c
tools/perf/bench/sched-pipe.c
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-cache.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-c2c.c
tools/perf/builtin-config.c
tools/perf/builtin-diff.c
tools/perf/builtin-evlist.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-list.c
tools/perf/builtin-lock.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/check-headers.sh
tools/perf/jvmti/Build
tools/perf/lib/Makefile
tools/perf/lib/core.c
tools/perf/lib/cpumap.c
tools/perf/lib/evlist.c
tools/perf/lib/evsel.c
tools/perf/lib/include/internal/evlist.h
tools/perf/lib/include/internal/evsel.h
tools/perf/lib/include/internal/lib.h
tools/perf/lib/include/internal/mmap.h [new file with mode: 0644]
tools/perf/lib/include/perf/core.h
tools/perf/lib/include/perf/cpumap.h
tools/perf/lib/include/perf/evlist.h
tools/perf/lib/lib.c
tools/perf/lib/libperf.map
tools/perf/lib/tests/test-cpumap.c
tools/perf/lib/tests/test-evlist.c
tools/perf/lib/tests/test-evsel.c
tools/perf/lib/tests/test-threadmap.c
tools/perf/perf.c
tools/perf/pmu-events/README
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/mapfile.csv
tools/perf/pmu-events/arch/powerpc/power8/other.json
tools/perf/pmu-events/arch/s390/cf_m8561/basic.json [deleted file]
tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json [deleted file]
tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json [deleted file]
tools/perf/pmu-events/arch/s390/cf_m8561/extended.json [deleted file]
tools/perf/pmu-events/arch/s390/cf_z15/basic.json [new file with mode: 0644]
tools/perf/pmu-events/arch/s390/cf_z15/crypto.json [new file with mode: 0644]
tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json [new file with mode: 0644]
tools/perf/pmu-events/arch/s390/cf_z15/extended.json [new file with mode: 0644]
tools/perf/pmu-events/arch/s390/cf_z15/transaction.json [new file with mode: 0644]
tools/perf/pmu-events/arch/s390/mapfile.csv
tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
tools/perf/pmu-events/arch/x86/amdfam17h/core.json
tools/perf/pmu-events/jevents.c
tools/perf/tests/backward-ring-buffer.c
tools/perf/tests/bitmap.c
tools/perf/tests/bpf.c
tools/perf/tests/clang.c
tools/perf/tests/code-reading.c
tools/perf/tests/cpumap.c
tools/perf/tests/dso-data.c
tools/perf/tests/dwarf-unwind.c
tools/perf/tests/event-times.c
tools/perf/tests/event_update.c
tools/perf/tests/evsel-roundtrip-name.c
tools/perf/tests/hists_common.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_link.c
tools/perf/tests/hists_output.c
tools/perf/tests/keep-tracking.c
tools/perf/tests/llvm.c
tools/perf/tests/make
tools/perf/tests/mem2node.c
tools/perf/tests/mmap-basic.c
tools/perf/tests/mmap-thread-lookup.c
tools/perf/tests/openat-syscall-all-cpus.c
tools/perf/tests/openat-syscall-tp-fields.c
tools/perf/tests/parse-events.c
tools/perf/tests/parse-no-sample-id-all.c
tools/perf/tests/perf-hooks.c
tools/perf/tests/perf-record.c
tools/perf/tests/pmu.c
tools/perf/tests/sample-parsing.c
tools/perf/tests/sdt.c
tools/perf/tests/stat.c
tools/perf/tests/sw-clock.c
tools/perf/tests/switch-tracking.c
tools/perf/tests/task-exit.c
tools/perf/tests/thread-map.c
tools/perf/tests/topology.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/ui/browser.c
tools/perf/ui/browsers/annotate.c
tools/perf/ui/browsers/header.c
tools/perf/ui/browsers/hists.c
tools/perf/ui/browsers/map.c
tools/perf/ui/browsers/res_sample.c
tools/perf/ui/browsers/scripts.c
tools/perf/ui/gtk/helpline.c
tools/perf/ui/gtk/hists.c
tools/perf/ui/gtk/progress.c
tools/perf/ui/gtk/setup.c
tools/perf/ui/gtk/util.c
tools/perf/ui/helpline.c
tools/perf/ui/hist.c
tools/perf/ui/setup.c
tools/perf/ui/stdio/hist.c
tools/perf/ui/tui/helpline.c
tools/perf/ui/tui/setup.c
tools/perf/ui/tui/util.c
tools/perf/util/Build
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/arm-spe.c
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/bpf-event.c
tools/perf/util/bpf-event.h
tools/perf/util/bpf-loader.c
tools/perf/util/branch.c
tools/perf/util/branch.h
tools/perf/util/build-id.c
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/cloexec.c
tools/perf/util/copyfile.c [new file with mode: 0644]
tools/perf/util/copyfile.h [new file with mode: 0644]
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
tools/perf/util/cs-etm.c
tools/perf/util/data-convert-bt.c
tools/perf/util/data.c
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/demangle-java.c
tools/perf/util/demangle-rust.c
tools/perf/util/dwarf-regs.c
tools/perf/util/env.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/evsel_config.h [new file with mode: 0644]
tools/perf/util/evsel_fprintf.c
tools/perf/util/evsel_fprintf.h [new file with mode: 0644]
tools/perf/util/genelf.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.h
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt.c
tools/perf/util/jitdump.c
tools/perf/util/kvm-stat.h
tools/perf/util/libunwind/arm64.c
tools/perf/util/libunwind/x86_32.c
tools/perf/util/llvm-utils.c
tools/perf/util/lzma.c
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/map.c
tools/perf/util/memswap.h
tools/perf/util/mmap.c
tools/perf/util/mmap.h
tools/perf/util/namespaces.c
tools/perf/util/namespaces.h
tools/perf/util/parse-events.c
tools/perf/util/parse-events.y
tools/perf/util/perf-hooks.c
tools/perf/util/perf_event_attr_fprintf.c [new file with mode: 0644]
tools/perf/util/pmu.c
tools/perf/util/probe-event.c
tools/perf/util/probe-file.c
tools/perf/util/probe-finder.c
tools/perf/util/python-ext-sources
tools/perf/util/python.c
tools/perf/util/record.c
tools/perf/util/rwsem.c
tools/perf/util/s390-cpumsf.c
tools/perf/util/s390-sample-raw.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/srccode.c
tools/perf/util/stat-shadow.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/svghelper.c
tools/perf/util/symbol-elf.c
tools/perf/util/symbol-minimal.c
tools/perf/util/symbol.c
tools/perf/util/synthetic-events.c [new file with mode: 0644]
tools/perf/util/synthetic-events.h [new file with mode: 0644]
tools/perf/util/target.c
tools/perf/util/top.c
tools/perf/util/trace-event-info.c
tools/perf/util/trace-event-read.c
tools/perf/util/trace-event.c
tools/perf/util/tsc.h
tools/perf/util/unwind-libdw.c
tools/perf/util/unwind-libunwind-local.c
tools/perf/util/usage.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/perf/util/vdso.c
tools/perf/util/zlib.c
tools/power/x86/intel-speed-select/isst-config.c
tools/power/x86/intel-speed-select/isst-core.c
tools/power/x86/intel-speed-select/isst-display.c
tools/power/x86/intel-speed-select/isst.h
tools/testing/nvdimm/test/nfit_test.h
tools/testing/selftests/.gitignore
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
tools/testing/selftests/bpf/progs/strobemeta.h
tools/testing/selftests/bpf/test_flow_dissector.sh
tools/testing/selftests/bpf/test_lwt_ip_encap.sh
tools/testing/selftests/bpf/test_offload.py
tools/testing/selftests/bpf/test_sysctl.c
tools/testing/selftests/bpf/test_tc_edt.sh
tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
tools/testing/selftests/kexec/test_kexec_file_load.sh
tools/testing/selftests/kselftest/runner.sh
tools/testing/selftests/kselftest_install.sh
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/dirty_log_test.c
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/include/x86_64/vmx.h
tools/testing/selftests/kvm/lib/aarch64/processor.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/kvm_util_internal.h
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/lib/x86_64/ucall.c
tools/testing/selftests/kvm/lib/x86_64/vmx.c
tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
tools/testing/selftests/kvm/x86_64/sync_regs_test.c
tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
tools/testing/selftests/livepatch/config
tools/testing/selftests/membarrier/.gitignore
tools/testing/selftests/membarrier/Makefile
tools/testing/selftests/membarrier/membarrier_test.c [deleted file]
tools/testing/selftests/membarrier/membarrier_test_impl.h [new file with mode: 0644]
tools/testing/selftests/membarrier/membarrier_test_multi_thread.c [new file with mode: 0644]
tools/testing/selftests/membarrier/membarrier_test_single_thread.c [new file with mode: 0644]
tools/testing/selftests/net/.gitignore
tools/testing/selftests/net/fib_nexthop_multiprefix.sh
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/fib_tests.sh
tools/testing/selftests/net/l2tp.sh [changed mode: 0644->0755]
tools/testing/selftests/net/reuseport_dualstack.c
tools/testing/selftests/net/udpgso.c
tools/testing/selftests/pidfd/Makefile
tools/testing/selftests/powerpc/mm/Makefile
tools/testing/selftests/powerpc/mm/tlbie_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/tm/.gitignore
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/powerpc/tm/tm-poison.c [new file with mode: 0644]
tools/testing/selftests/rtc/settings [new file with mode: 0644]
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/tpm2/Makefile
tools/testing/selftests/vm/gup_benchmark.c
tools/testing/selftests/watchdog/watchdog-test.c
tools/usb/usbip/libsrc/usbip_device_driver.c
tools/virtio/crypto/hash.h [new file with mode: 0644]
tools/virtio/linux/dma-mapping.h
tools/virtio/xen/xen.h [new file with mode: 0644]
usr/Makefile
usr/include/Makefile
virt/kvm/arm/pmu.c
virt/kvm/arm/vgic/trace.h
virt/kvm/kvm_main.c

index edcac87..83d7e75 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -196,7 +196,8 @@ Oleksij Rempel <linux@rempel-privat.de> <o.rempel@pengutronix.de>
 Oleksij Rempel <linux@rempel-privat.de> <ore@pengutronix.de>
 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
 Patrick Mochel <mochel@digitalimplant.org>
-Paul Burton <paul.burton@mips.com> <paul.burton@imgtec.com>
+Paul Burton <paulburton@kernel.org> <paul.burton@imgtec.com>
+Paul Burton <paulburton@kernel.org> <paul.burton@mips.com>
 Peter A Jonsson <pj@ludd.ltu.se>
 Peter Oruba <peter@oruba.de>
 Peter Oruba <peter.oruba@amd.com>
@@ -229,6 +230,7 @@ Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
 Shuah Khan <shuah@kernel.org> <shuahkh@osg.samsung.com>
 Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
+Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
 Simon Kelley <simon@thekelleys.org.uk>
 Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
 Stephen Hemminger <shemminger@osdl.org>
diff --git a/CREDITS b/CREDITS
index 8b67a85..031605d 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -1637,6 +1637,10 @@ S: Panoramastrasse 18
 S: D-69126 Heidelberg
 S: Germany
 
+N: Simon Horman
+M: horms@verge.net.au
+D: Renesas ARM/ARM64 SoC maintainer
+
 N: Christopher Horn
 E: chorn@warwick.net
 D: Miscellaneous sysctl hacks
index fc376a3..29ebe9a 100644 (file)
@@ -37,7 +37,7 @@ Description:
                        euid:= decimal value
                        fowner:= decimal value
                lsm:    are LSM specific
-               option: appraise_type:= [imasig]
+               option: appraise_type:= [imasig] [imasig|modsig]
                        template:= name of a defined IMA template type
                        (eg, ima-ng). Only valid when action is "measure".
                        pcr:= decimal value
@@ -105,3 +105,7 @@ Description:
 
                        measure func=KEXEC_KERNEL_CHECK pcr=4
                        measure func=KEXEC_INITRAMFS_CHECK pcr=5
+
+               Example of appraise rule allowing modsig appended signatures:
+
+                       appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig
diff --git a/Documentation/ABI/testing/sysfs-class-backlight b/Documentation/ABI/testing/sysfs-class-backlight
new file mode 100644 (file)
index 0000000..3ab175a
--- /dev/null
@@ -0,0 +1,26 @@
+What:          /sys/class/backlight/<backlight>/scale
+Date:          July 2019
+KernelVersion: 5.4
+Contact:       Daniel Thompson <daniel.thompson@linaro.org>
+Description:
+               Description of the scale of the brightness curve.
+
+               The human eye senses brightness approximately logarithmically,
+               hence linear changes in brightness are perceived as being
+               non-linear. To achieve a linear perception of brightness changes,
+               controls like sliders need to apply a logarithmic mapping for
+               backlights with a linear brightness curve.
+
+               Possible values of the attribute are:
+
+               unknown
+                 The scale of the brightness curve is unknown.
+
+               linear
+                 The brightness changes linearly with each step. Brightness
+                 controls should apply a logarithmic mapping for a linear
+                 perception.
+
+               non-linear
+                 The brightness changes non-linearly with each step. Brightness
+                 controls should use a linear mapping for a linear perception.
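
A minimal userspace sketch of the mapping described above (the helper name
and the fixed ranges are assumptions for illustration, not part of the
patch): for a backlight whose scale reads "linear", a UI control can map
slider positions to brightness values exponentially, so that equal slider
steps are perceived as equal brightness steps::

        #include <math.h>

        /* slider in [0.0, 1.0] -> brightness in [1, max]; equal slider
         * steps multiply brightness by a constant factor */
        static unsigned int slider_to_brightness(double slider, unsigned int max)
        {
                return (unsigned int)(pow((double)max, slider) + 0.5);
        }
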
index 6317ade..675f9b5 100644 (file)
@@ -72,3 +72,37 @@ Description:
                It is a read/write file. When read, the currently assigned
                pretimeout governor is returned.  When written, it sets
                the pretimeout governor.
+
+What:          /sys/class/watchdog/watchdog1/access_cs0
+Date:          August 2019
+Contact:       Ivan Mikhaylov <i.mikhaylov@yadro.com>,
+               Alexander Amelkin <a.amelkin@yadro.com>
+Description:
+               It is a read/write file. This attribute exists only if the
+               system has booted from the alternate flash chip due to
+               expiration of a watchdog timer of AST2400/AST2500 when
+               alternate boot function was enabled with 'aspeed,alt-boot'
+               devicetree option for that watchdog or with an appropriate
+               h/w strapping (for WDT2 only).
+
+               At alternate flash the 'access_cs0' sysfs node provides:
+                       ast2400: a way to get access to the primary SPI flash
+                               chip at CS0 after booting from the alternate
+                               chip at CS1.
+                       ast2500: a way to restore the normal address mapping
+                               from (CS0->CS1, CS1->CS0) to (CS0->CS0,
+                               CS1->CS1).
+
+               Clearing the boot code selection and timeout counter also
+               resets the chip select line mapping to its initial state. When
+               the SoC is in normal mapping state (i.e. booted from CS0),
+               clearing those bits does nothing for both versions of the SoC.
+               For alternate boot mode (booted from CS1 due to wdt2
+               expiration) the behavior differs as described above.
+
+               This option can be used with wdt2 (watchdog1) only.
+
+               When read, the current status of the boot code selection is
+               shown. When written with any non-zero value, it clears
+               the boot code selection and the timeout counter, which results
+               in chipselect reset for AST2400/AST2500.
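
For illustration only (not part of the patch), a userspace sketch that
reads the boot code selection status and then clears it with a non-zero
write, as described above::

        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/sys/class/watchdog/watchdog1/access_cs0", "r+");
                char status[16];

                if (!f)
                        return 1;
                if (fgets(status, sizeof(status), f))
                        printf("boot code selection: %s", status);
                rewind(f);              /* reposition before switching to writing */
                fputs("1", f);          /* any non-zero value clears the selection */
                fclose(f);
                return 0;
        }
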
index 29601d9..ed35833 100644 (file)
@@ -429,10 +429,15 @@ KernelVersion:    2.6.22
 Contact:       Pekka Enberg <penberg@cs.helsinki.fi>,
                Christoph Lameter <cl@linux-foundation.org>
 Description:
-               The shrink file is written when memory should be reclaimed from
-               a cache.  Empty partial slabs are freed and the partial list is
-               sorted so the slabs with the fewest available objects are used
-               first.
+               The shrink file is used to reclaim unused slab cache
+               memory from a cache.  Empty per-cpu or partial slabs
+               are freed and the partial list is sorted so the slabs
+               with the fewest available objects are used first.
+               It only accepts a value of "1" on write for shrinking
+               the cache. Other input values are considered invalid.
+               Shrinking slab caches might be expensive and can
+               adversely impact other running applications.  So it
+               should be used with care.
 
 What:          /sys/kernel/slab/cache/slab_size
 Date:          May 2007
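
A usage sketch for the shrink semantics described above; the cache name
"dentry" is only an example, and only the string "1" is a valid write::

        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/sys/kernel/slab/dentry/shrink", "w");

                if (!f)
                        return 1;
                fputs("1", f);  /* any other value is rejected as invalid */
                return fclose(f) ? 1 : 0;
        }
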
index 41bdc03..0ae4f56 100644 (file)
@@ -85,8 +85,10 @@ Brief summary of control files.
  memory.oom_control                 set/show oom controls.
  memory.numa_stat                   show the number of memory usage per numa
                                     node
-
  memory.kmem.limit_in_bytes          set/show hard limit for kernel memory
+                                     This knob is deprecated and shouldn't be
+                                     used. It is planned to be removed in the
+                                     foreseeable future.
  memory.kmem.usage_in_bytes          show current kernel memory allocation
  memory.kmem.failcnt                 show the number of kernel memory usage
                                     hits limits
index 0fa8c0e..5361ebe 100644 (file)
@@ -615,8 +615,8 @@ on an IO device and is an example of this type.
 Protections
 -----------
 
-A cgroup is protected to be allocated upto the configured amount of
-the resource if the usages of all its ancestors are under their
+A cgroup is protected up to the configured amount of the resource
+as long as the usages of all its ancestors are under their
 protected levels.  Protections can be hard guarantees or best effort
 soft boundaries.  Protections can also be over-committed in which case
 only upto the amount available to the parent is protected among
@@ -1096,7 +1096,10 @@ PAGE_SIZE multiple when read back.
        is within its effective min boundary, the cgroup's memory
        won't be reclaimed under any conditions. If there is no
        unprotected reclaimable memory available, OOM killer
-       is invoked.
+       is invoked. Above the effective min boundary (or
+       effective low boundary if it is higher), pages are reclaimed
+       proportionally to the overage, reducing reclaim pressure for
+       smaller overages.
 
        Effective min boundary is limited by memory.min values of
        all ancestor cgroups. If there is memory.min overcommitment
@@ -1118,7 +1121,10 @@ PAGE_SIZE multiple when read back.
        Best-effort memory protection.  If the memory usage of a
        cgroup is within its effective low boundary, the cgroup's
        memory won't be reclaimed unless memory can be reclaimed
-       from unprotected cgroups.
+       from unprotected cgroups.  Above the effective low boundary (or
+       effective min boundary if it is higher), pages are reclaimed
+       proportionally to the overage, reducing reclaim pressure for
+       smaller overages.
 
        Effective low boundary is limited by memory.low values of
        all ancestor cgroups. If there is memory.low overcommitment
@@ -2482,8 +2488,10 @@ system performance due to overreclaim, to the point where the feature
 becomes self-defeating.
 
 The memory.low boundary on the other hand is a top-down allocated
-reserve.  A cgroup enjoys reclaim protection when it's within its low,
-which makes delegation of subtrees possible.
+reserve.  A cgroup enjoys reclaim protection when it's within its
+effective low, which makes delegation of subtrees possible. It also
+enjoys having reclaim pressure proportional to its overage when
+above its effective low.
 
 The original high boundary, the hard limit, is defined as a strict
 limit that can not budge, even if the OOM killer has to be called.
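
The proportional reclaim described above can be sketched as follows; this
is a simplified reading of "pressure proportional to the overage", not the
kernel's exact scan formula::

        /* Scale a reclaim scan target by how far usage exceeds the
         * effective protection (min or low). */
        static unsigned long scale_scan(unsigned long scan,
                                        unsigned long usage,
                                        unsigned long protection)
        {
                if (usage <= protection)
                        return 0;       /* within the boundary: no pressure */
                /* e.g. usage 600M, protection 512M: ~15% of the target */
                return scan * (usage - protection) / usage;
        }
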
index d381478..a84a83f 100644 (file)
                        enables the feature at boot time. By default, it is
                        disabled and the system will work mostly the same as a
                        kernel built without CONFIG_DEBUG_PAGEALLOC.
+                       Note: to get the most out of debug_pagealloc error reports,
+                       it is useful to also enable the page_owner functionality.
                        on: enable the feature
 
        debugpat        [X86] Enable PAT debugging
        lockd.nlm_udpport=M     [NFS] Assign UDP port.
                        Format: <integer>
 
+       lockdown=       [SECURITY]
+                       { integrity | confidentiality }
+                       Enable the kernel lockdown feature. If set to
+                       integrity, kernel features that allow userland to
+                       modify the running kernel are disabled. If set to
+                       confidentiality, kernel features that allow userland
+                       to extract confidential information from the kernel
+                       are also disabled.
+
        locktorture.nreaders_stress= [KNL]
                        Set the number of locking read-acquisition kthreads.
                        Defaults to being automatically set based on the
                                specify the device is described above.
                                If <order of align> is not specified,
                                PAGE_SIZE is used as alignment.
-                               PCI-PCI bridge can be specified, if resource
+                               A PCI-PCI bridge can be specified if resource
                                windows need to be expanded.
                                To specify the alignment for several
                                instances of a device, the PCI vendor,
                                device, subvendor, and subdevice may be
-                               specified, e.g., 4096@pci:8086:9c22:103c:198f
+                               specified, e.g., 12@pci:8086:9c22:103c:198f
+                               for 4096-byte alignment.
                ecrc=           Enable/disable PCIe ECRC (transaction layer
                                end-to-end CRC checking).
                                bios: Use BIOS/firmware settings. This is the
                                the unplug protocol
                        never -- do not unplug even if version check succeeds
 
+       xen_legacy_crash        [X86,XEN]
+                       Crash from Xen panic notifier, without executing late
+                       panic() code such as dumping handler.
+
        xen_nopvspin    [X86,XEN]
                        Disables the ticketlock slowpath using Xen PV
                        optimizations.
index b040909..02e0217 100644 (file)
@@ -154,11 +154,18 @@ return virtual addresses to userspace from a 48-bit range.
 
 Software can "opt-in" to receiving VAs from a 52-bit space by
 specifying an mmap hint parameter that is larger than 48-bit.
+
 For example:
-    maybe_high_address = mmap(~0UL, size, prot, flags,...);
+
+.. code-block:: c
+
+   maybe_high_address = mmap(~0UL, size, prot, flags,...);
 
 It is also possible to build a debug kernel that returns addresses
 from a 52-bit space by enabling the following kernel config options:
+
+.. code-block:: sh
+
    CONFIG_EXPERT=y && CONFIG_ARM64_FORCE_52BIT=y
 
 Note that this option is only intended for debugging applications
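
A self-contained version of the opt-in hint above; error handling is
minimal and the printout is purely illustrative::

        #include <stdio.h>
        #include <sys/mman.h>

        int main(void)
        {
                /* a hint above 48 bits opts this mapping in to 52-bit VAs */
                void *p = mmap((void *)~0UL, 4096, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

                if (p == MAP_FAILED)
                        return 1;
                printf("mapped at %p\n", p);
                return 0;
        }
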
index 17ea3fe..5a09661 100644 (file)
@@ -91,6 +91,11 @@ stable kernels.
 | ARM            | MMU-500         | #841119,826419  | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
+| Broadcom       | Brahma-B53      | N/A             | ARM64_ERRATUM_845719        |
++----------------+-----------------+-----------------+-----------------------------+
+| Broadcom       | Brahma-B53      | N/A             | ARM64_ERRATUM_843419        |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX ITS    | #22375,24313    | CAVIUM_ERRATUM_22375        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
@@ -107,6 +112,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX2 SMMUv3| #126            | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX2 Core  | #219            | CAVIUM_TX2_ERRATUM_219      |
++----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 +----------------+-----------------+-----------------+-----------------------------+
@@ -124,7 +131,7 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | Kryo/Falkor v1  | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 +----------------+-----------------+-----------------+-----------------------------+
-| Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
+| Qualcomm Tech. | Kryo/Falkor v1  | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
 +----------------+-----------------+-----------------+-----------------------------+
index fa16a05..ab0eae1 100644 (file)
@@ -38,6 +38,7 @@ Core utilities
    protection-keys
    ../RCU/index
    gcc-plugins
+   symbol-namespaces
 
 
 Interfaces for kernel debugging
index 08af5ca..f77de49 100644 (file)
@@ -42,6 +42,9 @@ String Manipulation
 .. kernel-doc:: lib/string.c
    :export:
 
+.. kernel-doc:: include/linux/string.h
+   :internal:
+
 .. kernel-doc:: mm/util.c
    :functions: kstrdup kstrdup_const kstrndup kmemdup kmemdup_nul memdup_user
                vmemdup_user strndup_user memdup_user_nul
index 7744aa3..939e3df 100644 (file)
@@ -98,6 +98,10 @@ limited. The actual limit depends on the hardware and the kernel
 configuration, but it is a good practice to use `kmalloc` for objects
 smaller than page size.
 
+The address of a chunk allocated with `kmalloc` is aligned to at least
+ARCH_KMALLOC_MINALIGN bytes.  For sizes which are a power of two, the
+alignment is also guaranteed to be at least the respective size.
+
 For large allocations you can use :c:func:`vmalloc` and
 :c:func:`vzalloc`, or directly request pages from the page
 allocator. The memory allocated by `vmalloc` and related functions is
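
A kernel-side sketch of the alignment guarantee added above (the function
name is hypothetical): a power-of-two size gets natural alignment on top
of the ARCH_KMALLOC_MINALIGN minimum::

        #include <linux/slab.h>

        static void *alloc_page_aligned_buf(void)
        {
                /* 4096 is a power of two, so the returned pointer is
                 * guaranteed to be 4096-byte aligned */
                return kmalloc(4096, GFP_KERNEL);
        }
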
diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
new file mode 100644 (file)
index 0000000..982ed7b
--- /dev/null
@@ -0,0 +1,154 @@
+=================
+Symbol Namespaces
+=================
+
+The following document describes how to use Symbol Namespaces to structure the
+export surface of in-kernel symbols exported through the family of
+EXPORT_SYMBOL() macros.
+
+.. Table of Contents
+
+       === 1 Introduction
+       === 2 How to define Symbol Namespaces
+          --- 2.1 Using the EXPORT_SYMBOL macros
+          --- 2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
+       === 3 How to use Symbols exported in Namespaces
+       === 4 Loading Modules that use namespaced Symbols
+       === 5 Automatically creating MODULE_IMPORT_NS statements
+
+1. Introduction
+===============
+
+Symbol Namespaces have been introduced as a means to structure the export
+surface of the in-kernel API. They allow subsystem maintainers to partition
+their exported symbols into separate namespaces. That is useful for
+documentation purposes (think of the SUBSYSTEM_DEBUG namespace) as well as for
+limiting the availability of a set of symbols for use in other parts of the
+kernel. As of today, modules that make use of symbols exported into namespaces
+are required to import the namespace. Otherwise the kernel will, depending on
+its configuration, reject loading the module or warn about a missing import.
+
+2. How to define Symbol Namespaces
+==================================
+
+Symbols can be exported into a namespace using different methods. All of them
+change the way EXPORT_SYMBOL and friends are instrumented to create ksymtab
+entries.
+
+2.1 Using the EXPORT_SYMBOL macros
+==================================
+
+In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), which allow
+exporting of kernel symbols to the kernel symbol table, variants of these are
+available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
+EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
+Please note that due to macro expansion that argument needs to be a
+preprocessor symbol. E.g. to export the symbol `usb_stor_suspend` into the
+namespace `USB_STORAGE`, use::
+
+       EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
+
+The corresponding ksymtab entry struct `kernel_symbol` will have the member
+`namespace` set accordingly. A symbol that is exported without a namespace will
+refer to `NULL`. There is no default namespace if none is defined. `modpost`
+and kernel/module.c make use of the namespace at build time or module load
+time, respectively.
+
+2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
+=============================================
+
+Defining namespaces for all symbols of a subsystem can be very verbose and may
+become hard to maintain. Therefore a default define (DEFAULT_SYMBOL_NAMESPACE)
+has been provided that, if set, will become the default for all EXPORT_SYMBOL()
+and EXPORT_SYMBOL_GPL() macro expansions that do not specify a namespace.
+
+There are multiple ways of specifying this define; which one to use depends on
+the subsystem and the maintainer's preference. The first option
+is to define the default namespace in the `Makefile` of the subsystem. E.g. to
+export all symbols defined in usb-common into the namespace USB_COMMON, add a
+line like this to drivers/usb/common/Makefile::
+
+       ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON
+
+That will affect all EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL() statements. A
+symbol exported with EXPORT_SYMBOL_NS() while this definition is present will
+still be exported into the namespace that is passed as the namespace argument,
+as this argument takes precedence over a default symbol namespace.
+
+A second option to define the default namespace is directly in the compilation
+unit as a preprocessor statement. The above example would then read::
+
+       #undef  DEFAULT_SYMBOL_NAMESPACE
+       #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON
+
+within the corresponding compilation unit before any EXPORT_SYMBOL macro is
+used.
+
+3. How to use Symbols exported in Namespaces
+============================================
+
+In order to use symbols that are exported into namespaces, kernel modules need
+to explicitly import these namespaces. Otherwise the kernel might refuse to
+load the module. The module code is required to use the macro MODULE_IMPORT_NS
+for the namespaces it uses symbols from. E.g. a module using the
+usb_stor_suspend symbol from above, needs to import the namespace USB_STORAGE
+using a statement like::
+
+       MODULE_IMPORT_NS(USB_STORAGE);
+
+This will create a `modinfo` tag in the module for each imported namespace.
+As a side effect, the imported namespaces of a module can be inspected with
+modinfo::
+
+       $ modinfo drivers/usb/storage/ums-karma.ko
+       [...]
+       import_ns:      USB_STORAGE
+       [...]
+
+
+It is advisable to add the MODULE_IMPORT_NS() statement close to other module
+metadata definitions like MODULE_AUTHOR() or MODULE_LICENSE(). Refer to
+Section 5 for a way to create missing import statements automatically.
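+
+A minimal sketch of such a metadata section (author, description and license
+values are illustrative)::
+
+       MODULE_AUTHOR("Jane Doe");
+       MODULE_DESCRIPTION("Example USB storage sub-driver");
+       MODULE_LICENSE("GPL");
+       MODULE_IMPORT_NS(USB_STORAGE);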
+
+4. Loading Modules that use namespaced Symbols
+==============================================
+
+At module loading time (e.g. `insmod`), the kernel will check each symbol
+referenced from the module for its availability and whether the namespace it
+might be exported to has been imported by the module. The default behaviour of
+the kernel is to reject loading modules that don't specify sufficient imports.
+An error will be logged and loading will fail with EINVAL. In order to
+allow loading of modules that don't satisfy this precondition, a configuration
+option is available: Setting MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y will
+enable loading regardless, but will emit a warning.
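+
+For local experiments, the option can be enabled like any other Kconfig
+symbol, for example with the in-tree config script (one possible way)::
+
+       $ ./scripts/config --enable MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
+       $ make olddefconfig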
+
+5. Automatically creating MODULE_IMPORT_NS statements
+=====================================================
+
+Missing namespace imports can easily be detected at build time. In fact,
+modpost will emit a warning if a module uses a symbol from a namespace
+without importing it.
+MODULE_IMPORT_NS() statements will usually be added at a definite location
+(along with other module metadata). To make the life of module authors (and
+subsystem maintainers) easier, a script and make target are available to fix
+up missing imports. Fixing missing imports can be done with::
+
+       $ make nsdeps
+
+A typical scenario for module authors would be::
+
+       - write code that depends on a symbol from a namespace that is not
+         imported
+       - `make`
+       - notice the modpost warning about a missing import
+       - run `make nsdeps` to add the import to the correct code location
+
+For subsystem maintainers introducing a namespace, the steps are very similar.
+Again, `make nsdeps` will eventually add the missing namespace imports for
+in-tree modules; a possible command sequence is sketched after this list::
+
+       - move or add symbols to a namespace (e.g. with EXPORT_SYMBOL_NS())
+       - `make` (preferably with an allmodconfig to cover all in-kernel
+         modules)
+       - notice the modpost warning about a missing import
+       - run `make nsdeps` to add the import to the correct code location
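+
+A possible command sequence for that maintainer workflow (illustrative)::
+
+       $ make allmodconfig
+       $ make
+       $ make nsdeps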
+
index b72d07d..5252961 100644 (file)
@@ -41,6 +41,9 @@ smaller binary while the latter is 1.1 - 2 times faster.
 Both KASAN modes work with both SLUB and SLAB memory allocators.
 For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
 
+To augment reports with the last allocation and freeing stacks of the physical
+page, it is also recommended to enable CONFIG_PAGE_OWNER and boot with
+page_owner=on.
+
 To disable instrumentation for specific files or directories, add a line
 similar to the following to the respective kernel Makefile:
 
index 2560490..ecdfdc9 100644 (file)
@@ -89,6 +89,22 @@ To build, save output files in a separate directory with KBUILD_OUTPUT ::
 
   $ export KBUILD_OUTPUT=/tmp/kselftest; make TARGETS="size timers" kselftest
 
+Additionally you can use the "SKIP_TARGETS" variable on the make command
+line to specify one or more targets to exclude from the TARGETS list.
+
+To run all tests but a single subsystem::
+
+  $ make -C tools/testing/selftests SKIP_TARGETS=ptrace run_tests
+
+You can specify multiple tests to skip::
+
+  $ make SKIP_TARGETS="size timers" kselftest
+
+You can also specify a restricted list of tests to run together with a
+dedicated skiplist::
+
+  $ make TARGETS="bpf breakpoints size timers" SKIP_TARGETS=bpf kselftest
+
 See the top-level tools/testing/selftests/Makefile for the list of all
 possible targets.
 
index c82c5e5..9c7e703 100644 (file)
@@ -496,12 +496,12 @@ properties:
 
       - description: Theobroma Systems RK3368-uQ7 with Haikou baseboard
         items:
-          - const: tsd,rk3368-uq7-haikou
+          - const: tsd,rk3368-lion-haikou
           - const: rockchip,rk3368
 
       - description: Theobroma Systems RK3399-Q7 with Haikou baseboard
         items:
-          - const: tsd,rk3399-q7-haikou
+          - const: tsd,rk3399-puma-haikou
           - const: rockchip,rk3399
 
       - description: Tronsmart Orion R68 Meta
index 3248595..f04870d 100644 (file)
@@ -85,4 +85,5 @@ examples:
                         <&pd IMX_SC_R_DSP_RAM>;
         mbox-names = "txdb0", "txdb1", "rxdb0", "rxdb1";
         mboxes = <&lsio_mu13 2 0>, <&lsio_mu13 2 1>, <&lsio_mu13 3 0>, <&lsio_mu13 3 1>;
+        memory-region = <&dsp_reserved>;
     };
index e9de375..c9a6587 100644 (file)
@@ -1,7 +1,9 @@
 Broadcom BCM2835 I2C controller
 
 Required properties:
-- compatible : Should be "brcm,bcm2835-i2c".
+- compatible : Should be one of:
+       "brcm,bcm2711-i2c"
+       "brcm,bcm2835-i2c"
 - reg: Should contain register location and length.
 - interrupts: Should contain interrupt.
 - clocks : The clock feeding the I2C controller.
diff --git a/Documentation/devicetree/bindings/i2c/i2c-emev2.txt b/Documentation/devicetree/bindings/i2c/i2c-emev2.txt
deleted file mode 100644 (file)
index 5ed1ea1..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Device tree configuration for Renesas EMEV2 IIC controller
-
-Required properties:
-- compatible      : "renesas,iic-emev2"
-- reg             : address start and address range size of device
-- interrupts      : specifier for the IIC controller interrupt
-- clocks          : phandle to the IP core SCLK
-- clock-names     : must be "sclk"
-- #address-cells  : should be <1>
-- #size-cells     : should be <0>
-
-Example:
-
-       iic0: i2c@e0070000 {
-               #address-cells = <1>;
-               #size-cells = <0>;
-               compatible = "renesas,iic-emev2";
-               reg = <0xe0070000 0x28>;
-               interrupts = <0 32 IRQ_TYPE_EDGE_RISING>;
-               clocks = <&iic0_sclk>;
-               clock-names = "sclk";
-       };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
deleted file mode 100644 (file)
index 3ee5e8f..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-I2C for R-Car platforms
-
-Required properties:
-- compatible:
-       "renesas,i2c-r8a7743" if the device is a part of a R8A7743 SoC.
-       "renesas,i2c-r8a7744" if the device is a part of a R8A7744 SoC.
-       "renesas,i2c-r8a7745" if the device is a part of a R8A7745 SoC.
-       "renesas,i2c-r8a77470" if the device is a part of a R8A77470 SoC.
-       "renesas,i2c-r8a774a1" if the device is a part of a R8A774A1 SoC.
-       "renesas,i2c-r8a774c0" if the device is a part of a R8A774C0 SoC.
-       "renesas,i2c-r8a7778" if the device is a part of a R8A7778 SoC.
-       "renesas,i2c-r8a7779" if the device is a part of a R8A7779 SoC.
-       "renesas,i2c-r8a7790" if the device is a part of a R8A7790 SoC.
-       "renesas,i2c-r8a7791" if the device is a part of a R8A7791 SoC.
-       "renesas,i2c-r8a7792" if the device is a part of a R8A7792 SoC.
-       "renesas,i2c-r8a7793" if the device is a part of a R8A7793 SoC.
-       "renesas,i2c-r8a7794" if the device is a part of a R8A7794 SoC.
-       "renesas,i2c-r8a7795" if the device is a part of a R8A7795 SoC.
-       "renesas,i2c-r8a7796" if the device is a part of a R8A7796 SoC.
-       "renesas,i2c-r8a77965" if the device is a part of a R8A77965 SoC.
-       "renesas,i2c-r8a77970" if the device is a part of a R8A77970 SoC.
-       "renesas,i2c-r8a77980" if the device is a part of a R8A77980 SoC.
-       "renesas,i2c-r8a77990" if the device is a part of a R8A77990 SoC.
-       "renesas,i2c-r8a77995" if the device is a part of a R8A77995 SoC.
-       "renesas,rcar-gen1-i2c" for a generic R-Car Gen1 compatible device.
-       "renesas,rcar-gen2-i2c" for a generic R-Car Gen2 or RZ/G1 compatible
-                               device.
-       "renesas,rcar-gen3-i2c" for a generic R-Car Gen3 or RZ/G2 compatible
-                               device.
-       "renesas,i2c-rcar" (deprecated)
-
-       When compatible with the generic version, nodes must list the
-       SoC-specific version corresponding to the platform first followed
-       by the generic version.
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- interrupts: interrupt specifier.
-
-Optional properties:
-- clock-frequency: desired I2C bus clock frequency in Hz. The absence of this
-  property indicates the default frequency 100 kHz.
-- clocks: clock specifier.
-- dmas: Must contain a list of two references to DMA specifiers, one for
-  transmission, and one for reception.
-- dma-names: Must contain a list of two DMA names, "tx" and "rx".
-
-- i2c-scl-falling-time-ns: see i2c.txt
-- i2c-scl-internal-delay-ns: see i2c.txt
-- i2c-scl-rising-time-ns: see i2c.txt
-
-Examples :
-
-i2c0: i2c@e6508000 {
-       #address-cells = <1>;
-       #size-cells = <0>;
-       compatible = "renesas,i2c-r8a7791", "renesas,rcar-gen2-i2c";
-       reg = <0 0xe6508000 0 0x40>;
-       interrupts = <0 287 IRQ_TYPE_LEVEL_HIGH>;
-       clocks = <&mstp9_clks R8A7791_CLK_I2C0>;
-       clock-frequency = <400000>;
-};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-riic.txt b/Documentation/devicetree/bindings/i2c/i2c-riic.txt
deleted file mode 100644 (file)
index e26fe3a..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-Device tree configuration for Renesas RIIC driver
-
-Required properties:
-- compatible      :
-       "renesas,riic-r7s72100" if the device is a part of a R7S72100 SoC.
-       "renesas,riic-r7s9210" if the device is a part of a R7S9210 SoC.
-       "renesas,riic-rz" for a generic RZ/A compatible device.
-- reg             : address start and address range size of device
-- interrupts      : 8 interrupts (TEI, RI, TI, SPI, STI, NAKI, ALI, TMOI)
-- clock-frequency : frequency of bus clock in Hz
-- #address-cells  : should be <1>
-- #size-cells     : should be <0>
-
-Pinctrl properties might be needed, too. See there.
-
-Example:
-
-       i2c0: i2c@fcfee000 {
-               compatible = "renesas,riic-r7s72100", "renesas,riic-rz";
-               reg = <0xfcfee000 0x44>;
-               interrupts = <0 157 IRQ_TYPE_LEVEL_HIGH>,
-                            <0 158 IRQ_TYPE_EDGE_RISING>,
-                            <0 159 IRQ_TYPE_EDGE_RISING>,
-                            <0 160 IRQ_TYPE_LEVEL_HIGH>,
-                            <0 161 IRQ_TYPE_LEVEL_HIGH>,
-                            <0 162 IRQ_TYPE_LEVEL_HIGH>,
-                            <0 163 IRQ_TYPE_LEVEL_HIGH>,
-                            <0 164 IRQ_TYPE_LEVEL_HIGH>;
-               clock-frequency = <100000>;
-               #address-cells = <1>;
-               #size-cells = <0>;
-       };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt b/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
deleted file mode 100644 (file)
index 202602e..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-Device tree configuration for Renesas IIC (sh_mobile) driver
-
-Required properties:
-- compatible      :
-                       - "renesas,iic-r8a73a4" (R-Mobile APE6)
-                       - "renesas,iic-r8a7740" (R-Mobile A1)
-                       - "renesas,iic-r8a7743" (RZ/G1M)
-                       - "renesas,iic-r8a7744" (RZ/G1N)
-                       - "renesas,iic-r8a7745" (RZ/G1E)
-                       - "renesas,iic-r8a774a1" (RZ/G2M)
-                       - "renesas,iic-r8a774c0" (RZ/G2E)
-                       - "renesas,iic-r8a7790" (R-Car H2)
-                       - "renesas,iic-r8a7791" (R-Car M2-W)
-                       - "renesas,iic-r8a7792" (R-Car V2H)
-                       - "renesas,iic-r8a7793" (R-Car M2-N)
-                       - "renesas,iic-r8a7794" (R-Car E2)
-                       - "renesas,iic-r8a7795" (R-Car H3)
-                       - "renesas,iic-r8a7796" (R-Car M3-W)
-                       - "renesas,iic-r8a77965" (R-Car M3-N)
-                       - "renesas,iic-r8a77990" (R-Car E3)
-                       - "renesas,iic-sh73a0" (SH-Mobile AG5)
-                       - "renesas,rcar-gen2-iic" (generic R-Car Gen2 or RZ/G1
-                                                       compatible device)
-                       - "renesas,rcar-gen3-iic" (generic R-Car Gen3 or RZ/G2
-                                                       compatible device)
-                       - "renesas,rmobile-iic" (generic device)
-
-                       When compatible with a generic R-Car version, nodes
-                       must list the SoC-specific version corresponding to
-                       the platform first followed by the generic R-Car
-                       version.
-
-                       When compatible with "renesas,rmobile-iic" it should
-                       be the last compatibility string listed.
-
-                       The r8a77990 (R-Car E3) and r8a774c0 (RZ/G2E)
-                       controllers are not considered compatible with
-                       "renesas,rcar-gen3-iic" or "renesas,rmobile-iic"
-                       due to the absence of automatic transmission registers.
-
-- reg             : address start and address range size of device
-- interrupts      : interrupt of device
-- clocks          : clock for device
-- #address-cells  : should be <1>
-- #size-cells     : should be <0>
-
-Optional properties:
-- clock-frequency : frequency of bus clock in Hz. Default 100kHz if unset.
-- dmas            : Must contain a list of two references to DMA
-                   specifiers, one for transmission, and one for
-                   reception.
-- dma-names       : Must contain a list of two DMA names, "tx" and "rx".
-
-
-Pinctrl properties might be needed, too. See there.
-
-Example:
-
-       iic0: i2c@e6500000 {
-               compatible = "renesas,iic-r8a7790", "renesas,rcar-gen2-iic",
-                            "renesas,rmobile-iic";
-               reg = <0 0xe6500000 0 0x425>;
-               interrupts = <0 174 IRQ_TYPE_LEVEL_HIGH>;
-               clocks = <&mstp3_clks R8A7790_CLK_IIC0>;
-               clock-frequency = <400000>;
-               #address-cells = <1>;
-               #size-cells = <0>;
-       };
diff --git a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt
new file mode 100644 (file)
index 0000000..3ee5e8f
--- /dev/null
@@ -0,0 +1,62 @@
+I2C for R-Car platforms
+
+Required properties:
+- compatible:
+       "renesas,i2c-r8a7743" if the device is a part of a R8A7743 SoC.
+       "renesas,i2c-r8a7744" if the device is a part of a R8A7744 SoC.
+       "renesas,i2c-r8a7745" if the device is a part of a R8A7745 SoC.
+       "renesas,i2c-r8a77470" if the device is a part of a R8A77470 SoC.
+       "renesas,i2c-r8a774a1" if the device is a part of a R8A774A1 SoC.
+       "renesas,i2c-r8a774c0" if the device is a part of a R8A774C0 SoC.
+       "renesas,i2c-r8a7778" if the device is a part of a R8A7778 SoC.
+       "renesas,i2c-r8a7779" if the device is a part of a R8A7779 SoC.
+       "renesas,i2c-r8a7790" if the device is a part of a R8A7790 SoC.
+       "renesas,i2c-r8a7791" if the device is a part of a R8A7791 SoC.
+       "renesas,i2c-r8a7792" if the device is a part of a R8A7792 SoC.
+       "renesas,i2c-r8a7793" if the device is a part of a R8A7793 SoC.
+       "renesas,i2c-r8a7794" if the device is a part of a R8A7794 SoC.
+       "renesas,i2c-r8a7795" if the device is a part of a R8A7795 SoC.
+       "renesas,i2c-r8a7796" if the device is a part of a R8A7796 SoC.
+       "renesas,i2c-r8a77965" if the device is a part of a R8A77965 SoC.
+       "renesas,i2c-r8a77970" if the device is a part of a R8A77970 SoC.
+       "renesas,i2c-r8a77980" if the device is a part of a R8A77980 SoC.
+       "renesas,i2c-r8a77990" if the device is a part of a R8A77990 SoC.
+       "renesas,i2c-r8a77995" if the device is a part of a R8A77995 SoC.
+       "renesas,rcar-gen1-i2c" for a generic R-Car Gen1 compatible device.
+       "renesas,rcar-gen2-i2c" for a generic R-Car Gen2 or RZ/G1 compatible
+                               device.
+       "renesas,rcar-gen3-i2c" for a generic R-Car Gen3 or RZ/G2 compatible
+                               device.
+       "renesas,i2c-rcar" (deprecated)
+
+       When compatible with the generic version, nodes must list the
+       SoC-specific version corresponding to the platform first followed
+       by the generic version.
+
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- interrupts: interrupt specifier.
+
+Optional properties:
+- clock-frequency: desired I2C bus clock frequency in Hz. The absence of this
+  property indicates the default frequency 100 kHz.
+- clocks: clock specifier.
+- dmas: Must contain a list of two references to DMA specifiers, one for
+  transmission, and one for reception.
+- dma-names: Must contain a list of two DMA names, "tx" and "rx".
+
+- i2c-scl-falling-time-ns: see i2c.txt
+- i2c-scl-internal-delay-ns: see i2c.txt
+- i2c-scl-rising-time-ns: see i2c.txt
+
+Examples :
+
+i2c0: i2c@e6508000 {
+       #address-cells = <1>;
+       #size-cells = <0>;
+       compatible = "renesas,i2c-r8a7791", "renesas,rcar-gen2-i2c";
+       reg = <0 0xe6508000 0 0x40>;
+       interrupts = <0 287 IRQ_TYPE_LEVEL_HIGH>;
+       clocks = <&mstp9_clks R8A7791_CLK_I2C0>;
+       clock-frequency = <400000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt b/Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt
new file mode 100644 (file)
index 0000000..5ed1ea1
--- /dev/null
@@ -0,0 +1,22 @@
+Device tree configuration for Renesas EMEV2 IIC controller
+
+Required properties:
+- compatible      : "renesas,iic-emev2"
+- reg             : address start and address range size of device
+- interrupts      : specifier for the IIC controller interrupt
+- clocks          : phandle to the IP core SCLK
+- clock-names     : must be "sclk"
+- #address-cells  : should be <1>
+- #size-cells     : should be <0>
+
+Example:
+
+       iic0: i2c@e0070000 {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               compatible = "renesas,iic-emev2";
+               reg = <0xe0070000 0x28>;
+               interrupts = <0 32 IRQ_TYPE_EDGE_RISING>;
+               clocks = <&iic0_sclk>;
+               clock-names = "sclk";
+       };
diff --git a/Documentation/devicetree/bindings/i2c/renesas,iic.txt b/Documentation/devicetree/bindings/i2c/renesas,iic.txt
new file mode 100644 (file)
index 0000000..202602e
--- /dev/null
@@ -0,0 +1,68 @@
+Device tree configuration for Renesas IIC (sh_mobile) driver
+
+Required properties:
+- compatible      :
+                       - "renesas,iic-r8a73a4" (R-Mobile APE6)
+                       - "renesas,iic-r8a7740" (R-Mobile A1)
+                       - "renesas,iic-r8a7743" (RZ/G1M)
+                       - "renesas,iic-r8a7744" (RZ/G1N)
+                       - "renesas,iic-r8a7745" (RZ/G1E)
+                       - "renesas,iic-r8a774a1" (RZ/G2M)
+                       - "renesas,iic-r8a774c0" (RZ/G2E)
+                       - "renesas,iic-r8a7790" (R-Car H2)
+                       - "renesas,iic-r8a7791" (R-Car M2-W)
+                       - "renesas,iic-r8a7792" (R-Car V2H)
+                       - "renesas,iic-r8a7793" (R-Car M2-N)
+                       - "renesas,iic-r8a7794" (R-Car E2)
+                       - "renesas,iic-r8a7795" (R-Car H3)
+                       - "renesas,iic-r8a7796" (R-Car M3-W)
+                       - "renesas,iic-r8a77965" (R-Car M3-N)
+                       - "renesas,iic-r8a77990" (R-Car E3)
+                       - "renesas,iic-sh73a0" (SH-Mobile AG5)
+                       - "renesas,rcar-gen2-iic" (generic R-Car Gen2 or RZ/G1
+                                                       compatible device)
+                       - "renesas,rcar-gen3-iic" (generic R-Car Gen3 or RZ/G2
+                                                       compatible device)
+                       - "renesas,rmobile-iic" (generic device)
+
+                       When compatible with a generic R-Car version, nodes
+                       must list the SoC-specific version corresponding to
+                       the platform first followed by the generic R-Car
+                       version.
+
+                       When compatible with "renesas,rmobile-iic" it should
+                       be the last compatibility string listed.
+
+                       The r8a77990 (R-Car E3) and r8a774c0 (RZ/G2E)
+                       controllers are not considered compatible with
+                       "renesas,rcar-gen3-iic" or "renesas,rmobile-iic"
+                       due to the absence of automatic transmission registers.
+
+- reg             : address start and address range size of device
+- interrupts      : interrupt of device
+- clocks          : clock for device
+- #address-cells  : should be <1>
+- #size-cells     : should be <0>
+
+Optional properties:
+- clock-frequency : frequency of bus clock in Hz. Default 100kHz if unset.
+- dmas            : Must contain a list of two references to DMA
+                   specifiers, one for transmission, and one for
+                   reception.
+- dma-names       : Must contain a list of two DMA names, "tx" and "rx".
+
+
+Pinctrl properties might be needed, too. See there.
+
+Example:
+
+       iic0: i2c@e6500000 {
+               compatible = "renesas,iic-r8a7790", "renesas,rcar-gen2-iic",
+                            "renesas,rmobile-iic";
+               reg = <0 0xe6500000 0 0x425>;
+               interrupts = <0 174 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&mstp3_clks R8A7790_CLK_IIC0>;
+               clock-frequency = <400000>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+       };
diff --git a/Documentation/devicetree/bindings/i2c/renesas,riic.txt b/Documentation/devicetree/bindings/i2c/renesas,riic.txt
new file mode 100644 (file)
index 0000000..e26fe3a
--- /dev/null
@@ -0,0 +1,32 @@
+Device tree configuration for Renesas RIIC driver
+
+Required properties:
+- compatible      :
+       "renesas,riic-r7s72100" if the device is a part of a R7S72100 SoC.
+       "renesas,riic-r7s9210" if the device is a part of a R7S9210 SoC.
+       "renesas,riic-rz" for a generic RZ/A compatible device.
+- reg             : address start and address range size of device
+- interrupts      : 8 interrupts (TEI, RI, TI, SPI, STI, NAKI, ALI, TMOI)
+- clock-frequency : frequency of bus clock in Hz
+- #address-cells  : should be <1>
+- #size-cells     : should be <0>
+
+Pinctrl properties might be needed, too. See there.
+
+Example:
+
+       i2c0: i2c@fcfee000 {
+               compatible = "renesas,riic-r7s72100", "renesas,riic-rz";
+               reg = <0xfcfee000 0x44>;
+               interrupts = <0 157 IRQ_TYPE_LEVEL_HIGH>,
+                            <0 158 IRQ_TYPE_EDGE_RISING>,
+                            <0 159 IRQ_TYPE_EDGE_RISING>,
+                            <0 160 IRQ_TYPE_LEVEL_HIGH>,
+                            <0 161 IRQ_TYPE_LEVEL_HIGH>,
+                            <0 162 IRQ_TYPE_LEVEL_HIGH>,
+                            <0 163 IRQ_TYPE_LEVEL_HIGH>,
+                            <0 164 IRQ_TYPE_LEVEL_HIGH>;
+               clock-frequency = <100000>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+       };
index 676ec42..567a33a 100644 (file)
@@ -43,13 +43,9 @@ properties:
 
   dvdd-supply:
     description: DVdd voltage supply
-    items:
-      - const: dvdd
 
   avdd-supply:
     description: AVdd voltage supply
-    items:
-      - const: avdd
 
   adi,rejection-60-Hz-enable:
     description: |
@@ -99,6 +95,9 @@ required:
 examples:
   - |
     spi0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
       adc@0 {
         compatible = "adi,ad7192";
         reg = <0>;
index f4c5d34..7079d44 100644 (file)
@@ -1,8 +1,11 @@
 * Advanced Interrupt Controller (AIC)
 
 Required properties:
-- compatible: Should be "atmel,<chip>-aic"
-  <chip> can be "at91rm9200", "sama5d2", "sama5d3" or "sama5d4"
+- compatible: Should be:
+    - "atmel,<chip>-aic" where  <chip> can be "at91rm9200", "sama5d2",
+      "sama5d3" or "sama5d4"
+    - "microchip,<chip>-aic" where <chip> can be "sam9x60"
+
 - interrupt-controller: Identifies the node as an interrupt controller.
 - #interrupt-cells: The number of cells to define the interrupts. It should be 3.
   The first cell is the IRQ number (aka "Peripheral IDentifier" on datasheet).
index 27f38ee..d3e423f 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/arm/allwinner,sun4i-a10-csi.yaml#
+$id: http://devicetree.org/schemas/media/allwinner,sun4i-a10-csi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Allwinner A10 CMOS Sensor Interface (CSI) Device Tree Bindings
@@ -27,14 +27,12 @@ properties:
   clocks:
     items:
       - description: The CSI interface clock
-      - description: The CSI module clock
       - description: The CSI ISP clock
       - description: The CSI DRAM clock
 
   clock-names:
     items:
       - const: bus
-      - const: mod
       - const: isp
       - const: ram
 
@@ -89,9 +87,8 @@ examples:
         compatible = "allwinner,sun7i-a20-csi0";
         reg = <0x01c09000 0x1000>;
         interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
-        clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI0>,
-                 <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
-        clock-names = "bus", "mod", "isp", "ram";
+        clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
+        clock-names = "bus", "isp", "ram";
         resets = <&ccu RST_CSI0>;
 
         port {
index 3d5c154..9054555 100644 (file)
@@ -73,7 +73,6 @@ properties:
           - rc-genius-tvgo-a11mce
           - rc-gotview7135
           - rc-hauppauge
-          - rc-hauppauge
           - rc-hisi-poplar
           - rc-hisi-tv-demo
           - rc-imon-mce
index 0ebd08a..a9b105a 100644 (file)
@@ -8,11 +8,12 @@ MT6397/MT6323 is a multifunction device with the following sub modules:
 - Clock
 - LED
 - Keys
+- Power controller
 
 It is interfaced to the host controller using an SPI interface by proprietary
 hardware called the PMIC wrapper or pwrap. The MT6397/MT6323 MFD is a child
 device of pwrap. See the following for pwrap node definitions:
-Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
+../soc/mediatek/pwrap.txt
 
 This document describes the binding for MFD device and its sub module.
 
@@ -22,14 +23,16 @@ compatible: "mediatek,mt6397" or "mediatek,mt6323"
 Optional subnodes:
 
 - rtc
-       Required properties:
+       Required properties: Should be one of the following
+               - compatible: "mediatek,mt6323-rtc"
                - compatible: "mediatek,mt6397-rtc"
+       For details, see ../rtc/rtc-mt6397.txt
 - regulators
        Required properties:
                - compatible: "mediatek,mt6397-regulator"
-       see Documentation/devicetree/bindings/regulator/mt6397-regulator.txt
+       see ../regulator/mt6397-regulator.txt
                - compatible: "mediatek,mt6323-regulator"
-       see Documentation/devicetree/bindings/regulator/mt6323-regulator.txt
+       see ../regulator/mt6323-regulator.txt
 - codec
        Required properties:
                - compatible: "mediatek,mt6397-codec"
@@ -39,12 +42,17 @@ Optional subnodes:
 - led
        Required properties:
                - compatible: "mediatek,mt6323-led"
-       see Documentation/devicetree/bindings/leds/leds-mt6323.txt
+       see ../leds/leds-mt6323.txt
 
 - keys
        Required properties:
                - compatible: "mediatek,mt6397-keys" or "mediatek,mt6323-keys"
-       see Documentation/devicetree/bindings/input/mtk-pmic-keys.txt
+       see ../input/mtk-pmic-keys.txt
+
+- power-controller
+       Required properties:
+               - compatible: "mediatek,mt6323-pwrc"
+       For details, see ../power/reset/mt6323-poweroff.txt
 
 Example:
        pwrap: pwrap@1000f000 {
index 65c2326..b74e5e9 100644 (file)
@@ -14,6 +14,10 @@ Required properties:
                "ricoh,rc5t619"
  - reg: the I2C slave address of the device
 
+Optional properties:
+ - system-power-controller:
+   See Documentation/devicetree/bindings/power/power-controller.txt
+
 Sub-nodes:
  - regulators: the node is required if the regulator functionality is
    needed. The valid regulator names are: DCDC1, DCDC2, DCDC3, DCDC4
@@ -28,6 +32,7 @@ Example:
        pmic@32 {
                compatible = "ricoh,rn5t618";
                reg = <0x32>;
+               system-power-controller;
 
                regulators {
                        DCDC1 {
index 69375cb..d95cc69 100644 (file)
@@ -36,12 +36,6 @@ properties:
     enum: [ 4, 8, 12, 16, 20, 24 ]
     default: 8
 
-  adi,disable-energy-detect:
-    description: |
-      Disables Energy Detect Powerdown Mode (default disabled, i.e energy detect
-      is enabled if this property is unspecified)
-    type: boolean
-
 examples:
   - |
     ethernet {
@@ -68,6 +62,5 @@ examples:
             reg = <1>;
 
             adi,fifo-depth-bits = <16>;
-            adi,disable-energy-detect;
         };
     };
index 5100358..b921731 100644 (file)
@@ -12,8 +12,36 @@ and therefore may overwrite them.
 KSZ9021:
 
   All skew control options are specified in picoseconds. The minimum
-  value is 0, the maximum value is 3000, and it is incremented by 200ps
-  steps.
+  value is 0, the maximum value is 3000, and it can be specified in 200ps
+  steps, *but* these values are in fact not what you get because this chip's
+  skew values actually increase in 120ps steps, starting from -840ps. The
+  incorrect values came from an error in the original KSZ9021 datasheet
+  before it was corrected in revision 1.2 (Feb 2014), but it is too late to
+  change the driver now because of the many existing device trees that have
+  been created using values that go up in increments of 200.
+
+  The following table shows the actual skew delay you will get for each of the
+  possible devicetree values, and the number that will be programmed into the
+  corresponding pad skew register:
+
+  Device Tree Value    Delay   Pad Skew Register Value
+  -----------------------------------------------------
+       0               -840ps          0000
+       200             -720ps          0001
+       400             -600ps          0010
+       600             -480ps          0011
+       800             -360ps          0100
+       1000            -240ps          0101
+       1200            -120ps          0110
+       1400               0ps          0111
+       1600             120ps          1000
+       1800             240ps          1001
+       2000             360ps          1010
+       2200             480ps          1011
+       2400             600ps          1100
+       2600             720ps          1101
+       2800             840ps          1110
+       3000             960ps          1111
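+
+  Equivalently (derived from the table above): for a devicetree value v, the
+  pad skew register is programmed with v / 200 and the resulting delay is
+  -840ps + (v / 200) * 120ps.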
 
   Optional properties:
 
index 7ad3621..5df4aa7 100644 (file)
@@ -18,6 +18,7 @@ Required properties:
                R-Car Gen2 and RZ/G1 devices.
 
       - "renesas,etheravb-r8a774a1" for the R8A774A1 SoC.
+      - "renesas,etheravb-r8a774b1" for the R8A774B1 SoC.
       - "renesas,etheravb-r8a774c0" for the R8A774C0 SoC.
       - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
       - "renesas,etheravb-r8a7796" for the R8A7796 SoC.
index ebe4537..4845e29 100644 (file)
@@ -113,7 +113,7 @@ properties:
     const: stmmaceth
 
   mac-mode:
-    maxItems: 1
+    $ref: ethernet-controller.yaml#/properties/phy-connection-type
     description:
       The property is identical to 'phy-mode', and assumes that there is mode
       converter in-between the MAC & PHY (e.g. GMII-to-RGMII). This converter
index 5561a1c..78494c4 100644 (file)
@@ -11,7 +11,6 @@ Required properties:
             the ATU address space.
     (The old way of getting the configuration address space from "ranges"
     is deprecated and should be avoided.)
-- num-lanes: number of lanes to use
 RC mode:
 - #address-cells: set to <3>
 - #size-cells: set to <2>
@@ -34,6 +33,11 @@ Optional properties:
 - clock-names: Must include the following entries:
        - "pcie"
        - "pcie_bus"
+- snps,enable-cdm-check: This is a boolean property and if present enables
+   automatic checking of CDM (Configuration Dependent Module) registers
+   for data corruption. CDM registers include standard PCIe configuration
+   space registers, Port Logic registers, DMA and iATU (internal Address
+   Translation Unit) registers.
 RC mode:
 - num-viewport: number of view ports configured in hardware. If a platform
   does not specify it, the driver assumes 2.
index a7f5f5a..de4b2ba 100644 (file)
@@ -50,7 +50,7 @@ Additional required properties for imx7d-pcie and imx8mq-pcie:
 - power-domains: Must be set to a phandle pointing to PCIE_PHY power domain
 - resets: Must contain phandles to PCIe-related reset lines exposed by SRC
   IP block
-- reset-names: Must contain the following entires:
+- reset-names: Must contain the following entries:
               - "pciephy"
               - "apps"
               - "turnoff"
index 92437a3..7468d66 100644 (file)
@@ -6,6 +6,7 @@ Required properties:
        "mediatek,mt2712-pcie"
        "mediatek,mt7622-pcie"
        "mediatek,mt7623-pcie"
+       "mediatek,mt7629-pcie"
 - device_type: Must be "pci"
 - reg: Base addresses and lengths of the PCIe subsys and root ports.
 - reg-names: Names of the above areas to use during resource lookup.
diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt
new file mode 100644 (file)
index 0000000..b739f92
--- /dev/null
@@ -0,0 +1,171 @@
+NVIDIA Tegra PCIe controller (Synopsys DesignWare Core based)
+
+This PCIe host controller is based on the Synopsys DesignWare PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: For Tegra19x, must contain "nvidia,tegra194-pcie".
+- device_type: Must be "pci"
+- power-domains: A phandle to the node that controls power to the respective
+  PCIe controller and a specifier name for the PCIe controller. Following are
+  the specifiers for the different PCIe controllers:
+    TEGRA194_POWER_DOMAIN_PCIEX8B: C0
+    TEGRA194_POWER_DOMAIN_PCIEX1A: C1
+    TEGRA194_POWER_DOMAIN_PCIEX1A: C2
+    TEGRA194_POWER_DOMAIN_PCIEX1A: C3
+    TEGRA194_POWER_DOMAIN_PCIEX4A: C4
+    TEGRA194_POWER_DOMAIN_PCIEX8A: C5
+  These specifiers are defined in the
+  "include/dt-bindings/power/tegra194-powergate.h" file.
+- reg: A list of physical base address and length pairs for each set of
+  controller registers. Must contain an entry for each entry in the reg-names
+  property.
+- reg-names: Must include the following entries:
+  "appl": Controller's application logic registers
+  "config": As per the definition in designware-pcie.txt
+  "atu_dma": iATU and DMA registers. This is where the iATU (internal Address
+             Translation Unit) registers of the PCIe core are made available
+             for SW access.
+  "dbi": The aperture where root port's own configuration registers are
+         available
+- interrupts: A list of interrupt outputs of the controller. Must contain an
+  entry for each entry in the interrupt-names property.
+- interrupt-names: Must include the following entries:
+  "intr": The Tegra interrupt that is asserted for controller interrupts
+  "msi": The Tegra interrupt that is asserted when an MSI is received
+- bus-range: Range of bus numbers associated with this controller
+- #address-cells: Address representation for root ports (must be 3)
+  - cell 0 specifies the bus and device numbers of the root port:
+    [23:16]: bus number
+    [15:11]: device number
+  - cell 1 denotes the upper 32 address bits and should be 0
+  - cell 2 contains the lower 32 address bits and is used to translate to the
+    CPU address space
+- #size-cells: Size representation for root ports (must be 2)
+- ranges: Describes the translation of addresses for root ports and standard
+  PCI regions. The entries must be 7 cells each, where the first three cells
+  correspond to the address as described for the #address-cells property
+  above, the fourth and fifth cells are for the physical CPU address to
+  translate to and the sixth and seventh cells are as described for the
+  #size-cells property above.
+  - Entries setup the mapping for the standard I/O, memory and
+    prefetchable PCI regions. The first cell determines the type of region
+    that is setup:
+    - 0x81000000: I/O memory region
+    - 0x82000000: non-prefetchable memory region
+    - 0xc2000000: prefetchable memory region
+  Please refer to the standard PCI bus binding document for a more detailed
+  explanation.
+- #interrupt-cells: Size representation for interrupts (must be 1)
+- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties
+  Please refer to the standard PCI bus binding document for a more detailed
+  explanation.
+- clocks: Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+  - core
+- resets: Must contain an entry for each entry in reset-names.
+  See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+  - apb
+  - core
+- phys: Must contain a phandle to P2U PHY for each entry in phy-names.
+- phy-names: Must include an entry for each active lane.
+  "p2u-N": where N ranges from 0 to one less than the total number of lanes
+- nvidia,bpmp: Must contain a pair consisting of a phandle to the BPMP
+  controller node followed by a controller ID. Following are the controller
+  IDs for each controller:
+    0: C0
+    1: C1
+    2: C2
+    3: C3
+    4: C4
+    5: C5
+- vddio-pex-ctl-supply: Regulator supply for PCIe side band signals
+
+Optional properties:
+- pinctrl-names: A list of pinctrl state names.
+  It is mandatory for C5 controller and optional for other controllers.
+  - "default": Configures PCIe I/O for proper operation.
+- pinctrl-0: phandle for the 'default' state of pin configuration.
+  It is mandatory for C5 controller and optional for other controllers.
+- supports-clkreq: Refer to Documentation/devicetree/bindings/pci/pci.txt
+- nvidia,update-fc-fixup: This is a boolean property and needs to be present to
+    improve performance when a platform is designed in such a way that it
+    satisfies at least one of the following conditions, thereby enabling the
+    root port to exchange the optimum number of FC (Flow Control) credits with
+    downstream devices:
+    1. If C0/C4/C5 run at x1/x2 link widths (irrespective of speed and MPS)
+    2. If C0/C1/C2/C3/C4/C5 operate at their respective max link widths and
+       a) speed is Gen-2 and MPS is 256B
+       b) speed is >= Gen-3 with any MPS
+- nvidia,aspm-cmrt-us: Common Mode Restore Time for proper operation of ASPM
+   to be specified in microseconds
+- nvidia,aspm-pwr-on-t-us: Power On time for proper operation of ASPM to be
+   specified in microseconds
+- nvidia,aspm-l0s-entrance-latency-us: ASPM L0s entrance latency to be
+   specified in microseconds
+- vpcie3v3-supply: A phandle to the regulator node that supplies 3.3V to the slot
+  if the platform has one such slot. (Ex:- x16 slot owned by C5 controller
+  in p2972-0000 platform).
+- vpcie12v-supply: A phandle to the regulator node that supplies 12V to the slot
+  if the platform has one such slot. (Ex:- x16 slot owned by C5 controller
+  in p2972-0000 platform).
+
+Examples:
+=========
+
+Tegra194:
+--------
+
+       pcie@14180000 {
+               compatible = "nvidia,tegra194-pcie", "snps,dw-pcie";
+               power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8B>;
+               reg = <0x00 0x14180000 0x0 0x00020000   /* appl registers (128K)      */
+                      0x00 0x38000000 0x0 0x00040000   /* configuration space (256K) */
+                      0x00 0x38040000 0x0 0x00040000>; /* iATU_DMA reg space (256K)  */
+               reg-names = "appl", "config", "atu_dma";
+
+               #address-cells = <3>;
+               #size-cells = <2>;
+               device_type = "pci";
+               num-lanes = <8>;
+               linux,pci-domain = <0>;
+
+               pinctrl-names = "default";
+               pinctrl-0 = <&pex_rst_c5_out_state>, <&clkreq_c5_bi_dir_state>;
+
+               clocks = <&bpmp TEGRA194_CLK_PEX0_CORE_0>;
+               clock-names = "core";
+
+               resets = <&bpmp TEGRA194_RESET_PEX0_CORE_0_APB>,
+                        <&bpmp TEGRA194_RESET_PEX0_CORE_0>;
+               reset-names = "apb", "core";
+
+               interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,  /* controller interrupt */
+                            <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;  /* MSI interrupt */
+               interrupt-names = "intr", "msi";
+
+               #interrupt-cells = <1>;
+               interrupt-map-mask = <0 0 0 0>;
+               interrupt-map = <0 0 0 0 &gic GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
+
+               nvidia,bpmp = <&bpmp 0>;
+
+               supports-clkreq;
+               nvidia,aspm-cmrt-us = <60>;
+               nvidia,aspm-pwr-on-t-us = <20>;
+               nvidia,aspm-l0s-entrance-latency-us = <3>;
+
+               bus-range = <0x0 0xff>;
+               ranges = <0x81000000 0x0  0x38100000 0x0  0x38100000 0x0 0x00100000    /* downstream I/O (1MB) */
+                         0x82000000 0x0  0x38200000 0x0  0x38200000 0x0 0x01E00000    /* non-prefetchable memory (30MB) */
+                         0xc2000000 0x18 0x00000000 0x18 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+
+               vddio-pex-ctl-supply = <&vdd_1v8ao>;
+               vpcie3v3-supply = <&vdd_3v3_pcie>;
+               vpcie12v-supply = <&vdd_12v_pcie>;
+
+               phys = <&p2u_hsio_2>, <&p2u_hsio_3>, <&p2u_hsio_4>,
+                      <&p2u_hsio_5>;
+               phy-names = "p2u-0", "p2u-1", "p2u-2", "p2u-3";
+       };
index 8324a4e..7a813d0 100644 (file)
@@ -11,7 +11,7 @@ Required properties:
 - reg-names:
    - "ctrl" for the control register region
    - "config" for the config space region
-- interrupts: Interrupt specifier for the PCIe controler
+- interrupts: Interrupt specifier for the PCIe controller
 - clocks: reference to the PCIe controller clocks
 - clock-names: mandatory if there is a second clock, in this case the
    name must be "core" for the first clock and "reg" for the second
index 2a5d910..29bcbd8 100644 (file)
@@ -27,6 +27,11 @@ driver implementation may support the following properties:
 - reset-gpios:
    If present this property specifies PERST# GPIO. Host drivers can parse the
    GPIO and apply fundamental reset to endpoints.
+- supports-clkreq:
+   If present, this property specifies that CLKREQ signal routing exists from
+   the root port to the downstream device, and host bridge drivers can perform
+   programming that depends on the presence of the CLKREQ signal; for example,
+   programming the root port not to advertise ASPM L1 Sub-States support if
+   there is no CLKREQ signal.
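+   An illustrative device tree fragment (unit address and any other properties
+   are hypothetical):
+
+       pcie@0 {
+               supports-clkreq;
+       };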
 
 PCI-PCI Bridge properties
 -------------------------
diff --git a/Documentation/devicetree/bindings/pci/pcie-al.txt b/Documentation/devicetree/bindings/pci/pcie-al.txt
new file mode 100644 (file)
index 0000000..557a508
--- /dev/null
@@ -0,0 +1,46 @@
+* Amazon Annapurna Labs PCIe host bridge
+
+Amazon's Annapurna Labs PCIe Host Controller is based on the Synopsys DesignWare
+PCI core. It inherits common properties defined in
+Documentation/devicetree/bindings/pci/designware-pcie.txt.
+
+Properties of the host controller node that differ from the common binding are:
+
+- compatible:
+       Usage: required
+       Value type: <stringlist>
+       Definition: Value should contain
+                       - "amazon,al-alpine-v2-pcie" for alpine_v2
+                       - "amazon,al-alpine-v3-pcie" for alpine_v3
+
+- reg:
+       Usage: required
+       Value type: <prop-encoded-array>
+       Definition: Register ranges as listed in the reg-names property
+
+- reg-names:
+       Usage: required
+       Value type: <stringlist>
+       Definition: Must include the following entries
+                       - "config"      PCIe ECAM space
+                       - "controller"  AL proprietary registers
+                       - "dbi"         Designware PCIe registers
+
+Example:
+
+       pcie-external0: pcie@fb600000 {
+               compatible = "amazon,al-alpine-v3-pcie";
+               reg = <0x0 0xfb600000 0x0 0x00100000
+                      0x0 0xfd800000 0x0 0x00010000
+                      0x0 0xfd810000 0x0 0x00001000>;
+               reg-names = "config", "controller", "dbi";
+               bus-range = <0 255>;
+               device_type = "pci";
+               #address-cells = <3>;
+               #size-cells = <2>;
+               #interrupt-cells = <1>;
+               interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-map-mask = <0x00 0 0 7>;
+               interrupt-map = <0x0000 0 0 1 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; /* INTa */
+               ranges = <0x02000000 0x0 0xc0010000 0x0 0xc0010000 0x0 0x07ff0000>;
+       };
index 8a56a85..a974821 100644 (file)
@@ -37,7 +37,7 @@ properties:
       - description: exclusive PHY reset line
       - description: shared reset line between the PCIe PHY and PCIe controller
 
-  resets-names:
+  reset-names:
     items:
       - const: phy
       - const: pcie
diff --git a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt
new file mode 100644 (file)
index 0000000..d23ff90
--- /dev/null
@@ -0,0 +1,28 @@
+NVIDIA Tegra194 P2U binding
+
+Tegra194 has two PHY bricks, namely HSIO (High Speed IO) and NVHS (NVIDIA High
+Speed), interfacing with 12 and 8 P2U instances respectively.
+A P2U instance is glue logic between the Synopsys DesignWare Core PCIe IP's
+PIPE interface and the PHY of the HSIO/NVHS bricks. Each P2U instance
+represents one PCIe lane.
+
+Required properties:
+- compatible: For Tegra19x, must contain "nvidia,tegra194-p2u".
+- reg: Should be the physical address space and length of each respective P2U
+       instance.
+- reg-names: Must include the entry "ctl".
+
+Required properties for PHY port node:
+- #phy-cells: Defined by generic PHY bindings.  Must be 0.
+
+Refer to phy/phy-bindings.txt for the generic PHY binding properties.
+
+Example:
+
+p2u_hsio_0: phy@3e10000 {
+       compatible = "nvidia,tegra194-p2u";
+       reg = <0x03e10000 0x10000>;
+       reg-names = "ctl";
+
+       #phy-cells = <0>;
+};
index f83d888..064b7df 100644 (file)
@@ -33,13 +33,13 @@ patternProperties:
           allOf:
             - $ref: "/schemas/types.yaml#/definitions/string"
             - enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15,
-              ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI,
-              ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1,
-              GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2,
-              GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12,
-              I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7,
-              I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC,
-              LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2,
+              ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC,
+              ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0,
+              GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
+              GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11,
+              I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6,
+              I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ,
+              LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2,
               MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2,
               NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3,
               NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1,
@@ -48,47 +48,45 @@ patternProperties:
               PWM8, PWM9, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3,
               RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12,
               SALT13, SALT14, SALT15, SALT16, SALT2, SALT3, SALT4, SALT5,
-              SALT6, SALT7, SALT8, SALT9, SD1, SD2, SD3, SD3DAT4, SD3DAT5,
-              SD3DAT6, SD3DAT7, SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO,
-              SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1,
-              SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11,
-              TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, TACH4, TACH5,
-              TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, THRU3, TXD1,
-              TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13, UART6, UART7,
-              UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3,
-              WDTRST4, ]
+              SALT6, SALT7, SALT8, SALT9, SD1, SD2, SGPM1, SGPS1, SIOONCTRL,
+              SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1,
+              SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1,
+              TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
+              TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
+              THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13,
+              UART6, UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2,
+              WDTRST3, WDTRST4, ]
         groups:
           allOf:
             - $ref: "/schemas/types.yaml#/definitions/string"
             - enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15,
-              ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI,
-              ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, GPIT0,
-              GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
-              GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1,
-              I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3,
-              I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6,
-              JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ,
-              MACLINK1, MACLINK2, MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3,
-              MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4,
-              NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1,
-              NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE,
-              PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1, PWM12G0, PWM12G1,
-              PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0, PWM15G1, PWM2, PWM3,
-              PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1, PWM9G0, PWM9G1, QSPI1,
-              QSPI2, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3,
-              RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10G0, SALT10G1,
-              SALT11G0, SALT11G1, SALT12G0, SALT12G1, SALT13G0, SALT13G1,
-              SALT14G0, SALT14G1, SALT15G0, SALT15G1, SALT16G0, SALT16G1,
-              SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, SALT9G0,
-              SALT9G1, SD1, SD2, SD3, SD3DAT4, SD3DAT5, SD3DAT6, SD3DAT7,
-              SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD,
-              SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1, SPI1WP, SPI2,
-              SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, TACH12, TACH13,
-              TACH14, TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, TACH7, TACH8,
-              TACH9, THRU0, THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, TXD4,
-              UART10, UART11, UART12G0, UART12G1, UART13G0, UART13G1, UART6,
-              UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3,
-              WDTRST4, ]
+              ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1,
+              EMMCG4, EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID,
+              FWQSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5,
+              GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6,
+              GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14,
+              I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9,
+              I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD,
+              LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, MACLINK3, MACLINK4,
+              MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1,
+              NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2,
+              NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4,
+              OSCCLK, PEWAKE, PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1,
+              PWM12G0, PWM12G1, PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0,
+              PWM15G1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1,
+              PWM9G0, PWM9G1, QSPI1, QSPI2, RGMII1, RGMII2, RGMII3, RGMII4,
+              RMII1, RMII2, RMII3, RMII4, RXD1, RXD2, RXD3, RXD4, SALT1,
+              SALT10G0, SALT10G1, SALT11G0, SALT11G1, SALT12G0, SALT12G1,
+              SALT13G0, SALT13G1, SALT14G0, SALT14G1, SALT15G0, SALT15G1,
+              SALT16G0, SALT16G1, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7,
+              SALT8, SALT9G0, SALT9G1, SD1, SD2, SD3, SGPM1, SGPS1, SIOONCTRL,
+              SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1,
+              SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1,
+              TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
+              TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
+              THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12G0,
+              UART12G1, UART13G0, UART13G1, UART6, UART7, UART8, UART9, VB,
+              VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4, ]
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt b/Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt
new file mode 100644 (file)
index 0000000..933f0c4
--- /dev/null
@@ -0,0 +1,20 @@
+Device Tree Bindings for Power Controller on MediaTek PMIC
+
+The power controller found on the PMIC is responsible for externally powering
+the remote MediaTek SoC off or on through the BBPU circuit.
+
+Required properties:
+- compatible: Should be one of the following
+       "mediatek,mt6323-pwrc": for MT6323 PMIC
+
+Example:
+
+       pmic {
+               compatible = "mediatek,mt6323";
+
+               ...
+
+               power-controller {
+                       compatible = "mediatek,mt6323-pwrc";
+               };
+       }
index 991728c..c850153 100644 (file)
@@ -6,6 +6,8 @@ Required properties:
    - "mediatek,mt7622-pwm": found on mt7622 SoC.
    - "mediatek,mt7623-pwm": found on mt7623 SoC.
    - "mediatek,mt7628-pwm": found on mt7628 SoC.
+   - "mediatek,mt7629-pwm", "mediatek,mt7622-pwm": found on mt7629 SoC.
+   - "mediatek,mt8516-pwm": found on mt8516 SoC.
  - reg: physical base address and length of the controller's registers.
  - #pwm-cells: must be 2. See pwm.txt in this directory for a description of
    the cell format.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sprd.txt b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt
new file mode 100644 (file)
index 0000000..16fa5a0
--- /dev/null
@@ -0,0 +1,40 @@
+Spreadtrum PWM controller
+
+The PWM controller on Spreadtrum SoCs provides 4 PWM channels.
+
+Required properties:
+- compatible : Should be "sprd,ums512-pwm".
+- reg: Physical base address and length of the controller's registers.
+- clocks: The phandle and specifier referencing the controller's clocks.
+- clock-names: Should contain the following entries:
+  "pwmn": used to derive the functional clock for PWM channel n (n range: 0 ~ 3).
+  "enablen": for PWM channel n enable clock (n range: 0 ~ 3).
+- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of
+  the cells format.
+
+Optional properties:
+- assigned-clocks: Reference to the PWM clock entries.
+- assigned-clock-parents: The phandles of the parent clocks for the PWM clocks.
+
+Example:
+       pwms: pwm@32260000 {
+               compatible = "sprd,ums512-pwm";
+               reg = <0 0x32260000 0 0x10000>;
+               clock-names = "pwm0", "enable0",
+                       "pwm1", "enable1",
+                       "pwm2", "enable2",
+                       "pwm3", "enable3";
+               clocks = <&aon_clk CLK_PWM0>, <&aonapb_gate CLK_PWM0_EB>,
+                      <&aon_clk CLK_PWM1>, <&aonapb_gate CLK_PWM1_EB>,
+                      <&aon_clk CLK_PWM2>, <&aonapb_gate CLK_PWM2_EB>,
+                      <&aon_clk CLK_PWM3>, <&aonapb_gate CLK_PWM3_EB>;
+               assigned-clocks = <&aon_clk CLK_PWM0>,
+                       <&aon_clk CLK_PWM1>,
+                       <&aon_clk CLK_PWM2>,
+                       <&aon_clk CLK_PWM3>;
+               assigned-clock-parents = <&ext_26m>,
+                       <&ext_26m>,
+                       <&ext_26m>,
+                       <&ext_26m>;
+               #pwm-cells = <2>;
+       };
index a78150c..f324169 100644 (file)
@@ -30,8 +30,8 @@ if:
 properties:
   compatible:
     enum:
-      - const: regulator-fixed
-      - const: regulator-fixed-clock
+      - regulator-fixed
+      - regulator-fixed-clock
 
   regulator-name: true
 
index b261a30..04819ad 100644 (file)
@@ -24,15 +24,17 @@ description: |
 
 properties:
   compatible:
-    items:
-      - enum:
-          - sifive,rocket0
-          - sifive,e5
-          - sifive,e51
-          - sifive,u54-mc
-          - sifive,u54
-          - sifive,u5
-      - const: riscv
+    oneOf:
+      - items:
+          - enum:
+              - sifive,rocket0
+              - sifive,e5
+              - sifive,e51
+              - sifive,u54-mc
+              - sifive,u54
+              - sifive,u5
+          - const: riscv
+      - const: riscv    # Simulator only
     description:
       Identifies that the hart uses the RISC-V instruction set
       and identifies the type of the hart.
@@ -66,12 +68,8 @@ properties:
       insensitive, letters in the riscv,isa string must be all
       lowercase to simplify parsing.
 
-  timebase-frequency:
-    type: integer
-    minimum: 1
-    description:
-      Specifies the clock frequency of the system timer in Hz.
-      This value is common to all harts on a single system image.
+  # RISC-V requires 'timebase-frequency' in /cpus, so disallow it here
+  timebase-frequency: false
 
   interrupt-controller:
     type: object
@@ -93,7 +91,6 @@ properties:
 
 required:
   - riscv,isa
-  - timebase-frequency
   - interrupt-controller
 
 examples:
index dd63151..b143d9a 100644 (file)
@@ -26,6 +26,8 @@ Required properties:
     - "renesas,hscif-r8a77470" for R8A77470 (RZ/G1C) HSCIF compatible UART.
     - "renesas,scif-r8a774a1" for R8A774A1 (RZ/G2M) SCIF compatible UART.
     - "renesas,hscif-r8a774a1" for R8A774A1 (RZ/G2M) HSCIF compatible UART.
+    - "renesas,scif-r8a774b1" for R8A774B1 (RZ/G2N) SCIF compatible UART.
+    - "renesas,hscif-r8a774b1" for R8A774B1 (RZ/G2N) HSCIF compatible UART.
     - "renesas,scif-r8a774c0" for R8A774C0 (RZ/G2E) SCIF compatible UART.
     - "renesas,hscif-r8a774c0" for R8A774C0 (RZ/G2E) HSCIF compatible UART.
     - "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART.
index 04cbb90..28f2cba 100644 (file)
@@ -23,6 +23,7 @@ Required properties:
 Optional property:
 - little-endian : If present, the TMU registers are little endian. If absent,
        the default is big endian.
+- clocks : the input clock for the TMU silicon.
 
 Example:
 
index b9f04e6..6ffb09b 100644 (file)
@@ -85,8 +85,8 @@ A child node must exist to represent the core DWC2 IP block. The name of
 the node is not important. The content of the node is defined in dwc2.txt.
 
 PHY documentation is provided in the following places:
-- Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt
-- Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt
+- Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
+- Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml
 
 Example device nodes:
        usb: usb@ffe09000 {
index 059f6ef..1ca64c8 100644 (file)
@@ -63,7 +63,11 @@ properties:
     description:
       Set this flag to force EHCI reset after resume.
 
-  phys: true
+  phys:
+    description: PHY specifier for the USB PHY
+
+  phy-names:
+    const: usb
 
 required:
   - compatible
@@ -89,6 +93,7 @@ examples:
         interrupts = <39>;
         clocks = <&ahb_gates 1>;
         phys = <&usbphy 1>;
+        phy-names = "usb";
     };
 
 ...
index da5a14b..bcffec1 100644 (file)
@@ -67,7 +67,11 @@ properties:
     description:
       Overrides the detected port count
 
-  phys: true
+  phys:
+    description: PHY specifier for the USB PHY
+
+  phy-names:
+    const: usb
 
 required:
   - compatible
@@ -84,6 +88,7 @@ examples:
           interrupts = <64>;
           clocks = <&usb_clk 6>, <&ahb_gates 2>;
           phys = <&usbphy 1>;
+          phy-names = "usb";
       };
 
 ...
index f3e4ace..42d8814 100644 (file)
@@ -33,7 +33,7 @@ Required properties:
        "dma_ck": dma_bus clock for data transfer by DMA,
        "xhci_ck": controller clock
 
- - phys : see usb-hcd.txt in the current directory
+ - phys : see usb-hcd.yaml in the current directory
 
 Optional properties:
  - wakeup-source : enable USB remote wakeup;
@@ -53,7 +53,7 @@ Optional properties:
        See: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
  - imod-interval-ns: default interrupt moderation interval is 5000ns
 
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
 supported.
 
 Example:
index b9af7f5..e0ae609 100644 (file)
@@ -17,7 +17,7 @@ Required properties:
  - clock-names : must contain "sys_ck" for clock of controller,
        the following clocks are optional:
        "ref_ck", "mcu_ck" and "dma_ck";
- - phys : see usb-hcd.txt in the current directory
+ - phys : see usb-hcd.yaml in the current directory
  - dr_mode : should be one of "host", "peripheral" or "otg",
        refer to usb/generic.txt
 
@@ -60,7 +60,7 @@ Optional properties:
  - mediatek,u3p-dis-msk : mask to disable u3ports, bit0 for u3port0,
        bit1 for u3port1, ... etc;
 
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
 supported.
 
 Sub-nodes:
index 9c8c56d..7263b7f 100644 (file)
@@ -18,8 +18,13 @@ properties:
     description:
       List of all the USB PHYs on this HCD
 
+  phy-names:
+    description:
+      Name specifier for the USB PHY
+
 examples:
   - |
     usb {
         phys = <&usb2_phy1>, <&usb3_phy1>;
+        phy-names = "usb";
     };
index cc2e6f7..d1702eb 100644 (file)
@@ -6,7 +6,7 @@ Required properties:
 - reg : Should contain 1 register ranges(address and length)
 - interrupts : UHCI controller interrupt
 
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
 supported.
 
 Example:
index 97400e8..b49b819 100644 (file)
@@ -41,9 +41,9 @@ Optional properties:
   - usb3-lpm-capable: determines if platform is USB3 LPM capable
   - quirk-broken-port-ped: set if the controller has broken port disable mechanism
   - imod-interval-ns: default interrupt moderation interval is 5000ns
-  - phys : see usb-hcd.txt in the current directory
+  - phys : see usb-hcd.yaml in the current directory
 
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
 supported.
 
 
diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
new file mode 100644 (file)
index 0000000..3a54f58
--- /dev/null
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/allwinner,sun4i-a10-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Watchdog Device Tree Bindings
+
+allOf:
+  - $ref: "watchdog.yaml#"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-wdt
+      - const: allwinner,sun6i-a31-wdt
+      - items:
+          - const: allwinner,sun50i-a64-wdt
+          - const: allwinner,sun6i-a31-wdt
+      - items:
+          - const: allwinner,sun50i-h6-wdt
+          - const: allwinner,sun6i-a31-wdt
+      - items:
+          - const: allwinner,suniv-f1c100s-wdt
+          - const: allwinner,sun4i-a10-wdt
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    wdt: watchdog@1c20c90 {
+        compatible = "allwinner,sun4i-a10-wdt";
+        reg = <0x01c20c90 0x10>;
+        interrupts = <24>;
+        clocks = <&osc24M>;
+        timeout-sec = <10>;
+    };
+
+...
index c5077a1..d78d4a8 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
  - compatible: must be one of:
        - "aspeed,ast2400-wdt"
        - "aspeed,ast2500-wdt"
+       - "aspeed,ast2600-wdt"
 
  - reg: physical base address of the controller and length of memory mapped
    region
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt
new file mode 100644 (file)
index 0000000..f902508
--- /dev/null
@@ -0,0 +1,22 @@
+* Freescale i.MX7ULP Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "fsl,imx7ulp-wdt"
+- reg : Should contain WDT registers location and length
+- interrupts : Should contain WDT interrupt
+- clocks: Should contain a phandle pointing to the gated peripheral clock.
+
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+
+Examples:
+
+wdog1: watchdog@403d0000 {
+       compatible = "fsl,imx7ulp-wdt";
+       reg = <0x403d0000 0x10000>;
+       interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+       clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+       assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+       assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
+       timeout-sec = <40>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
deleted file mode 100644 (file)
index e65198d..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Allwinner SoCs Watchdog timer
-
-Required properties:
-
-- compatible : should be one of
-       "allwinner,sun4i-a10-wdt"
-       "allwinner,sun6i-a31-wdt"
-       "allwinner,sun50i-a64-wdt","allwinner,sun6i-a31-wdt"
-       "allwinner,sun50i-h6-wdt","allwinner,sun6i-a31-wdt"
-       "allwinner,suniv-f1c100s-wdt", "allwinner,sun4i-a10-wdt"
-- reg : Specifies base physical address and size of the registers.
-
-Optional properties:
-- timeout-sec : Contains the watchdog timeout in seconds
-
-Example:
-
-wdt: watchdog@1c20c90 {
-       compatible = "allwinner,sun4i-a10-wdt";
-       reg = <0x01c20c90 0x10>;
-       timeout-sec = <10>;
-};
diff --git a/Documentation/devicetree/bindings/watchdog/watchdog.yaml b/Documentation/devicetree/bindings/watchdog/watchdog.yaml
new file mode 100644 (file)
index 0000000..187bf6c
--- /dev/null
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/watchdog.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Watchdog Generic Bindings
+
+maintainers:
+  - Guenter Roeck <linux@roeck-us.net>
+  - Wim Van Sebroeck <wim@linux-watchdog.org>
+
+description: |
+  This document describes generic bindings which can be used to
+  describe watchdog devices in a device tree.
+
+properties:
+  $nodename:
+    pattern: "^watchdog(@.*|-[0-9a-f])?$"
+
+  timeout-sec:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Contains the watchdog timeout in seconds.
+
+...
index d2c6a5c..b19b6a0 100644 (file)
@@ -158,6 +158,20 @@ Mount Options
         copies.  Currently, it's only used in copy_file_range, which will revert
         to the default VFS implementation if this option is used.
 
+  recover_session=<no|clean>
+       Set auto reconnect mode in the case where the client is blacklisted. The
+       available modes are "no" and "clean". The default is "no".
+
+       * no: never attempt to reconnect when the client detects that it has
+       been blacklisted. Operations will generally fail after being
+       blacklisted.
+
+       * clean: the client reconnects to the ceph cluster automatically when
+       it detects that it has been blacklisted. During reconnect, the client
+       drops dirty data/metadata and invalidates page caches and writable
+       file handles. After reconnect, file locks become stale because the
+       MDS loses track of them. If an inode contains any stale file locks,
+       read/write on the inode is not allowed until applications release all
+       stale file locks.
+
 More Information
 ================
 
index fd2bcf9..2c3a9f7 100644 (file)
@@ -37,3 +37,13 @@ filesystem implementations.
    journalling
    fscrypt
    fsverity
+
+Filesystems
+===========
+
+Documentation for filesystem implementations.
+
+.. toctree::
+   :maxdepth: 2
+
+   virtiofs
diff --git a/Documentation/filesystems/virtiofs.rst b/Documentation/filesystems/virtiofs.rst
new file mode 100644 (file)
index 0000000..4f338e3
--- /dev/null
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================
+virtiofs: virtio-fs host<->guest shared file system
+===================================================
+
+- Copyright (C) 2019 Red Hat, Inc.
+
+Introduction
+============
+The virtiofs file system for Linux implements a driver for the paravirtualized
+VIRTIO "virtio-fs" device for guest<->host file system sharing.  It allows a
+guest to mount a directory that has been exported on the host.
+
+Guests often require access to files residing on the host or remote systems.
+Use cases include making files available to new guests during installation,
+booting from a root file system located on the host, persistent storage for
+stateless or ephemeral guests, and sharing a directory between guests.
+
+Although it is possible to use existing network file systems for some of these
+tasks, they require configuration steps that are hard to automate and they
+expose the storage network to the guest.  The virtio-fs device was designed to
+solve these problems by providing file system access without networking.
+
+Furthermore, the virtio-fs device takes advantage of the co-location of the
+guest and host to increase performance and provide semantics that are not
+possible with network file systems.
+
+Usage
+=====
+Mount file system with tag ``myfs`` on ``/mnt``:
+
+.. code-block:: sh
+
+  guest# mount -t virtiofs myfs /mnt
+
+Please see https://virtio-fs.gitlab.io/ for details on how to configure QEMU
+and the virtiofsd daemon.
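+
+Equivalently, a guest program can issue the mount(2) system call directly.
+The following is a minimal sketch; the tag and mount point simply mirror
+the example above:
+
+.. code-block:: c
+
+  #include <sys/mount.h>
+
+  /* Mount the virtio-fs tag "myfs" on /mnt; returns 0 on success. */
+  int mount_virtiofs(void)
+  {
+      return mount("myfs", "/mnt", "virtiofs", 0, NULL);
+  }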
+
+Internals
+=========
+Since the virtio-fs device uses the FUSE protocol for file system requests, the
+virtiofs file system for Linux is integrated closely with the FUSE file system
+client.  The guest acts as the FUSE client while the host acts as the FUSE
+server.  The /dev/fuse interface between the kernel and userspace is replaced
+with the virtio-fs device interface.
+
+FUSE requests are placed into a virtqueue and processed by the host.  The
+response portion of the buffer is filled in by the host and the guest handles
+the request completion.
+
+Mapping /dev/fuse to virtqueues requires solving differences in semantics
+between /dev/fuse and virtqueues.  Each time the /dev/fuse device is read, the
+FUSE client may choose which request to transfer, making it possible to
+prioritize certain requests over others.  Virtqueues have queue semantics and
+it is not possible to change the order of requests that have been enqueued.
+This is especially important if the virtqueue becomes full since it is then
+impossible to add high priority requests.  In order to address this difference,
+the virtio-fs device uses a "hiprio" virtqueue specifically for requests that
+have priority over normal requests.
index 8147c3f..230ad59 100644 (file)
@@ -7,6 +7,7 @@ Linux Hardware Monitoring
 
    hwmon-kernel-api
    pmbus-core
+   inspur-ipsps1
    submitting-patches
    sysfs-interface
    userspace-tools
index 2b871ae..292c0c2 100644 (file)
@@ -1,5 +1,5 @@
 Kernel driver inspur-ipsps1
-=======================
+===========================
 
 Supported chips:
 
index 12a86ba..4451d59 100644 (file)
@@ -21,10 +21,17 @@ Supported chips:
 
 * AMD Family 14h processors: "Brazos" (C/E/G/Z-Series)
 
-* AMD Family 15h processors: "Bulldozer" (FX-Series), "Trinity", "Kaveri", "Carrizo"
+* AMD Family 15h processors: "Bulldozer" (FX-Series), "Trinity", "Kaveri",
+  "Carrizo", "Stoney Ridge", "Bristol Ridge"
 
 * AMD Family 16h processors: "Kabini", "Mullins"
 
+* AMD Family 17h processors: "Zen", "Zen 2"
+
+* AMD Family 18h processors: "Hygon Dhyana"
+
+* AMD Family 19h processors: "Zen 3"
+
   Prefix: 'k10temp'
 
   Addresses scanned: PCI space
@@ -110,3 +117,12 @@ The maximum value for Tctl is available in the file temp1_max.
 If the BIOS has enabled hardware temperature control, the threshold at
 which the processor will throttle itself to avoid damage is available in
 temp1_crit and temp1_crit_hyst.
+
+On some AMD CPUs, there is a difference between the die temperature (Tdie) and
+the reported temperature (Tctl). Tdie is the real measured temperature, and
+Tctl is used for fan control. While Tctl is always available as temp1_input,
+the driver exports Tdie temperature as temp2_input for those CPUs which support
+it.
+
+Models from the 17h family report a relative temperature; the driver aims to
+compensate and report the real temperature.
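+
+A minimal userspace sketch of reading these attributes (the hwmon index below
+is hypothetical; the correct node is normally found by matching the hwmon
+"name" attribute against "k10temp")::
+
+  /* Read Tdie (temp2_input, in millidegrees Celsius) where supported. */
+  #include <stdio.h>
+
+  int main(void)
+  {
+      FILE *f = fopen("/sys/class/hwmon/hwmon0/temp2_input", "r");
+      long mdeg;
+
+      if (!f)
+          return 1;
+      if (fscanf(f, "%ld", &mdeg) != 1) {
+          fclose(f);
+          return 1;
+      }
+      fclose(f);
+      printf("Tdie: %.1f degC\n", mdeg / 1000.0);
+      return 0;
+  }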
index 6ba9d53..b89c881 100644 (file)
@@ -954,11 +954,6 @@ When kbuild executes, the following steps are followed (roughly):
 
        From commandline LDFLAGS_MODULE shall be used (see kbuild.txt).
 
-    KBUILD_ARFLAGS   Options for $(AR) when creating archives
-
-       $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic
-       mode) if this option is supported by $(AR).
-
     KBUILD_LDS
 
        The linker script with full path. Assigned by the top-level Makefile.
index d2ae799..774a998 100644 (file)
@@ -498,10 +498,11 @@ build.
        will be written containing all exported symbols that were not
        defined in the kernel.
 
---- 6.3 Symbols From Another External Module
+6.3 Symbols From Another External Module
+----------------------------------------
 
        Sometimes, an external module uses exported symbols from
-       another external module. kbuild needs to have full knowledge of
+       another external module. Kbuild needs to have full knowledge of
        all symbols to avoid spitting out warnings about undefined
        symbols. Three solutions exist for this situation.
 
@@ -521,7 +522,7 @@ build.
                The top-level kbuild file would then look like::
 
                        #./Kbuild (or ./Makefile):
-                               obj-y := foo/ bar/
+                               obj-m := foo/ bar/
 
                And executing::
 
diff --git a/Documentation/kbuild/namespaces.rst b/Documentation/kbuild/namespaces.rst
deleted file mode 100644 (file)
index 982ed7b..0000000
+++ /dev/null
@@ -1,154 +0,0 @@
-=================
-Symbol Namespaces
-=================
-
-The following document describes how to use Symbol Namespaces to structure the
-export surface of in-kernel symbols exported through the family of
-EXPORT_SYMBOL() macros.
-
-.. Table of Contents
-
-       === 1 Introduction
-       === 2 How to define Symbol Namespaces
-          --- 2.1 Using the EXPORT_SYMBOL macros
-          --- 2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
-       === 3 How to use Symbols exported in Namespaces
-       === 4 Loading Modules that use namespaced Symbols
-       === 5 Automatically creating MODULE_IMPORT_NS statements
-
-1. Introduction
-===============
-
-Symbol Namespaces have been introduced as a means to structure the export
-surface of the in-kernel API. It allows subsystem maintainers to partition
-their exported symbols into separate namespaces. That is useful for
-documentation purposes (think of the SUBSYSTEM_DEBUG namespace) as well as for
-limiting the availability of a set of symbols for use in other parts of the
-kernel. As of today, modules that make use of symbols exported into namespaces,
-are required to import the namespace. Otherwise the kernel will, depending on
-its configuration, reject loading the module or warn about a missing import.
-
-2. How to define Symbol Namespaces
-==================================
-
-Symbols can be exported into namespace using different methods. All of them are
-changing the way EXPORT_SYMBOL and friends are instrumented to create ksymtab
-entries.
-
-2.1 Using the EXPORT_SYMBOL macros
-==================================
-
-In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), that allow
-exporting of kernel symbols to the kernel symbol table, variants of these are
-available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
-EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
-Please note that due to macro expansion that argument needs to be a
-preprocessor symbol. E.g. to export the symbol `usb_stor_suspend` into the
-namespace `USB_STORAGE`, use::
-
-       EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
-
-The corresponding ksymtab entry struct `kernel_symbol` will have the member
-`namespace` set accordingly. A symbol that is exported without a namespace will
-refer to `NULL`. There is no default namespace if none is defined. `modpost`
-and kernel/module.c make use the namespace at build time or module load time,
-respectively.
-
-2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
-=============================================
-
-Defining namespaces for all symbols of a subsystem can be very verbose and may
-become hard to maintain. Therefore a default define (DEFAULT_SYMBOL_NAMESPACE)
-is been provided, that, if set, will become the default for all EXPORT_SYMBOL()
-and EXPORT_SYMBOL_GPL() macro expansions that do not specify a namespace.
-
-There are multiple ways of specifying this define and it depends on the
-subsystem and the maintainer's preference, which one to use. The first option
-is to define the default namespace in the `Makefile` of the subsystem. E.g. to
-export all symbols defined in usb-common into the namespace USB_COMMON, add a
-line like this to drivers/usb/common/Makefile::
-
-       ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON
-
-That will affect all EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL() statements. A
-symbol exported with EXPORT_SYMBOL_NS() while this definition is present, will
-still be exported into the namespace that is passed as the namespace argument
-as this argument has preference over a default symbol namespace.
-
-A second option to define the default namespace is directly in the compilation
-unit as preprocessor statement. The above example would then read::
-
-       #undef  DEFAULT_SYMBOL_NAMESPACE
-       #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON
-
-within the corresponding compilation unit before any EXPORT_SYMBOL macro is
-used.
-
-3. How to use Symbols exported in Namespaces
-============================================
-
-In order to use symbols that are exported into namespaces, kernel modules need
-to explicitly import these namespaces. Otherwise the kernel might reject to
-load the module. The module code is required to use the macro MODULE_IMPORT_NS
-for the namespaces it uses symbols from. E.g. a module using the
-usb_stor_suspend symbol from above, needs to import the namespace USB_STORAGE
-using a statement like::
-
-       MODULE_IMPORT_NS(USB_STORAGE);
-
-This will create a `modinfo` tag in the module for each imported namespace.
-This has the side effect, that the imported namespaces of a module can be
-inspected with modinfo::
-
-       $ modinfo drivers/usb/storage/ums-karma.ko
-       [...]
-       import_ns:      USB_STORAGE
-       [...]
-
-
-It is advisable to add the MODULE_IMPORT_NS() statement close to other module
-metadata definitions like MODULE_AUTHOR() or MODULE_LICENSE(). Refer to section
-5. for a way to create missing import statements automatically.
-
-4. Loading Modules that use namespaced Symbols
-==============================================
-
-At module loading time (e.g. `insmod`), the kernel will check each symbol
-referenced from the module for its availability and whether the namespace it
-might be exported to has been imported by the module. The default behaviour of
-the kernel is to reject loading modules that don't specify sufficient imports.
-An error will be logged and loading will be failed with EINVAL. In order to
-allow loading of modules that don't satisfy this precondition, a configuration
-option is available: Setting MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y will
-enable loading regardless, but will emit a warning.
-
-5. Automatically creating MODULE_IMPORT_NS statements
-=====================================================
-
-Missing namespaces imports can easily be detected at build time. In fact,
-modpost will emit a warning if a module uses a symbol from a namespace
-without importing it.
-MODULE_IMPORT_NS() statements will usually be added at a definite location
-(along with other module meta data). To make the life of module authors (and
-subsystem maintainers) easier, a script and make target is available to fixup
-missing imports. Fixing missing imports can be done with::
-
-       $ make nsdeps
-
-A typical scenario for module authors would be::
-
-       - write code that depends on a symbol from a not imported namespace
-       - `make`
-       - notice the warning of modpost telling about a missing import
-       - run `make nsdeps` to add the import to the correct code location
-
-For subsystem maintainers introducing a namespace, the steps are very similar.
-Again, `make nsdeps` will eventually add the missing namespace imports for
-in-tree modules::
-
-       - move or add symbols to a namespace (e.g. with EXPORT_SYMBOL_NS())
-       - `make` (preferably with an allmodconfig to cover all in-kernel
-         modules)
-       - notice the warning of modpost telling about a missing import
-       - run `make nsdeps` to add the import to the correct code location
-
index ab92e98..5033938 100644 (file)
@@ -16,16 +16,21 @@ the kernel may be unreproducible, and how to avoid them.
 Timestamps
 ----------
 
-The kernel embeds a timestamp in two places:
+The kernel embeds timestamps in three places:
 
 * The version string exposed by ``uname()`` and included in
   ``/proc/version``
 
 * File timestamps in the embedded initramfs
 
-By default the timestamp is the current time.  This must be overridden
-using the `KBUILD_BUILD_TIMESTAMP`_ variable.  If you are building
-from a git commit, you could use its commit date.
+* If enabled via ``CONFIG_IKHEADERS``, file timestamps of kernel
+  headers embedded in the kernel or respective module,
+  exposed via ``/sys/kernel/kheaders.tar.xz``
+
+By default the timestamp is the current time and, in the case of
+``kheaders``, the various files' modification times. This must be
+overridden using the `KBUILD_BUILD_TIMESTAMP`_ variable.
+If you are building from a git commit, you could use its commit date.
 
 The kernel does *not* use the ``__DATE__`` and ``__TIME__`` macros,
 and enables warnings if they are used.  If you incorporate external
index f51f925..c1f7f75 100644 (file)
@@ -23,6 +23,7 @@ Contents:
    intel/ice
    google/gve
    mellanox/mlx5
+   netronome/nfp
    pensando/ionic
 
 .. only::  subproject and html
index 2b9f488..caf023c 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-==============================================================
-Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
-==============================================================
+=============================================================
+Linux Base Driver for the Intel(R) PRO/100 Family of Adapters
+=============================================================
 
 June 1, 2018
 
@@ -21,7 +21,7 @@ Contents
 In This Release
 ===============
 
-This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of
+This file describes the Linux Base Driver for the Intel(R) PRO/100 Family of
 Adapters. This driver includes support for Itanium(R)2-based systems.
 
 For questions related to hardware requirements, refer to the documentation
@@ -138,9 +138,9 @@ version 1.6 or later is required for this functionality.
 The latest release of ethtool can be found from
 https://www.kernel.org/pub/software/network/ethtool/
 
-Enabling Wake on LAN* (WoL)
----------------------------
-WoL is provided through the ethtool* utility.  For instructions on
+Enabling Wake on LAN (WoL)
+--------------------------
+WoL is provided through the ethtool utility.  For instructions on
 enabling WoL with ethtool, refer to the ethtool man page.  WoL will be
 enabled on the system during the next shut down or reboot.  For this
 driver version, in order to enable WoL, the e100 driver must be loaded
index 956560b..4aaae0f 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-===========================================================
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
+==========================================================
+Linux Base Driver for Intel(R) Ethernet Network Connection
+==========================================================
 
 Intel Gigabit Linux driver.
 Copyright(c) 1999 - 2013 Intel Corporation.
@@ -438,10 +438,10 @@ ethtool
   The latest release of ethtool can be found from
   https://www.kernel.org/pub/software/network/ethtool/
 
-Enabling Wake on LAN* (WoL)
----------------------------
+Enabling Wake on LAN (WoL)
+--------------------------
 
-  WoL is configured through the ethtool* utility.
+  WoL is configured through the ethtool utility.
 
   WoL will be enabled on the system during the next shut down or reboot.
   For this driver version, in order to enable WoL, the e1000 driver must be
index 01999f0..f49cd37 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-======================================================
-Linux* Driver for Intel(R) Ethernet Network Connection
-======================================================
+=====================================================
+Linux Driver for Intel(R) Ethernet Network Connection
+=====================================================
 
 Intel Gigabit Linux driver.
 Copyright(c) 2008-2018 Intel Corporation.
@@ -338,7 +338,7 @@ and higher cannot be forced. Use the autonegotiation advertising setting to
 manually set devices for 1 Gbps and higher.
 
 Speed, duplex, and autonegotiation advertising are configured through the
-ethtool* utility.
+ethtool utility.
 
 Caution: Only experienced network administrators should force speed and duplex
 or change autonegotiation advertising manually. The settings at the switch must
@@ -351,9 +351,9 @@ will not attempt to auto-negotiate with its link partner since those adapters
 operate only in full duplex and only at their native speed.
 
 
-Enabling Wake on LAN* (WoL)
----------------------------
-WoL is configured through the ethtool* utility.
+Enabling Wake on LAN (WoL)
+--------------------------
+WoL is configured through the ethtool utility.
 
 WoL will be enabled on the system during the next shut down or reboot. For
 this driver version, in order to enable WoL, the e1000e driver must be loaded
index ac3269e..4d279e6 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-==============================================================
-Linux* Base Driver for Intel(R) Ethernet Multi-host Controller
-==============================================================
+=============================================================
+Linux Base Driver for Intel(R) Ethernet Multi-host Controller
+=============================================================
 
 August 20, 2018
 Copyright(c) 2015-2018 Intel Corporation.
@@ -120,8 +120,8 @@ rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r
 Known Issues/Troubleshooting
 ============================
 
-Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS under Linux KVM
----------------------------------------------------------------------------------------
+Enabling SR-IOV in a 64-bit Microsoft Windows Server 2012/R2 guest OS under Linux KVM
+-------------------------------------------------------------------------------------
 KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
 includes traditional PCIe devices, as well as SR-IOV-capable devices based on
 the Intel Ethernet Controller XL710.
index 848fd38..8a9b185 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-==================================================================
-Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series
-==================================================================
+=================================================================
+Linux Base Driver for the Intel(R) Ethernet Controller 700 Series
+=================================================================
 
 Intel 40 Gigabit Linux driver.
 Copyright(c) 1999-2018 Intel Corporation.
@@ -384,7 +384,7 @@ NOTE: You cannot set the speed for devices based on the Intel(R) Ethernet
 Network Adapter XXV710 based devices.
 
 Speed, duplex, and autonegotiation advertising are configured through the
-ethtool* utility.
+ethtool utility.
 
 Caution: Only experienced network administrators should force speed and duplex
 or change autonegotiation advertising manually. The settings at the switch must
index cfc0884..84ac7e7 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-==================================================================
-Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function
-==================================================================
+=================================================================
+Linux Base Driver for Intel(R) Ethernet Adaptive Virtual Function
+=================================================================
 
 Intel Ethernet Adaptive Virtual Function Linux driver.
 Copyright(c) 2013-2018 Intel Corporation.
@@ -19,7 +19,7 @@ Contents
 Overview
 ========
 
-This file describes the iavf Linux* Base Driver. This driver was formerly
+This file describes the iavf Linux Base Driver. This driver was formerly
 called i40evf.
 
 The iavf driver supports the below mentioned virtual function devices and
index c220aa2..ee43ea5 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-===================================================================
-Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series
-===================================================================
+==================================================================
+Linux Base Driver for the Intel(R) Ethernet Connection E800 Series
+==================================================================
 
 Intel ice Linux driver.
 Copyright(c) 2018 Intel Corporation.
index fc8cfaa..87e560f 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-===========================================================
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
+==========================================================
+Linux Base Driver for Intel(R) Ethernet Network Connection
+==========================================================
 
 Intel Gigabit Linux driver.
 Copyright(c) 1999-2018 Intel Corporation.
@@ -129,9 +129,9 @@ version is required for this functionality. Download it at:
 https://www.kernel.org/pub/software/network/ethtool/
 
 
-Enabling Wake on LAN* (WoL)
----------------------------
-WoL is configured through the ethtool* utility.
+Enabling Wake on LAN (WoL)
+--------------------------
+WoL is configured through the ethtool utility.
 
 WoL will be enabled on the system during the next shut down or reboot. For
 this driver version, in order to enable WoL, the igb driver must be loaded
index 9cddabe..557fc02 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-============================================================
-Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet
-============================================================
+===========================================================
+Linux Base Virtual Function Driver for Intel(R) 1G Ethernet
+===========================================================
 
 Intel Gigabit Virtual Function Linux driver.
 Copyright(c) 1999-2018 Intel Corporation.
index c7d2548..f1d5233 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-=============================================================================
-Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters
-=============================================================================
+===========================================================================
+Linux Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters
+===========================================================================
 
 Intel 10 Gigabit Linux driver.
 Copyright(c) 1999-2018 Intel Corporation.
@@ -519,8 +519,8 @@ The offload is also supported for ixgbe's VFs, but the VF must be set as
 Known Issues/Troubleshooting
 ============================
 
-Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS
------------------------------------------------------------------------
+Enabling SR-IOV in a 64-bit Microsoft Windows Server 2012/R2 guest OS
+---------------------------------------------------------------------
 Linux KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM.
 This includes traditional PCIe devices, as well as SR-IOV-capable devices based
 on the Intel Ethernet Controller XL710.
index 5d49773..76bbde7 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-=============================================================
-Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet
-=============================================================
+============================================================
+Linux Base Virtual Function Driver for Intel(R) 10G Ethernet
+============================================================
 
 Intel 10 Gigabit Virtual Function Linux driver.
 Copyright(c) 1999-2018 Intel Corporation.
index 67b6839..c17d680 100644 (file)
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-==========================================================
-Linux* Driver for the Pensando(R) Ethernet adapter family
-==========================================================
+========================================================
+Linux Driver for the Pensando(R) Ethernet adapter family
+========================================================
 
 Pensando Linux Ethernet driver.
 Copyright(c) 2019 Pensando Systems, Inc
@@ -36,8 +36,10 @@ Support
 =======
 For general Linux networking support, please use the netdev mailing
 list, which is monitored by Pensando personnel::
+
   netdev@vger.kernel.org
 
 For more specific support needs, please use the Pensando driver support
 email::
-       drivers@pensando.io
+
+  drivers@pensando.io
index c20c7c4..8e90a85 100644 (file)
@@ -143,7 +143,8 @@ be added to the following table:
    * - ``port_list_is_empty``
      - ``drop``
      - Traps packets that the device decided to drop in case they need to be
-       flooded and the flood list is empty
+       flooded (e.g., unknown unicast, unregistered multicast) and there are
+       no ports the packets should be flooded to
    * - ``port_loopback_filter``
      - ``drop``
      - Traps packets that the device decided to drop in case after layer 2
index 49e95f4..8d4ad1d 100644 (file)
@@ -207,8 +207,8 @@ TCP variables:
 
 somaxconn - INTEGER
        Limit of socket listen() backlog, known in userspace as SOMAXCONN.
-       Defaults to 128.  See also tcp_max_syn_backlog for additional tuning
-       for TCP sockets.
+       Defaults to 4096 (was 128 before Linux 5.4).
+       See also tcp_max_syn_backlog for additional tuning for TCP sockets.
 
 tcp_abort_on_overflow - BOOLEAN
        If listening service is too slow to accept new connections,
@@ -408,11 +408,14 @@ tcp_max_orphans - INTEGER
        up to ~64K of unswappable memory.
 
 tcp_max_syn_backlog - INTEGER
-       Maximal number of remembered connection requests, which have not
-       received an acknowledgment from connecting client.
+       Maximal number of remembered connection requests (SYN_RECV),
+       which have not received an acknowledgment from the connecting client.
+       This is a per-listener limit.
        The minimal value is 128 for low memory machines, and it will
        increase in proportion to the memory of machine.
        If server suffers from overload, try increasing this number.
+       Remember to also check /proc/sys/net/core/somaxconn.
+       A SYN_RECV request socket consumes about 304 bytes of memory.
 
 tcp_max_tw_buckets - INTEGER
        Maximal number of timewait sockets held by system simultaneously.
index ce7e7a0..dc60b13 100644 (file)
@@ -272,7 +272,7 @@ supported flags are:
 * MSG_DONTWAIT, i.e. non-blocking operation.
 
 recvmsg(2)
-^^^^^^^^^
+^^^^^^^^^^
 
 In most cases recvmsg(2) is needed if you want to extract more information than
 recvfrom(2) can provide. For example package priority and timestamp. The
index 9cb31c5..9bdb7d5 100644 (file)
@@ -92,16 +92,16 @@ under some conditions.
 Part III: Registering a Network Device to DIM
 ==============================================
 
-Net DIM API exposes the main function net_dim(struct net_dim *dim,
-struct net_dim_sample end_sample). This function is the entry point to the Net
+Net DIM API exposes the main function net_dim(struct dim *dim,
+struct dim_sample end_sample). This function is the entry point to the Net
 DIM algorithm and has to be called every time the driver would like to check if
 it should change interrupt moderation parameters. The driver should provide two
-data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+data structures: struct dim and struct dim_sample. Struct dim
 describes the state of DIM for a specific object (RX queue, TX queue,
 other queues, etc.). This includes the current selected profile, previous data
 samples, the callback function provided by the driver and more.
-Struct net_dim_sample describes a data sample, which will be compared to the
-data sample stored in struct net_dim in order to decide on the algorithm's next
+Struct dim_sample describes a data sample, which will be compared to the
+data sample stored in struct dim in order to decide on the algorithm's next
 step. The sample should include bytes, packets and interrupts, measured by
 the driver.
 
@@ -110,9 +110,9 @@ main net_dim() function. The recommended method is to call net_dim() on each
 interrupt. Since Net DIM has a built-in moderation and it might decide to skip
 iterations under certain conditions, there is no need to moderate the net_dim()
 calls as well. As mentioned above, the driver needs to provide an object of type
-struct net_dim to the net_dim() function call. It is advised for each entity
-using Net DIM to hold a struct net_dim as part of its data structure and use it
-as the main Net DIM API object. The struct net_dim_sample should hold the latest
+struct dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct dim as part of its data structure and use it
+as the main Net DIM API object. The struct dim_sample should hold the latest
 bytes, packets and interrupts count. No need to perform any calculations, just
 include the raw data.
 
@@ -132,19 +132,19 @@ usage is not complete but it should make the outline of the usage clear.
 
 my_driver.c:
 
-#include <linux/net_dim.h>
+#include <linux/dim.h>
 
 /* Callback for net DIM to schedule on a decision to change moderation */
 void my_driver_do_dim_work(struct work_struct *work)
 {
-       /* Get struct net_dim from struct work_struct */
-       struct net_dim *dim = container_of(work, struct net_dim,
-                                          work);
+       /* Get struct dim from struct work_struct */
+       struct dim *dim = container_of(work, struct dim,
+                                      work);
        /* Do interrupt moderation related stuff */
        ...
 
        /* Signal net DIM work is done and it should move to next iteration */
-       dim->state = NET_DIM_START_MEASURE;
+       dim->state = DIM_START_MEASURE;
 }
 
 /* My driver's interrupt handler */
@@ -152,13 +152,13 @@ int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...)
 {
        ...
        /* A struct to hold current measured data */
-       struct net_dim_sample dim_sample;
+       struct dim_sample dim_sample;
        ...
        /* Initiate data sample struct with current data */
-       net_dim_sample(my_entity->events,
-                      my_entity->packets,
-                      my_entity->bytes,
-                      &dim_sample);
+       dim_update_sample(my_entity->events,
+                         my_entity->packets,
+                         my_entity->bytes,
+                         &dim_sample);
        /* Call net DIM */
        net_dim(&my_entity->dim, dim_sample);
        ...
index f4a2198..ada573b 100644 (file)
@@ -56,7 +56,7 @@ instead of ``double-indenting`` the ``case`` labels.  E.g.:
        case 'K':
        case 'k':
                mem <<= 10;
-               /* fall through */
+               fallthrough;
        default:
                break;
        }
index 053b24a..179f2a5 100644 (file)
@@ -122,14 +122,27 @@ memory adjacent to the stack (when built without `CONFIG_VMAP_STACK=y`)
 
 Implicit switch case fall-through
 ---------------------------------
-The C language allows switch cases to "fall through" when
-a "break" statement is missing at the end of a case. This,
-however, introduces ambiguity in the code, as it's not always
-clear if the missing break is intentional or a bug. As there
-have been a long list of flaws `due to missing "break" statements
+The C language allows switch cases to "fall-through" when a "break" statement
+is missing at the end of a case. This, however, introduces ambiguity in the
+code, as it's not always clear if the missing break is intentional or a bug.
+
+As there have been a long list of flaws `due to missing "break" statements
 <https://cwe.mitre.org/data/definitions/484.html>`_, we no longer allow
-"implicit fall-through". In order to identify an intentional fall-through
-case, we have adopted the marking used by static analyzers: a comment
-saying `/* Fall through */`. Once the C++17 `__attribute__((fallthrough))`
-is more widely handled by C compilers, static analyzers, and IDEs, we can
-switch to using that instead.
+"implicit fall-through".
+
+In order to identify intentional fall-through cases, we have adopted a
+pseudo-keyword macro 'fallthrough' which expands to gcc's
+__attribute__((__fallthrough__)) extension (see `Statement Attributes
+<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_).
+
+When the C17/C18 [[fallthrough]] syntax is more commonly supported by
+C compilers, static analyzers, and IDEs, we can switch to using that syntax
+for the macro pseudo-keyword.
+
+All switch/case blocks must end in one of:
+
+       break;
+       fallthrough;
+       continue;
+       goto <label>;
+       return [expression];
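+
+As a brief illustration (a sketch only; the helper and its values are
+hypothetical, and 'fallthrough' is assumed to come from
+include/linux/compiler_attributes.h)::
+
+	static unsigned long long scale_by_suffix(char suffix,
+						  unsigned long long mem)
+	{
+		switch (suffix) {
+		case 'G':
+		case 'g':
+			mem <<= 10;
+			fallthrough;	/* gigabytes are 2^10 megabytes */
+		case 'M':
+		case 'm':
+			mem <<= 10;
+			fallthrough;	/* megabytes are 2^10 kilobytes */
+		case 'K':
+		case 'k':
+			mem <<= 10;
+			break;
+		default:
+			break;
+		}
+		return mem;
+	}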
index 4026363..a3c3349 100644 (file)
@@ -143,6 +143,20 @@ via their employer, they cannot enter individual non-disclosure agreements
 in their role as Linux kernel developers. They will, however, agree to
 adhere to this documented process and the Memorandum of Understanding.
 
+The disclosing party should provide a list of contacts for all other
+entities who have already been, or should be, informed about the issue.
+This serves several purposes:
+
+ - The list of disclosed entities allows communication across the
+   industry, e.g. other OS vendors, HW vendors, etc.
+
+ - The disclosed entities can be contacted to name experts who should
+   participate in the mitigation development.
+
+ - If an expert who is required to handle an issue is employed by, or is a
+   member of, a listed entity, then the response teams can request the
+   disclosure of that expert from that entity. This ensures that the expert
+   is also part of the entity's response team.
 
 Disclosure
 """"""""""
@@ -158,10 +172,7 @@ Mitigation development
 """"""""""""""""""""""
 
 The initial response team sets up an encrypted mailing-list or repurposes
-an existing one if appropriate. The disclosing party should provide a list
-of contacts for all other parties who have already been, or should be,
-informed about the issue. The response team contacts these parties so they
-can name experts who should be subscribed to the mailing-list.
+an existing one if appropriate.
 
 Using a mailing-list is close to the normal Linux development process and
 has been successfully used in developing mitigations for various hardware
@@ -175,9 +186,24 @@ development branch against the mainline kernel and backport branches for
 stable kernel versions as necessary.
 
 The initial response team will identify further experts from the Linux
-kernel developer community as needed and inform the disclosing party about
-their participation. Bringing in experts can happen at any time of the
-development process and often needs to be handled in a timely manner.
+kernel developer community as needed. Bringing in experts can happen at any
+time of the development process and needs to be handled in a timely manner.
+
+If an expert is employed by or member of an entity on the disclosure list
+provided by the disclosing party, then participation will be requested from
+the relevant entity.
+
+If not, then the disclosing party will be informed about the expert's
+participation. The experts are covered by the Memorandum of Understanding
+and the disclosing party is requested to acknowledge their participation.
+If the disclosing party has a compelling reason to object, the objection
+has to be raised within five work days and resolved with the incident
+team immediately. If the disclosing party does not react within five
+work days, this is taken as silent acknowledgement.
+
+After acknowledgement or resolution of an objection the expert is disclosed
+by the incident team and brought into the development process.
+
 
 Coordinated release
 """""""""""""""""""
@@ -216,7 +242,7 @@ an involved disclosed party. The current ambassadors list:
   ARM
   AMD
   IBM
-  Intel
+  Intel                Tony Luck <tony.luck@intel.com>
   Qualcomm     Trilok Soni <tsoni@codeaurora.org>
 
   Microsoft    Sasha Levin <sashal@kernel.org>
index 3d1cca2..c5a8432 100644 (file)
@@ -68,8 +68,10 @@ descriptors by adding their identifier to the format string
  - 'd-ng': the digest of the event, calculated with an arbitrary hash
    algorithm (field format: [<hash algo>:]digest, where the digest
    prefix is shown only if the hash algorithm is not SHA1 or MD5);
+ - 'd-modsig': the digest of the event without the appended modsig;
  - 'n-ng': the name of the event, without size limitations;
  - 'sig': the file signature;
+ - 'modsig': the appended file signature;
  - 'buf': the buffer data that was used to generate the hash without size limitations;
 
 
@@ -79,6 +81,7 @@ Below, there is the list of defined template descriptors:
  - "ima-ng" (default): its format is ``d-ng|n-ng``;
  - "ima-sig": its format is ``d-ng|n-ng|sig``;
  - "ima-buf": its format is ``d-ng|n-ng|buf``;
+ - "ima-modsig": its format is ``d-ng|n-ng|sig|d-modsig|modsig``;
 
 
 Use
diff --git a/Documentation/usb/rio.rst b/Documentation/usb/rio.rst
deleted file mode 100644 (file)
index ea73475..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-============
-Diamonds Rio
-============
-
-Copyright (C) 1999, 2000 Bruce Tenison
-
-Portions Copyright (C) 1999, 2000 David Nelson
-
-Thanks to David Nelson for guidance and the usage of the scanner.txt
-and scanner.c files to model our driver and this informative file.
-
-Mar. 2, 2000
-
-Changes
-=======
-
-- Initial Revision
-
-
-Overview
-========
-
-This README will address issues regarding how to configure the kernel
-to access a RIO 500 mp3 player.
-Before I explain how to use this to access the Rio500 please be warned:
-
-.. warning::
-
-   Please note that this software is still under development.  The authors
-   are in no way responsible for any damage that may occur, no matter how
-   inconsequential.
-
-It seems that the Rio has a problem when sending .mp3 with low batteries.
-I suggest when the batteries are low and you want to transfer stuff that you
-replace it with a fresh one. In my case, what happened is I lost two 16kb
-blocks (they are no longer usable to store information to it). But I don't
-know if that's normal or not; it could simply be a problem with the flash
-memory.
-
-In an extreme case, I left my Rio playing overnight and the batteries wore
-down to nothing and appear to have corrupted the flash memory. My RIO
-needed to be replaced as a result.  Diamond tech support is aware of the
-problem.  Do NOT allow your batteries to wear down to nothing before
-changing them.  It appears RIO 500 firmware does not handle low battery
-power well at all.
-
-On systems with OHCI controllers, the kernel OHCI code appears to have
-power on problems with some chipsets.  If you are having problems
-connecting to your RIO 500, try turning it on first and then plugging it
-into the USB cable.
-
-Contact Information
--------------------
-
-   The main page for the project is hosted at sourceforge.net in the following
-   URL: <http://rio500.sourceforge.net>. You can also go to the project's
-   sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
-   There is also a mailing list: rio500-users@lists.sourceforge.net
-
-Authors
--------
-
-Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
-Clayton <kclayton@jps.net> is incharge of the PPC port and making sure
-things work there. Bruce Tenison <btenison@dibbs.net> is adding support
-for .fon files and also does testing. The program will mostly sure be
-re-written and Pete Ikusz along with the rest will re-design it. I would
-also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided use
-with some important information regarding the communication with the Rio.
-
-Additional Information and userspace tools
-
-       http://rio500.sourceforge.net/
-
-
-Requirements
-============
-
-A host with a USB port running a Linux kernel with RIO 500 support enabled.
-
-The driver is a module called rio500, which should be automatically loaded
-as you plug in your device. If that fails you can manually load it with
-
-  modprobe rio500
-
-Udev should automatically create a device node as soon as plug in your device.
-If that fails, you can manually add a device for the USB rio500::
-
-  mknod /dev/usb/rio500 c 180 64
-
-In that case, set appropriate permissions for /dev/usb/rio500 (don't forget
-about group and world permissions).  Both read and write permissions are
-required for proper operation.
-
-That's it.  The Rio500 Utils at: http://rio500.sourceforge.net should
-be able to access the rio500.
-
-Limits
-======
-
-You can use only a single rio500 device at a time with your computer.
-
-Bugs
-====
-
-If you encounter any problems feel free to drop me an email.
-
-Bruce Tenison
-btenison@dibbs.net
index 136f1ee..4833904 100644 (file)
@@ -5309,3 +5309,16 @@ Architectures: x86
 This capability indicates that KVM supports paravirtualized Hyper-V IPI send
 hypercalls:
 HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
+
+8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+
+Architectures: x86
+
+This capability indicates that KVM running on top of the Hyper-V hypervisor
+enables Direct TLB flush for its guests, meaning that TLB flush
+hypercalls are handled by the Level 0 hypervisor (Hyper-V), bypassing KVM.
+Due to the different ABI for hypercall parameters between Hyper-V and
+KVM, enabling this capability effectively disables all hypercall
+handling by KVM (as some KVM hypercalls may be mistakenly treated as TLB
+flush hypercalls by Hyper-V), so userspace should disable KVM identification
+in CPUID and only expose Hyper-V identification. In this case, the guest
+thinks it is running on Hyper-V and only uses Hyper-V hypercalls.
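Capabilities of this kind are turned on from userspace through the
KVM_ENABLE_CAP ioctl. A minimal sketch, assuming the per-vCPU enable path used
by the other Hyper-V capabilities (error handling elided):

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Hedged sketch: enable direct TLB flush on an already-created vCPU fd.
   * The per-vCPU path is an assumption based on the other Hyper-V caps. */
  static int enable_direct_tlbflush(int vcpu_fd)
  {
          struct kvm_enable_cap cap = {
                  .cap = KVM_CAP_HYPERV_DIRECT_TLBFLUSH,
          };

          return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
  }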
index 889b00b..ff51f4a 100644 (file)
@@ -54,9 +54,9 @@ Hugetlb-specific helpers:
 Support of split page table lock by an architecture
 ===================================================
 
-There's no need in special enabling of PTE split page table lock:
-everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
-which must be called on PTE table allocation / freeing.
+There is no need to specially enable the PTE split page table lock: everything
+required is done by pgtable_pte_page_ctor() and pgtable_pte_page_dtor(), which
+must be called on PTE table allocation / freeing.
 
 Make sure the architecture doesn't use slab allocator for page table
 allocation: slab uses page->slab_cache for its pages.
@@ -74,7 +74,7 @@ paths: i.e X86_PAE preallocate few PMDs on pgd_alloc().
 
 With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
 
-NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
+NOTE: pgtable_pte_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
 be handled properly.
 
 page->ptl
@@ -94,7 +94,7 @@ trick:
    split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs
    one more cache line for indirect access;
 
-The spinlock_t allocated in pgtable_page_ctor() for PTE table and in
+The spinlock_t is allocated in pgtable_pte_page_ctor() for the PTE table and in
 pgtable_pmd_page_ctor() for PMD table.
 pgtable_pmd_page_ctor() for PMD table.
 
 Please, never access page->ptl directly -- use appropriate helper.
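To make the ctor/dtor contract concrete, here is a hedged sketch of an
arch-side PTE table allocator (the allocation flags are illustrative, and
pgtable_t is assumed to be struct page *; the ARC hunk later in this merge
follows the same shape):

  #include <linux/mm.h>

  /* Sketch: pgtable_pte_page_ctor() may fail (e.g. when the split lock
   * cannot be allocated), so the fresh page must be freed on error. */
  static inline pgtable_t pte_alloc_one_sketch(struct mm_struct *mm)
  {
          struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

          if (!page)
                  return NULL;
          if (!pgtable_pte_page_ctor(page)) {
                  __free_page(page);
                  return NULL;
          }
          return page;
  }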
index a3985cc..223c993 100644 (file)
@@ -301,15 +301,6 @@ ixp4xx_wdt:
 
 -------------------------------------------------
 
-ks8695_wdt:
-    wdt_time:
-       Watchdog time in seconds. (default=5)
-    nowayout:
-       Watchdog cannot be stopped once started
-       (default=kernel config parameter)
-
--------------------------------------------------
-
 machzwd:
     nowayout:
        Watchdog cannot be stopped once started
@@ -375,16 +366,6 @@ nic7018_wdt:
 
 -------------------------------------------------
 
-nuc900_wdt:
-    heartbeat:
-       Watchdog heartbeats in seconds.
-       (default = 15)
-    nowayout:
-       Watchdog cannot be stopped once started
-       (default=kernel config parameter)
-
--------------------------------------------------
-
 omap_wdt:
     timer_margin:
        initial watchdog timeout (in seconds)
index 2912962..c179529 100644 (file)
@@ -643,6 +643,7 @@ F:  drivers/net/ethernet/alacritech/*
 
 FORCEDETH GIGABIT ETHERNET DRIVER
 M:     Rain River <rain.1986.08.12@gmail.com>
+M:     Zhu Yanjun <yanjun.zhu@oracle.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/nvidia/*
@@ -728,7 +729,7 @@ ALTERA SYSTEM MANAGER DRIVER
 M:     Thor Thayer <thor.thayer@linux.intel.com>
 S:     Maintained
 F:     drivers/mfd/altera-sysmgr.c
-F:     include/linux/mfd/altera-sysgmr.h
+F:     include/linux/mfd/altera-sysmgr.h
 
 ALTERA SYSTEM RESOURCE DRIVER FOR ARRIA10 DEVKIT
 M:     Thor Thayer <thor.thayer@linux.intel.com>
@@ -2164,12 +2165,10 @@ F:      arch/arm64/boot/dts/realtek/
 F:     Documentation/devicetree/bindings/arm/realtek.yaml
 
 ARM/RENESAS ARM64 ARCHITECTURE
-M:     Simon Horman <horms@verge.net.au>
 M:     Geert Uytterhoeven <geert+renesas@glider.be>
 M:     Magnus Damm <magnus.damm@gmail.com>
 L:     linux-renesas-soc@vger.kernel.org
 Q:     http://patchwork.kernel.org/project/linux-renesas-soc/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
 S:     Supported
 F:     arch/arm64/boot/dts/renesas/
@@ -2281,12 +2280,10 @@ S:      Maintained
 F:     drivers/media/platform/s5p-mfc/
 
 ARM/SHMOBILE ARM ARCHITECTURE
-M:     Simon Horman <horms@verge.net.au>
 M:     Geert Uytterhoeven <geert+renesas@glider.be>
 M:     Magnus Damm <magnus.damm@gmail.com>
 L:     linux-renesas-soc@vger.kernel.org
 Q:     http://patchwork.kernel.org/project/linux-renesas-soc/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
 S:     Supported
 F:     arch/arm/boot/dts/emev2*
@@ -2326,11 +2323,13 @@ F:      drivers/edac/altera_edac.
 
 ARM/SPREADTRUM SoC SUPPORT
 M:     Orson Zhai <orsonzhai@gmail.com>
-M:     Baolin Wang <baolin.wang@linaro.org>
+M:     Baolin Wang <baolin.wang7@gmail.com>
 M:     Chunyan Zhang <zhang.lyra@gmail.com>
 S:     Maintained
 F:     arch/arm64/boot/dts/sprd
 N:     sprd
+N:     sc27xx
+N:     sc2731
 
 ARM/STI ARCHITECTURE
 M:     Patrice Chotard <patrice.chotard@st.com>
@@ -2921,6 +2920,8 @@ F:        drivers/video/backlight/
 F:     include/linux/backlight.h
 F:     include/linux/pwm_backlight.h
 F:     Documentation/devicetree/bindings/leds/backlight
+F:     Documentation/ABI/stable/sysfs-class-backlight
+F:     Documentation/ABI/testing/sysfs-class-backlight
 
 BATMAN ADVANCED
 M:     Marek Lindner <mareklindner@neomailbox.ch>
@@ -3097,7 +3098,7 @@ S:        Supported
 F:     arch/arm64/net/
 
 BPF JIT for MIPS (32-BIT AND 64-BIT)
-M:     Paul Burton <paul.burton@mips.com>
+M:     Paul Burton <paulburton@kernel.org>
 L:     netdev@vger.kernel.org
 L:     bpf@vger.kernel.org
 S:     Maintained
@@ -3184,7 +3185,7 @@ N:        bcm216*
 N:     kona
 F:     arch/arm/mach-bcm/
 
-BROADCOM BCM2835 ARM ARCHITECTURE
+BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
 M:     Eric Anholt <eric@anholt.net>
 M:     Stefan Wahren <wahrenst@gmx.net>
 L:     bcm-kernel-feedback-list@broadcom.com
@@ -3192,6 +3193,7 @@ L:        linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/anholt/linux
 S:     Maintained
+N:     bcm2711
 N:     bcm2835
 F:     drivers/staging/vc04_services
 
@@ -3238,8 +3240,6 @@ S:        Maintained
 F:     drivers/usb/gadget/udc/bcm63xx_udc.*
 
 BROADCOM BCM7XXX ARM ARCHITECTURE
-M:     Brian Norris <computersforpeace@gmail.com>
-M:     Gregory Fong <gregory.0xf0@gmail.com>
 M:     Florian Fainelli <f.fainelli@gmail.com>
 M:     bcm-kernel-feedback-list@broadcom.com
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -6109,7 +6109,10 @@ M:       Gao Xiang <gaoxiang25@huawei.com>
 M:     Chao Yu <yuchao0@huawei.com>
 L:     linux-erofs@lists.ozlabs.org
 S:     Maintained
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
+F:     Documentation/filesystems/erofs.txt
 F:     fs/erofs/
+F:     include/trace/events/erofs.h
 
 ERRSEQ ERROR TRACKING INFRASTRUCTURE
 M:     Jeff Layton <jlayton@kernel.org>
@@ -7863,6 +7866,12 @@ S:       Maintained
 F:     drivers/mfd/lpc_ich.c
 F:     drivers/gpio/gpio-ich.c
 
+ICY I2C DRIVER
+M:     Max Staudt <max@enpas.org>
+L:     linux-i2c@vger.kernel.org
+S:     Maintained
+F:     drivers/i2c/busses/i2c-icy.c
+
 IDE SUBSYSTEM
 M:     "David S. Miller" <davem@davemloft.net>
 L:     linux-ide@vger.kernel.org
@@ -7993,7 +8002,7 @@ S:        Maintained
 F:     drivers/usb/atm/ueagle-atm.c
 
 IMGTEC ASCII LCD DRIVER
-M:     Paul Burton <paul.burton@mips.com>
+M:     Paul Burton <paulburton@kernel.org>
 S:     Maintained
 F:     Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
 F:     drivers/auxdisplay/img-ascii-lcd.c
@@ -9048,10 +9057,11 @@ S:      Supported
 F:     Documentation/security/keys/trusted-encrypted.rst
 F:     include/keys/trusted-type.h
 F:     security/keys/trusted.c
-F:     security/keys/trusted.h
+F:     include/keys/trusted.h
 
 KEYS/KEYRINGS:
 M:     David Howells <dhowells@redhat.com>
+M:     Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
 L:     keyrings@vger.kernel.org
 S:     Maintained
 F:     Documentation/security/keys/core.rst
@@ -9065,6 +9075,7 @@ F:        security/keys/
 KGDB / KDB /debug_core
 M:     Jason Wessel <jason.wessel@windriver.com>
 M:     Daniel Thompson <daniel.thompson@linaro.org>
+R:     Douglas Anderson <dianders@chromium.org>
 W:     http://kgdb.wiki.kernel.org/
 L:     kgdb-bugreport@lists.sourceforge.net
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git
@@ -9112,7 +9123,7 @@ F:        drivers/auxdisplay/ks0108.c
 F:     include/linux/ks0108.h
 
 L3MDEV
-M:     David Ahern <dsa@cumulusnetworks.com>
+M:     David Ahern <dsahern@kernel.org>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     net/l3mdev
@@ -9173,6 +9184,7 @@ M:        Pavel Machek <pavel@ucw.cz>
 R:     Dan Murphy <dmurphy@ti.com>
 L:     linux-leds@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
 S:     Maintained
 F:     Documentation/devicetree/bindings/leds/
 F:     drivers/leds/
@@ -10251,7 +10263,7 @@ MEDIATEK ETHERNET DRIVER
 M:     Felix Fietkau <nbd@openwrt.org>
 M:     John Crispin <john@phrozen.org>
 M:     Sean Wang <sean.wang@mediatek.com>
-M:     Nelson Chang <nelson.chang@mediatek.com>
+M:     Mark Lee <Mark-MC.Lee@mediatek.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/mediatek/
@@ -10514,8 +10526,12 @@ F:     mm/memblock.c
 F:     Documentation/core-api/boot-time-mm.rst
 
 MEMORY MANAGEMENT
+M:     Andrew Morton <akpm@linux-foundation.org>
 L:     linux-mm@kvack.org
 W:     http://www.linux-mm.org
+T:     quilt https://ozlabs.org/~akpm/mmotm/
+T:     quilt https://ozlabs.org/~akpm/mmots/
+T:     git git://github.com/hnaz/linux-mm.git
 S:     Maintained
 F:     include/linux/mm.h
 F:     include/linux/gfp.h
@@ -10824,7 +10840,7 @@ F:      drivers/usb/image/microtek.*
 
 MIPS
 M:     Ralf Baechle <ralf@linux-mips.org>
-M:     Paul Burton <paul.burton@mips.com>
+M:     Paul Burton <paulburton@kernel.org>
 M:     James Hogan <jhogan@kernel.org>
 L:     linux-mips@vger.kernel.org
 W:     http://www.linux-mips.org/
@@ -10838,7 +10854,7 @@ F:      arch/mips/
 F:     drivers/platform/mips/
 
 MIPS BOSTON DEVELOPMENT BOARD
-M:     Paul Burton <paul.burton@mips.com>
+M:     Paul Burton <paulburton@kernel.org>
 L:     linux-mips@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/clock/img,boston-clock.txt
@@ -10848,7 +10864,7 @@ F:      drivers/clk/imgtec/clk-boston.c
 F:     include/dt-bindings/clock/boston-clock.h
 
 MIPS GENERIC PLATFORM
-M:     Paul Burton <paul.burton@mips.com>
+M:     Paul Burton <paulburton@kernel.org>
 L:     linux-mips@vger.kernel.org
 S:     Supported
 F:     Documentation/devicetree/bindings/power/mti,mips-cpc.txt
@@ -11403,7 +11419,6 @@ F:      include/trace/events/tcp.h
 NETWORKING [TLS]
 M:     Boris Pismenny <borisp@mellanox.com>
 M:     Aviad Yehezkel <aviadye@mellanox.com>
-M:     Dave Watson <davejwatson@fb.com>
 M:     John Fastabend <john.fastabend@gmail.com>
 M:     Daniel Borkmann <daniel@iogearbox.net>
 M:     Jakub Kicinski <jakub.kicinski@netronome.com>
@@ -11540,6 +11555,7 @@ NSDEPS
 M:     Matthias Maennich <maennich@google.com>
 S:     Maintained
 F:     scripts/nsdeps
+F:     Documentation/core-api/symbol-namespaces.rst
 
 NTB AMD DRIVER
 M:     Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
@@ -12307,12 +12323,15 @@ F:    arch/parisc/
 F:     Documentation/parisc/
 F:     drivers/parisc/
 F:     drivers/char/agp/parisc-agp.c
+F:     drivers/input/misc/hp_sdc_rtc.c
 F:     drivers/input/serio/gscps2.c
+F:     drivers/input/serio/hp_sdc*
 F:     drivers/parport/parport_gsc.*
 F:     drivers/tty/serial/8250/8250_gsc.c
 F:     drivers/video/fbdev/sti*
 F:     drivers/video/console/sti*
 F:     drivers/video/logo/logo_parisc*
+F:     include/linux/hp_sdc.h
 
 PARMAN
 M:     Jiri Pirko <jiri@mellanox.com>
@@ -12587,16 +12606,18 @@ F:    arch/x86/kernel/early-quirks.c
 
 PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS
 M:     Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+R:     Andrew Murray <andrew.murray@arm.com>
 L:     linux-pci@vger.kernel.org
 Q:     http://patchwork.ozlabs.org/project/linux-pci/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/
 S:     Supported
 F:     drivers/pci/controller/
 
-PCIE DRIVER FOR ANNAPURNA LABS
+PCIE DRIVER FOR AMAZON ANNAPURNA LABS
 M:     Jonathan Chocron <jonnyc@amazon.com>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
+F:     Documentation/devicetree/bindings/pci/pcie-al.txt
 F:     drivers/pci/controller/dwc/pcie-al.c
 
 PCIE DRIVER FOR AMLOGIC MESON
@@ -13242,9 +13263,11 @@ F:     drivers/media/rc/pwm-ir-tx.c
 
 PWM SUBSYSTEM
 M:     Thierry Reding <thierry.reding@gmail.com>
+R:     Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 L:     linux-pwm@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git
+Q:     https://patchwork.ozlabs.org/project/linux-pwm/list/
 F:     Documentation/driver-api/pwm.rst
 F:     Documentation/devicetree/bindings/pwm/
 F:     include/linux/pwm.h
@@ -13253,6 +13276,7 @@ F:      drivers/video/backlight/pwm_bl.c
 F:     include/linux/pwm_backlight.h
 F:     drivers/gpio/gpio-mvebu.c
 F:     Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
+K:     pwm_(config|apply_state|ops)
 
 PXA GPIO DRIVER
 M:     Robert Jarzmik <robert.jarzmik@free.fr>
@@ -13351,7 +13375,7 @@ S:      Maintained
 F:     drivers/scsi/qla1280.[ch]
 
 QLOGIC QLA2XXX FC-SCSI DRIVER
-M:     qla2xxx-upstream@qlogic.com
+M:     hmadhani@marvell.com
 L:     linux-scsi@vger.kernel.org
 S:     Supported
 F:     Documentation/scsi/LICENSE.qla2xxx
@@ -13794,7 +13818,7 @@ F:      drivers/clk/renesas/
 RENESAS EMEV2 I2C DRIVER
 M:     Wolfram Sang <wsa+renesas@sang-engineering.com>
 S:     Supported
-F:     Documentation/devicetree/bindings/i2c/i2c-emev2.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt
 F:     drivers/i2c/busses/i2c-emev2.c
 
 RENESAS ETHERNET DRIVERS
@@ -13816,15 +13840,15 @@ F:    drivers/iio/adc/rcar-gyroadc.c
 RENESAS R-CAR I2C DRIVERS
 M:     Wolfram Sang <wsa+renesas@sang-engineering.com>
 S:     Supported
-F:     Documentation/devicetree/bindings/i2c/i2c-rcar.txt
-F:     Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,i2c.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,iic.txt
 F:     drivers/i2c/busses/i2c-rcar.c
 F:     drivers/i2c/busses/i2c-sh_mobile.c
 
 RENESAS RIIC DRIVER
 M:     Chris Brandt <chris.brandt@renesas.com>
 S:     Supported
-F:     Documentation/devicetree/bindings/i2c/i2c-riic.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,riic.txt
 F:     drivers/i2c/busses/i2c-riic.c
 
 RENESAS USB PHY DRIVER
@@ -13892,7 +13916,7 @@ F:      drivers/mtd/nand/raw/r852.h
 
 RISC-V ARCHITECTURE
 M:     Paul Walmsley <paul.walmsley@sifive.com>
-M:     Palmer Dabbelt <palmer@sifive.com>
+M:     Palmer Dabbelt <palmer@dabbelt.com>
 M:     Albert Ou <aou@eecs.berkeley.edu>
 L:     linux-riscv@lists.infradead.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
@@ -14769,7 +14793,7 @@ F:      drivers/media/usb/siano/
 F:     drivers/media/mmc/siano/
 
 SIFIVE DRIVERS
-M:     Palmer Dabbelt <palmer@sifive.com>
+M:     Palmer Dabbelt <palmer@dabbelt.com>
 M:     Paul Walmsley <paul.walmsley@sifive.com>
 L:     linux-riscv@lists.infradead.org
 T:     git git://github.com/sifive/riscv-linux.git
@@ -14779,7 +14803,7 @@ N:      sifive
 
 SIFIVE FU540 SYSTEM-ON-CHIP
 M:     Paul Walmsley <paul.walmsley@sifive.com>
-M:     Palmer Dabbelt <palmer@sifive.com>
+M:     Palmer Dabbelt <palmer@dabbelt.com>
 L:     linux-riscv@lists.infradead.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git
 S:     Supported
@@ -16068,6 +16092,7 @@ THERMAL
 M:     Zhang Rui <rui.zhang@intel.com>
 M:     Eduardo Valentin <edubezval@gmail.com>
 R:     Daniel Lezcano <daniel.lezcano@linaro.org>
+R:     Amit Kucheria <amit.kucheria@verdurent.com>
 L:     linux-pm@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git
@@ -16753,13 +16778,6 @@ W:     http://www.linux-usb.org/usbnet
 S:     Maintained
 F:     drivers/net/usb/dm9601.c
 
-USB DIAMOND RIO500 DRIVER
-M:     Cesar Miquel <miquel@df.uba.ar>
-L:     rio500-users@lists.sourceforge.net
-W:     http://rio500.sourceforge.net
-S:     Maintained
-F:     drivers/usb/misc/rio500*
-
 USB EHCI DRIVER
 M:     Alan Stern <stern@rowland.harvard.edu>
 L:     linux-usb@vger.kernel.org
@@ -17272,6 +17290,18 @@ S:     Supported
 F:     drivers/s390/virtio/
 F:     arch/s390/include/uapi/asm/virtio-ccw.h
 
+VIRTIO FILE SYSTEM
+M:     Vivek Goyal <vgoyal@redhat.com>
+M:     Stefan Hajnoczi <stefanha@redhat.com>
+M:     Miklos Szeredi <miklos@szeredi.hu>
+L:     virtualization@lists.linux-foundation.org
+L:     linux-fsdevel@vger.kernel.org
+W:     https://virtio-fs.gitlab.io/
+S:     Supported
+F:     fs/fuse/virtio_fs.c
+F:     include/uapi/linux/virtio_fs.h
+F:     Documentation/filesystems/virtiofs.rst
+
 VIRTIO GPU DRIVER
 M:     David Airlie <airlied@linux.ie>
 M:     Gerd Hoffmann <kraxel@redhat.com>
@@ -17414,7 +17444,7 @@ F:      include/linux/regulator/
 K:     regulator_get_optional
 
 VRF
-M:     David Ahern <dsa@cumulusnetworks.com>
+M:     David Ahern <dsahern@kernel.org>
 M:     Shrijeet Mukherjee <shrijeet@gmail.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
@@ -18015,6 +18045,7 @@ F:      Documentation/vm/zsmalloc.rst
 ZSWAP COMPRESSED SWAP CACHING
 M:     Seth Jennings <sjenning@redhat.com>
 M:     Dan Streetman <ddstreet@ieee.org>
+M:     Vitaly Wool <vitaly.wool@konsulko.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/zswap.c
index d456746..b37d0e8 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 3
+PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION =
-NAME = Bobtail Squid
+EXTRAVERSION = -rc6
+NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
@@ -206,24 +206,8 @@ ifndef KBUILD_CHECKSRC
   KBUILD_CHECKSRC = 0
 endif
 
-# Use make M=dir to specify directory of external module to build
-# Old syntax make ... SUBDIRS=$PWD is still supported
-# Setting the environment variable KBUILD_EXTMOD take precedence
-ifdef SUBDIRS
-  $(warning ================= WARNING ================)
-  $(warning 'SUBDIRS' will be removed after Linux 5.3)
-  $(warning )
-  $(warning If you are building an individual subdirectory)
-  $(warning in the kernel tree, you can do like this:)
-  $(warning $$ make path/to/dir/you/want/to/build/)
-  $(warning (Do not forget the trailing slash))
-  $(warning )
-  $(warning If you are building an external module,)
-  $(warning Please use 'M=' or 'KBUILD_EXTMOD' instead)
-  $(warning ==========================================)
-  KBUILD_EXTMOD ?= $(SUBDIRS)
-endif
-
+# Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the
+# directory of the external module to build. Setting M= takes precedence.
 ifeq ("$(origin M)", "command line")
   KBUILD_EXTMOD := $(M)
 endif
@@ -498,7 +482,6 @@ export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN
 export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
 export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
-export KBUILD_ARFLAGS
 
 # Files to ignore in find ... statements
 
@@ -616,7 +599,7 @@ endif
 # in addition to whatever we do anyway.
 # Just "make" or "make all" shall build modules as well
 
-ifneq ($(filter all _all modules,$(MAKECMDGOALS)),)
+ifneq ($(filter all _all modules nsdeps,$(MAKECMDGOALS)),)
   KBUILD_MODULES := 1
 endif
 
@@ -914,9 +897,6 @@ ifdef CONFIG_RETPOLINE
 KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
 endif
 
-# use the deterministic mode of AR if available
-KBUILD_ARFLAGS := $(call ar-option,D)
-
 include scripts/Makefile.kasan
 include scripts/Makefile.extrawarn
 include scripts/Makefile.ubsan
@@ -1057,7 +1037,7 @@ export KBUILD_VMLINUX_OBJS := $(head-y) $(init-y) $(core-y) $(libs-y2) \
 export KBUILD_VMLINUX_LIBS := $(libs-y1)
 export KBUILD_LDS          := arch/$(SRCARCH)/kernel/vmlinux.lds
 export LDFLAGS_vmlinux
-# used by scripts/package/Makefile
+# used by scripts/Makefile.package
 export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) LICENSES arch include scripts tools)
 
 vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_OBJS) $(KBUILD_VMLINUX_LIBS)
@@ -1237,9 +1217,8 @@ PHONY += kselftest
 kselftest:
        $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
 
-PHONY += kselftest-clean
-kselftest-clean:
-       $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
+kselftest-%: FORCE
+       $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests $*
 
 PHONY += kselftest-merge
 kselftest-merge:
index 0fcf8ec..5f8a5d8 100644 (file)
@@ -706,6 +706,17 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
          and vice-versa 32-bit applications to call 64-bit mmap().
          Required for applications doing different bitness syscalls.
 
+# This allows using a set of generic functions to determine the mmap base
+# address, giving priority to the top-down scheme only if the process
+# is not in legacy mode (compat task, unlimited stack size or
+# sysctl_legacy_va_layout).
+# Architectures that select this option can provide their own version of:
+# - STACK_RND_MASK
+config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+       bool
+       depends on MMU
+       select ARCH_HAS_ELF_RANDOMIZE
+
 config HAVE_COPY_THREAD_TLS
        bool
        help
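The "legacy mode" test described in the comment above reduces to a small
predicate. A sketch modeled on the generic helper this option pulls in (the
exact file and naming are assumptions):

  #include <linux/personality.h>
  #include <linux/resource.h>
  #include <linux/mm.h>

  /* Legacy (bottom-up) layout wins for compat personalities, for an
   * unlimited stack, or when the sysctl forces it; otherwise the
   * top-down scheme is used. */
  static int mmap_is_legacy(struct rlimit *rlim_stack)
  {
          if (current->personality & ADDR_COMPAT_LAYOUT)
                  return 1;
          if (rlim_stack->rlim_cur == RLIM_INFINITY)
                  return 1;
          return sysctl_legacy_va_layout;
  }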
index 71ded3b..eb91f1e 100644 (file)
@@ -53,6 +53,4 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
        free_page((unsigned long)pmd);
 }
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _ALPHA_PGALLOC_H */
index 89c2032..065b57f 100644 (file)
@@ -359,11 +359,6 @@ extern void paging_init(void);
 
 #include <asm-generic/pgtable.h>
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT.  */
 #define HAVE_ARCH_UNMAPPED_AREA
 
index ac23379..a18ec7f 100644 (file)
@@ -68,6 +68,9 @@
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
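These two hints plug into the existing madvise(2) interface. A hedged
userspace sketch (the fallback defines assume an older libc header; the
address range comes from the caller):

  #include <stddef.h>
  #include <sys/mman.h>

  #ifndef MADV_COLD
  #define MADV_COLD    20      /* matches the uapi value above */
  #endif
  #ifndef MADV_PAGEOUT
  #define MADV_PAGEOUT 21
  #endif

  /* Mark [addr, addr+len) cold so the kernel may deactivate it, then
   * ask for proactive reclaim of the same range. */
  static int hint_cold_range(void *addr, size_t len)
  {
          if (madvise(addr, len, MADV_COLD) != 0)
                  return -1;
          return madvise(addr, len, MADV_PAGEOUT);
  }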
 
index bfc7f5f..9acbeba 100644 (file)
                clock-frequency = <33333333>;
        };
 
+       reg_5v0: regulator-5v0 {
+               compatible = "regulator-fixed";
+
+               regulator-name = "5v0-supply";
+               regulator-min-microvolt = <5000000>;
+               regulator-max-microvolt = <5000000>;
+       };
+
        cpu_intc: cpu-interrupt-controller {
                compatible = "snps,archs-intc";
                interrupt-controller;
                        clocks = <&input_clk>;
                        cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>,
                                   <&creg_gpio 1 GPIO_ACTIVE_LOW>;
+
+                       spi-flash@0 {
+                               compatible = "sst26wf016b", "jedec,spi-nor";
+                               reg = <0>;
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               spi-max-frequency = <4000000>;
+                       };
+
+                       adc@1 {
+                               compatible = "ti,adc108s102";
+                               reg = <1>;
+                               vref-supply = <&reg_5v0>;
+                               spi-max-frequency = <1000000>;
+                       };
                };
 
                creg_gpio: gpio@14b0 {
index 9b9a744..0974226 100644 (file)
@@ -32,6 +32,8 @@ CONFIG_INET=y
 CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_MTD=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
@@ -55,6 +57,8 @@ CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_DWAPB=y
 CONFIG_GPIO_SNPS_CREG=y
 # CONFIG_HWMON is not set
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_DRM=y
 # CONFIG_DRM_FBDEV_EMULATION is not set
 CONFIG_DRM_UDL=y
@@ -72,6 +76,8 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_DW=y
 CONFIG_DMADEVICES=y
 CONFIG_DW_AXI_DMAC=y
+CONFIG_IIO=y
+CONFIG_TI_ADC108S102=y
 CONFIG_EXT3_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
index 9bdb8ed..b747f2e 100644 (file)
@@ -108,7 +108,7 @@ pte_alloc_one(struct mm_struct *mm)
                return 0;
        memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
        page = virt_to_page(pte_pg);
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return 0;
        }
@@ -123,13 +123,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
 {
-       pgtable_page_dtor(virt_to_page(ptep));
+       pgtable_pte_page_dtor(virt_to_page(ptep));
        free_pages((unsigned long)ptep, __get_order_pte());
 }
 
 #define __pte_free_tlb(tlb, pte, addr)  pte_free((tlb)->mm, pte)
 
-#define check_pgt_cache()   do { } while (0)
 #define pmd_pgtable(pmd)       ((pgtable_t) pmd_page_vaddr(pmd))
 
 #endif /* _ASM_ARC_PGALLOC_H */
index 1d87c18..7addd03 100644 (file)
@@ -395,11 +395,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 /* to cope with aliasing VIPT cache */
 #define HAVE_ARCH_UNMAPPED_AREA
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 #endif /* __ASSEMBLY__ */
 
 #endif
index 861a8ae..661fd84 100644 (file)
@@ -614,8 +614,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
        /* loop thru all available h/w condition indexes */
        for (i = 0; i < cc_bcr.c; i++) {
                write_aux_reg(ARC_REG_CC_INDEX, i);
-               cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
-               cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+               cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0));
+               cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1));
 
                arc_pmu_map_hw_event(i, cc_name.str);
                arc_pmu_add_raw_event_attr(i, cc_name.str);
index 229f2cd..8a50efb 100644 (file)
@@ -34,6 +34,7 @@ config ARM
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_CMPXCHG_LOCKREF
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_IPC_PARSE_VERSION
        select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
        select BUILDTIME_EXTABLE_SORT if MMU
index 18f70b3..204bccf 100644 (file)
        pinctrl-0 = <&mmc0_pins_default>;
 };
 
-&gpio0 {
+&gpio0_target {
        /* Do not idle the GPIO used for holding the VTT regulator */
        ti,no-reset-on-init;
        ti,no-idle-on-init;
index 9915c89..7a9eb2b 100644 (file)
                        ranges = <0x0 0x5000 0x1000>;
                };
 
-               target-module@7000 {                    /* 0x44e07000, ap 14 20.0 */
+               gpio0_target: target-module@7000 {      /* 0x44e07000, ap 14 20.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
                        ti,hwmods = "gpio1";
                        reg = <0x7000 0x4>,
                        reg = <0xe000 0x4>,
                              <0xe054 0x4>;
                        reg-names = "rev", "sysc";
-                       ti,sysc-midle ;
+                       ti,sysc-midle = <SYSC_IDLE_FORCE>,
+                                       <SYSC_IDLE_NO>,
+                                       <SYSC_IDLE_SMART>;
                        ti,sysc-sidle = <SYSC_IDLE_FORCE>,
                                        <SYSC_IDLE_NO>,
                                        <SYSC_IDLE_SMART>;
index ebfe28c..a1fd3e6 100644 (file)
        };
 
        lcd0: display@0 {
-               compatible = "panel-dpi";
+               /* This isn't the exact LCD, but the timings meet spec */
+               /* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */
+               compatible = "newhaven,nhd-4.3-480272ef-atxl";
                label = "15";
-               status = "okay";
-               pinctrl-names = "default";
+               backlight = <&bl>;
                enable-gpios = <&gpio6 16 GPIO_ACTIVE_HIGH>;    /* gpio176, lcd INI */
                vcc-supply = <&vdd_io_reg>;
 
                                remote-endpoint = <&dpi_out>;
                        };
                };
-
-               panel-timing {
-                       clock-frequency = <9000000>;
-                       hactive = <480>;
-                       vactive = <272>;
-                       hfront-porch = <3>;
-                       hback-porch = <2>;
-                       hsync-len = <42>;
-                       vback-porch = <3>;
-                       vfront-porch = <4>;
-                       vsync-len = <11>;
-                       hsync-active = <0>;
-                       vsync-active = <0>;
-                       de-active = <1>;
-                       pixelclk-active = <1>;
-               };
        };
 
        bl: backlight {
index 883fb85..1b4b2b0 100644 (file)
                reg = <0x70>;
                #address-cells = <1>;
                #size-cells = <0>;
+               i2c-mux-idle-disconnect;
 
                i2c@0 {
                        /* FMC A */
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <0>;
-                       i2c-mux-idle-disconnect;
                };
 
                i2c@1 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <1>;
-                       i2c-mux-idle-disconnect;
                };
 
                i2c@2 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <2>;
-                       i2c-mux-idle-disconnect;
                };
 
                i2c@3 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <3>;
-                       i2c-mux-idle-disconnect;
                };
 
                i2c@4 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <4>;
-                       i2c-mux-idle-disconnect;
                };
 
                i2c@5 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <5>;
-                       i2c-mux-idle-disconnect;
 
                        ina230@40 { compatible = "ti,ina230"; reg = <0x40>; shunt-resistor = <5000>; };
                        ina230@41 { compatible = "ti,ina230"; reg = <0x41>; shunt-resistor = <5000>; };
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <6>;
-                       i2c-mux-idle-disconnect;
                };
 
                i2c@7 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <7>;
-                       i2c-mux-idle-disconnect;
 
                        u41: pca9575@20 {
                                compatible = "nxp,pca9575";
index 848e2a8..14bbc43 100644 (file)
                                ti,hwmods = "dss_dispc";
                                clocks = <&disp_clk>;
                                clock-names = "fck";
+
+                               max-memory-bandwidth = <230000000>;
                        };
 
                        rfbi: rfbi@4832a800 {
index 09a088f..b75af21 100644 (file)
        #address-cells = <1>;
        #size-cells = <0>;
        pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>;
+       bus-width = <4>;
        mmc-pwrseq = <&wifi_pwrseq>;
        non-removable;
        status = "okay";
index 7c3cb7e..925cb37 100644 (file)
@@ -9,6 +9,14 @@
                reg = <0 0x40000000>;
        };
 
+       leds {
+               /*
+                * Since there is no upstream GPIO driver yet,
+                * remove the incomplete node.
+                */
+               /delete-node/ act;
+       };
+
        reg_3v3: fixed-regulator {
                compatible = "regulator-fixed";
                regulator-name = "3V3";
index ea0e7c1..5cac2dd 100644 (file)
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 129 1>, <&edma_xbar 128 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 22>,
+                               clocks = <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 0>,
                                         <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 24>,
                                         <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 28>;
                                clock-names = "fck", "ahclkx", "ahclkr";
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 131 1>, <&edma_xbar 130 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 22>,
-                                        <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 24>,
+                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 0>,
+                                        <&ipu_clkctrl DRA7_IPU_MCASP1_CLKCTRL 24>,
                                         <&l4per2_clkctrl DRA7_L4PER2_MCASP2_CLKCTRL 28>;
                                clock-names = "fck", "ahclkx", "ahclkr";
                                status = "disabled";
                                        <SYSC_IDLE_SMART>;
                        /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */
                        clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 0>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 28>;
-                       clock-names = "fck", "ahclkx", "ahclkr";
+                                <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>;
+                       clock-names = "fck", "ahclkx";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x68000 0x2000>,
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 133 1>, <&edma_xbar 132 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 22>,
+                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 0>,
                                         <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>;
                                clock-names = "fck", "ahclkx";
                                status = "disabled";
                                        <SYSC_IDLE_SMART>;
                        /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */
                        clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 0>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 24>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 28>;
-                       clock-names = "fck", "ahclkx", "ahclkr";
+                                <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 24>;
+                       clock-names = "fck", "ahclkx";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x6c000 0x2000>,
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 135 1>, <&edma_xbar 134 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 22>,
+                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 0>,
                                         <&l4per2_clkctrl DRA7_L4PER2_MCASP4_CLKCTRL 24>;
                                clock-names = "fck", "ahclkx";
                                status = "disabled";
                                        <SYSC_IDLE_SMART>;
                        /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */
                        clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 0>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 24>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 28>;
-                       clock-names = "fck", "ahclkx", "ahclkr";
+                                <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 24>;
+                       clock-names = "fck", "ahclkx";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x70000 0x2000>,
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 137 1>, <&edma_xbar 136 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 22>,
+                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 0>,
                                         <&l4per2_clkctrl DRA7_L4PER2_MCASP5_CLKCTRL 24>;
                                clock-names = "fck", "ahclkx";
                                status = "disabled";
                                        <SYSC_IDLE_SMART>;
                        /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */
                        clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 0>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 24>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 28>;
-                       clock-names = "fck", "ahclkx", "ahclkr";
+                                <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 24>;
+                       clock-names = "fck", "ahclkx";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x74000 0x2000>,
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 139 1>, <&edma_xbar 138 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 22>,
+                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 0>,
                                         <&l4per2_clkctrl DRA7_L4PER2_MCASP6_CLKCTRL 24>;
                                clock-names = "fck", "ahclkx";
                                status = "disabled";
                                        <SYSC_IDLE_SMART>;
                        /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */
                        clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 0>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 24>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 28>;
-                       clock-names = "fck", "ahclkx", "ahclkr";
+                                <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 24>;
+                       clock-names = "fck", "ahclkx";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x78000 0x2000>,
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 141 1>, <&edma_xbar 140 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 22>,
+                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 0>,
                                         <&l4per2_clkctrl DRA7_L4PER2_MCASP7_CLKCTRL 24>;
                                clock-names = "fck", "ahclkx";
                                status = "disabled";
                                        <SYSC_IDLE_SMART>;
                        /* Domains (P, C): l4per_pwrdm, l4per2_clkdm */
                        clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 0>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 24>,
-                                <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 28>;
-                       clock-names = "fck", "ahclkx", "ahclkr";
+                                <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 24>;
+                       clock-names = "fck", "ahclkx";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x7c000 0x2000>,
                                interrupt-names = "tx", "rx";
                                dmas = <&edma_xbar 143 1>, <&edma_xbar 142 1>;
                                dma-names = "tx", "rx";
-                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 22>,
+                               clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 0>,
                                         <&l4per2_clkctrl DRA7_L4PER2_MCASP8_CLKCTRL 24>;
                                clock-names = "fck", "ahclkx";
                                status = "disabled";
index 7ceae35..547fb14 100644 (file)
        vin-supply = <&sw1c_reg>;
 };
 
+&snvs_poweroff {
+       status = "okay";
+};
+
 &iomuxc {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_hog>;
index 710f850..e2e604d 100644 (file)
                                compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
                                reg = <0x302d0000 0x10000>;
                                interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clks IMX7D_CLK_DUMMY>,
+                               clocks = <&clks IMX7D_GPT1_ROOT_CLK>,
                                         <&clks IMX7D_GPT1_ROOT_CLK>;
                                clock-names = "ipg", "per";
                        };
                                compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
                                reg = <0x302e0000 0x10000>;
                                interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clks IMX7D_CLK_DUMMY>,
+                               clocks = <&clks IMX7D_GPT2_ROOT_CLK>,
                                         <&clks IMX7D_GPT2_ROOT_CLK>;
                                clock-names = "ipg", "per";
                                status = "disabled";
                                compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
                                reg = <0x302f0000 0x10000>;
                                interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clks IMX7D_CLK_DUMMY>,
+                               clocks = <&clks IMX7D_GPT3_ROOT_CLK>,
                                         <&clks IMX7D_GPT3_ROOT_CLK>;
                                clock-names = "ipg", "per";
                                status = "disabled";
                                compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
                                reg = <0x30300000 0x10000>;
                                interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clks IMX7D_CLK_DUMMY>,
+                               clocks = <&clks IMX7D_GPT4_ROOT_CLK>,
                                         <&clks IMX7D_GPT4_ROOT_CLK>;
                                clock-names = "ipg", "per";
                                status = "disabled";
index 5563ee5..b56524c 100644 (file)
                >;
        };
 
+       i2c2_pins: pinmux_i2c2_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT | MUX_MODE0)        /* i2c2_scl */
+                       OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT | MUX_MODE0)        /* i2c2_sda */
+               >;
+       };
+
+       i2c3_pins: pinmux_i2c3_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT | MUX_MODE0)        /* i2c3_scl */
+                       OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT | MUX_MODE0)        /* i2c3_sda */
+               >;
+       };
+
        tsc2004_pins: pinmux_tsc2004_pins {
                pinctrl-single,pins = <
                        OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE4)        /* mcbsp4_dr.gpio_153 */
                        OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4)        /* sys_boot1.gpio_3 */
                >;
        };
-       i2c2_pins: pinmux_i2c2_pins {
-               pinctrl-single,pins = <
-                       OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT | MUX_MODE0)        /* i2c2_scl */
-                       OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT | MUX_MODE0)        /* i2c2_sda */
-               >;
-       };
-       i2c3_pins: pinmux_i2c3_pins {
-               pinctrl-single,pins = <
-                       OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT | MUX_MODE0)        /* i2c3_scl */
-                       OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT | MUX_MODE0)        /* i2c3_sda */
-               >;
-       };
 };
 
 &omap3_pmx_core2 {
index 642e809..449cc76 100644 (file)
 &dss {
        status = "ok";
        vdds_dsi-supply = <&vpll2>;
-       vdda_video-supply = <&video_reg>;
        pinctrl-names = "default";
        pinctrl-0 = <&dss_dpi_pins1>;
        port {
                display0 = &lcd0;
        };
 
-       video_reg: video_reg {
-               pinctrl-names = "default";
-               pinctrl-0 = <&panel_pwr_pins>;
-               compatible = "regulator-fixed";
-               regulator-name = "fixed-supply";
-               regulator-min-microvolt = <3300000>;
-               regulator-max-microvolt = <3300000>;
-               gpio = <&gpio5 27 GPIO_ACTIVE_HIGH>;    /* gpio155, lcd INI */
-       };
-
        lcd0: display {
-               compatible = "panel-dpi";
+               /* This isn't the exact LCD, but the timings meet spec */
+               /* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */
+               compatible = "newhaven,nhd-4.3-480272ef-atxl";
                label = "15";
-               status = "okay";
-               /* default-on; */
                pinctrl-names = "default";
-
+               pinctrl-0 = <&panel_pwr_pins>;
+               backlight = <&bl>;
+               enable-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>;
                port {
                        lcd_in: endpoint {
                                remote-endpoint = <&dpi_out>;
                        };
                };
-
-               panel-timing {
-                       clock-frequency = <9000000>;
-                       hactive = <480>;
-                       vactive = <272>;
-                       hfront-porch = <3>;
-                       hback-porch = <2>;
-                       hsync-len = <42>;
-                       vback-porch = <3>;
-                       vfront-porch = <4>;
-                       vsync-len = <11>;
-                       hsync-active = <0>;
-                       vsync-active = <0>;
-                       de-active = <1>;
-                       pixelclk-active = <1>;
-               };
        };
 
        bl: backlight {
index 3fdd0a7..506b118 100644 (file)
 &twl_gpio {
        ti,use-leds;
 };
+
+&twl_keypad {
+       status = "disabled";
+};
index 464df42..2f6977a 100644 (file)
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
index 3621b7d..9980c10 100644 (file)
        pinctrl-1 = <&ephy_leds_pins>;
        status = "okay";
 
+       gmac0: mac@0 {
+               compatible = "mediatek,eth-mac";
+               reg = <0>;
+               phy-mode = "2500base-x";
+               fixed-link {
+                       speed = <2500>;
+                       full-duplex;
+                       pause;
+               };
+       };
+
        gmac1: mac@1 {
                compatible = "mediatek,eth-mac";
                reg = <1>;
+               phy-mode = "gmii";
                phy-handle = <&phy0>;
        };
 
@@ -78,7 +90,6 @@
 
                phy0: ethernet-phy@0 {
                        reg = <0>;
-                       phy-mode = "gmii";
                };
        };
 };
index 9608bc2..867b881 100644 (file)
                        compatible = "mediatek,mt7629-sgmiisys", "syscon";
                        reg = <0x1b128000 0x3000>;
                        #clock-cells = <1>;
-                       mediatek,physpeed = "2500";
                };
 
                sgmiisys1: syscon@1b130000 {
                        compatible = "mediatek,mt7629-sgmiisys", "syscon";
                        reg = <0x1b130000 0x3000>;
                        #clock-cells = <1>;
-                       mediatek,physpeed = "2500";
                };
        };
 };
index d01fc87..b6ef1a7 100644 (file)
                        spi-max-frequency = <100000>;
                        spi-cpol;
                        spi-cpha;
+                       spi-cs-high;
 
                        backlight= <&backlight>;
                        label = "lcd";
index 4454449..a40fe8d 100644 (file)
                compatible = "ti,wl1285", "ti,wl1283";
                reg = <2>;
                /* gpio_100 with gpmc_wait2 pad as wakeirq */
-               interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>,
+               interrupts-extended = <&gpio4 4 IRQ_TYPE_LEVEL_HIGH>,
                                      <&omap4_pmx_core 0x4e>;
                interrupt-names = "irq", "wakeup";
                ref-clock-frequency = <26000000>;
index 14be2ec..55ea8b6 100644 (file)
                compatible = "ti,wl1271";
                reg = <2>;
                /* gpio_53 with gpmc_ncs3 pad as wakeup */
-               interrupts-extended = <&gpio2 21 IRQ_TYPE_EDGE_RISING>,
+               interrupts-extended = <&gpio2 21 IRQ_TYPE_LEVEL_HIGH>,
                                      <&omap4_pmx_core 0x3a>;
                interrupt-names = "irq", "wakeup";
                ref-clock-frequency = <38400000>;
index 3c27496..91480ac 100644 (file)
                compatible = "ti,wl1281";
                reg = <2>;
                interrupt-parent = <&gpio1>;
-               interrupts = <21 IRQ_TYPE_EDGE_RISING>; /* gpio 53 */
+               interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* gpio 53 */
                ref-clock-frequency = <26000000>;
                tcxo-clock-frequency = <26000000>;
        };
index 6dbbc9b..d003221 100644 (file)
@@ -69,7 +69,7 @@
                compatible = "ti,wl1271";
                reg = <2>;
                interrupt-parent = <&gpio2>;
-               interrupts = <9 IRQ_TYPE_EDGE_RISING>; /* gpio 41 */
+               interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; /* gpio 41 */
                ref-clock-frequency = <38400000>;
        };
 };
index 7fff555..68ac046 100644 (file)
                pinctrl-names = "default";
                pinctrl-0 = <&wlcore_irq_pin>;
                interrupt-parent = <&gpio1>;
-               interrupts = <14 IRQ_TYPE_EDGE_RISING>; /* gpio 14 */
+               interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;  /* gpio 14 */
                ref-clock-frequency = <26000000>;
        };
 };
index fac2e57..4791834 100644 (file)
                };
        };
 
-       gpu_cm: clock-controller@1500 {
+       gpu_cm: gpu_cm@1500 {
                compatible = "ti,omap4-cm";
                reg = <0x1500 0x100>;
                #address-cells = <1>;
index a53657b..bda454d 100644 (file)
@@ -8,6 +8,7 @@
 #include <dt-bindings/mfd/dbx500-prcmu.h>
 #include <dt-bindings/arm/ux500_pm_domains.h>
 #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/thermal/thermal.h>
 
 / {
        #address-cells = <1>;
                 * cooling.
                 */
                cpu_thermal: cpu-thermal {
-                       polling-delay-passive = <0>;
-                       polling-delay = <1000>;
+                       polling-delay-passive = <250>;
+                       /*
+                        * This sensor fires interrupts to update the thermal
+                        * zone, so no polling is needed.
+                        */
+                       polling-delay = <0>;
 
                        thermal-sensors = <&thermal>;
 
@@ -79,7 +84,7 @@
 
                        cooling-maps {
                                trip = <&cpu_alert>;
-                               cooling-device = <&CPU0 0 2>;
+                               cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
                                contribution = <100>;
                        };
                };
index e4a0d51..0a3a7d6 100644 (file)
                                                 <STM32_PINMUX('F', 6, AF9)>; /* QSPI_BK1_IO3 */
                                        bias-disable;
                                        drive-push-pull;
-                                       slew-rate = <3>;
+                                       slew-rate = <1>;
                                };
                                pins2 {
                                        pinmux = <STM32_PINMUX('B', 6, AF10)>; /* QSPI_BK1_NCS */
                                        bias-pull-up;
                                        drive-push-pull;
-                                       slew-rate = <3>;
+                                       slew-rate = <1>;
                                };
                        };
 
                                                 <STM32_PINMUX('G', 7, AF11)>; /* QSPI_BK2_IO3 */
                                        bias-disable;
                                        drive-push-pull;
-                                       slew-rate = <3>;
+                                       slew-rate = <1>;
                                };
                                pins2 {
                                        pinmux = <STM32_PINMUX('C', 0, AF10)>; /* QSPI_BK2_NCS */
                                        bias-pull-up;
                                        drive-push-pull;
-                                       slew-rate = <3>;
+                                       slew-rate = <1>;
                                };
                        };
 
index ce823c4..4c268b7 100644 (file)
                        interrupts = <39>;
                        clocks = <&ccu CLK_AHB_EHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        interrupts = <64>;
                        clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        interrupts = <40>;
                        clocks = <&ccu CLK_AHB_EHCI1>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        interrupts = <65>;
                        clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index cfb1efc..6befa23 100644 (file)
                        interrupts = <39>;
                        clocks = <&ccu CLK_AHB_EHCI>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        interrupts = <40>;
                        clocks = <&ccu CLK_USB_OHCI>, <&ccu CLK_AHB_OHCI>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index bbeb743..ac76380 100644 (file)
                        clocks = <&ccu CLK_AHB1_EHCI0>;
                        resets = <&ccu RST_AHB1_EHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_AHB1_OHCI0>, <&ccu CLK_USB_OHCI0>;
                        resets = <&ccu RST_AHB1_OHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_AHB1_EHCI1>;
                        resets = <&ccu RST_AHB1_EHCI1>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_AHB1_OHCI1>, <&ccu CLK_USB_OHCI1>;
                        resets = <&ccu RST_AHB1_OHCI1>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index 49380de..8aebefd 100644 (file)
                        compatible = "allwinner,sun7i-a20-csi0";
                        reg = <0x01c09000 0x1000>;
                        interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI0>,
-                                <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
-                       clock-names = "bus", "mod", "isp", "ram";
+                       clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
+                       clock-names = "bus", "isp", "ram";
                        resets = <&ccu RST_CSI0>;
                        status = "disabled";
                };
                        interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&ccu CLK_AHB_EHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&ccu CLK_AHB_EHCI1>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index 52eed0a..f292f96 100644 (file)
                        clocks = <&ccu CLK_BUS_EHCI>;
                        resets = <&ccu RST_BUS_EHCI>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_BUS_OHCI>, <&ccu CLK_USB_OHCI>;
                        resets = <&ccu RST_BUS_OHCI>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index 523be66..74bb053 100644 (file)
                        clocks = <&ccu CLK_BUS_EHCI0>;
                        resets = <&ccu RST_BUS_EHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>;
                        resets = <&ccu RST_BUS_OHCI0>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_BUS_EHCI1>;
                        resets = <&ccu RST_BUS_EHCI1>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index bde0681..c9c2688 100644 (file)
                        clocks = <&ccu CLK_BUS_EHCI1>;
                        resets = <&ccu RST_BUS_EHCI1>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&ccu CLK_USB_OHCI1>;
                        resets = <&ccu RST_BUS_OHCI1>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_BUS_EHCI2>;
                        resets = <&ccu RST_BUS_EHCI2>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&ccu CLK_USB_OHCI2>;
                        resets = <&ccu RST_BUS_OHCI2>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index c34d505..b9b6fb0 100644 (file)
                        clocks = <&usb_clocks CLK_BUS_HCI0>;
                        resets = <&usb_clocks RST_USB0_HCI>;
                        phys = <&usbphy1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&usb_clocks CLK_USB_OHCI0>;
                        resets = <&usb_clocks RST_USB0_HCI>;
                        phys = <&usbphy1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&usb_clocks CLK_BUS_HCI1>;
                        resets = <&usb_clocks RST_USB1_HCI>;
                        phys = <&usbphy2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&usb_clocks CLK_BUS_HCI2>;
                        resets = <&usb_clocks RST_USB2_HCI>;
                        phys = <&usbphy3>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&usb_clocks CLK_USB_OHCI2>;
                        resets = <&usb_clocks RST_USB2_HCI>;
                        phys = <&usbphy3>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index eba190b..107eeaf 100644 (file)
                        clocks = <&ccu CLK_BUS_EHCI1>, <&ccu CLK_BUS_OHCI1>;
                        resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&ccu CLK_USB_OHCI1>;
                        resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_BUS_EHCI2>, <&ccu CLK_BUS_OHCI2>;
                        resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&ccu CLK_USB_OHCI2>;
                        resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>;
                        phys = <&usbphy 2>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                        clocks = <&ccu CLK_BUS_EHCI3>, <&ccu CLK_BUS_OHCI3>;
                        resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>;
                        phys = <&usbphy 3>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&ccu CLK_USB_OHCI3>;
                        resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>;
                        phys = <&usbphy 3>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index dc8a5f3..c8ebb23 100644 (file)
                #address-cells = <1>;
                #size-cells = <0>;
                reg = <0x70>;
+               i2c-mux-idle-disconnect;
 
                sff0_i2c: i2c@1 {
                        #address-cells = <1>;
                reg = <0x71>;
                #address-cells = <1>;
                #size-cells = <0>;
+               i2c-mux-idle-disconnect;
 
                sff5_i2c: i2c@1 {
                        #address-cells = <1>;
index 5ae5b52..ef484c4 100644 (file)
@@ -91,7 +91,6 @@ CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_SERIAL_CYBERJACK=m
 CONFIG_USB_SERIAL_XIRCOM=m
 CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_RIO500=m
 CONFIG_EXT2_FS=m
 CONFIG_EXT3_FS=m
 CONFIG_MSDOS_FS=y
index e4f6442..4fec2ec 100644 (file)
@@ -195,7 +195,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
 CONFIG_USB_SERIAL_OMNINET=m
 CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_CYTHERM=m
index b34970c..231f897 100644 (file)
@@ -167,6 +167,7 @@ CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_DA8XX=y
 CONFIG_BACKLIGHT_PWM=m
+CONFIG_BACKLIGHT_GPIO=m
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
@@ -228,7 +229,7 @@ CONFIG_RTC_DRV_OMAP=m
 CONFIG_DMADEVICES=y
 CONFIG_TI_EDMA=y
 CONFIG_COMMON_CLK_PWM=m
-CONFIG_REMOTEPROC=m
+CONFIG_REMOTEPROC=y
 CONFIG_DA8XX_REMOTEPROC=m
 CONFIG_MEMORY=y
 CONFIG_TI_AEMIF=m
index 9bfffbe..0f7381e 100644 (file)
@@ -276,6 +276,7 @@ CONFIG_VIDEO_OV5640=m
 CONFIG_VIDEO_OV5645=m
 CONFIG_IMX_IPUV3_CORE=y
 CONFIG_DRM=y
+CONFIG_DRM_MSM=y
 CONFIG_DRM_PANEL_LVDS=y
 CONFIG_DRM_PANEL_SIMPLE=y
 CONFIG_DRM_PANEL_SEIKO_43WVF1G=y
index 13ba532..e4c8def 100644 (file)
@@ -415,7 +415,7 @@ CONFIG_SPI_SH_MSIOF=m
 CONFIG_SPI_SH_HSPI=y
 CONFIG_SPI_SIRF=y
 CONFIG_SPI_STM32=m
-CONFIG_SPI_STM32_QSPI=m
+CONFIG_SPI_STM32_QSPI=y
 CONFIG_SPI_SUN4I=y
 CONFIG_SPI_SUN6I=y
 CONFIG_SPI_TEGRA114=y
@@ -933,7 +933,7 @@ CONFIG_BCM2835_MBOX=y
 CONFIG_ROCKCHIP_IOMMU=y
 CONFIG_TEGRA_IOMMU_GART=y
 CONFIG_TEGRA_IOMMU_SMMU=y
-CONFIG_REMOTEPROC=m
+CONFIG_REMOTEPROC=y
 CONFIG_ST_REMOTEPROC=m
 CONFIG_RPMSG_VIRTIO=m
 CONFIG_ASPEED_LPC_CTRL=m
index c7bf9c4..40d7f1a 100644 (file)
@@ -356,13 +356,15 @@ CONFIG_DRM_OMAP_CONNECTOR_HDMI=m
 CONFIG_DRM_OMAP_CONNECTOR_ANALOG_TV=m
 CONFIG_DRM_OMAP_PANEL_DPI=m
 CONFIG_DRM_OMAP_PANEL_DSI_CM=m
-CONFIG_DRM_OMAP_PANEL_SONY_ACX565AKM=m
-CONFIG_DRM_OMAP_PANEL_LGPHILIPS_LB035Q02=m
-CONFIG_DRM_OMAP_PANEL_SHARP_LS037V7DW01=m
-CONFIG_DRM_OMAP_PANEL_TPO_TD028TTEC1=m
-CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m
-CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m
 CONFIG_DRM_TILCDC=m
+CONFIG_DRM_PANEL_SIMPLE=m
+CONFIG_DRM_TI_TFP410=m
+CONFIG_DRM_PANEL_LG_LB035Q02=m
+CONFIG_DRM_PANEL_NEC_NL8048HL11=m
+CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m
+CONFIG_DRM_PANEL_SONY_ACX565AKM=m
+CONFIG_DRM_PANEL_TPO_TD028TTEC1=m
+CONFIG_DRM_PANEL_TPO_TD043MTEA1=m
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_MODE_HELPERS=y
@@ -422,6 +424,7 @@ CONFIG_USB_SERIAL_GENERIC=y
 CONFIG_USB_SERIAL_SIMPLE=m
 CONFIG_USB_SERIAL_FTDI_SIO=m
 CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_OPTION=m
 CONFIG_USB_TEST=m
 CONFIG_NOP_USB_XCEIV=m
 CONFIG_AM335X_PHY_USB=m
@@ -459,6 +462,7 @@ CONFIG_MMC_SDHCI_OMAP=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=m
 CONFIG_LEDS_CPCAP=m
+CONFIG_LEDS_LM3532=m
 CONFIG_LEDS_GPIO=m
 CONFIG_LEDS_PCA963X=m
 CONFIG_LEDS_PWM=m
@@ -480,7 +484,7 @@ CONFIG_RTC_DRV_OMAP=m
 CONFIG_RTC_DRV_CPCAP=m
 CONFIG_DMADEVICES=y
 CONFIG_OMAP_IOMMU=y
-CONFIG_REMOTEPROC=m
+CONFIG_REMOTEPROC=y
 CONFIG_OMAP_REMOTEPROC=m
 CONFIG_WKUP_M3_RPROC=m
 CONFIG_SOC_TI=y
index 787c3f9..b817c57 100644 (file)
@@ -581,7 +581,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
 CONFIG_USB_SERIAL_OMNINET=m
 CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_CYTHERM=m
index 95b5a4f..73ed73a 100644 (file)
@@ -327,7 +327,6 @@ CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
 CONFIG_USB_ADUTUX=m
 CONFIG_USB_SEVSEG=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_CYPRESS_CY7C63=m
index 4fb51d6..a1cdbfa 100644 (file)
@@ -189,7 +189,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
 CONFIG_USB_SERIAL_OMNINET=m
 CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_CYTHERM=m
index b24df84..043b0b1 100644 (file)
@@ -98,6 +98,7 @@ config CRYPTO_AES_ARM_CE
        tristate "Accelerated AES using ARMv8 Crypto Extensions"
        depends on KERNEL_MODE_NEON
        select CRYPTO_BLKCIPHER
+       select CRYPTO_LIB_AES
        select CRYPTO_SIMD
        help
          Use an implementation of AES in CBC, CTR and XTS modes that uses
index b978cdf..4d17073 100644 (file)
@@ -9,6 +9,7 @@
 #include <asm/assembler.h>
 
        .text
+       .arch           armv8-a
        .fpu            crypto-neon-fp-armv8
        .align          3
 
index 567dbed..f1d0a78 100644 (file)
@@ -82,7 +82,7 @@
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_CPU_CP15_MMU
-static inline unsigned int get_domain(void)
+static __always_inline unsigned int get_domain(void)
 {
        unsigned int domain;
 
@@ -94,7 +94,7 @@ static inline unsigned int get_domain(void)
        return domain;
 }
 
-static inline void set_domain(unsigned val)
+static __always_inline void set_domain(unsigned int val)
 {
        asm volatile(
        "mcr    p15, 0, %0, c3, c0      @ set domain"
@@ -102,12 +102,12 @@ static inline void set_domain(unsigned val)
        isb();
 }
 #else
-static inline unsigned int get_domain(void)
+static __always_inline unsigned int get_domain(void)
 {
        return 0;
 }
 
-static inline void set_domain(unsigned val)
+static __always_inline void set_domain(unsigned int val)
 {
 }
 #endif
index a2a68b7..069da39 100644 (file)
@@ -15,8 +15,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#define check_pgt_cache()              do { } while (0)
-
 #ifdef CONFIG_MMU
 
 #define _PAGE_USER_TABLE       (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER))
index d0de24f..010fa1a 100644 (file)
@@ -70,11 +70,6 @@ typedef pte_t *pte_addr_t;
  */
 extern unsigned int kobjsize(const void *objp);
 
-/*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
index f2e990d..3ae120c 100644 (file)
@@ -368,8 +368,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#define pgtable_cache_init() do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* CONFIG_MMU */
index 20c2f42..614bf82 100644 (file)
@@ -140,8 +140,6 @@ static inline void prefetchw(const void *ptr)
 #endif
 #endif
 
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
 #endif
 
 #endif /* __ASM_ARM_PROCESSOR_H */
index b75ea15..669474a 100644 (file)
@@ -44,7 +44,7 @@ static inline void __tlb_remove_table(void *_table)
 static inline void
 __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
 
 #ifndef CONFIG_ARM_LPAE
        /*
index 303248e..98c6b91 100644 (file)
@@ -22,7 +22,7 @@
 * perform such accesses (e.g. via list poison values) which could then
 * be exploited for privilege escalation.
  */
-static inline unsigned int uaccess_save_and_enable(void)
+static __always_inline unsigned int uaccess_save_and_enable(void)
 {
 #ifdef CONFIG_CPU_SW_DOMAIN_PAN
        unsigned int old_domain = get_domain();
@@ -37,7 +37,7 @@ static inline unsigned int uaccess_save_and_enable(void)
 #endif
 }
 
-static inline void uaccess_restore(unsigned int flags)
+static __always_inline void uaccess_restore(unsigned int flags)
 {
 #ifdef CONFIG_CPU_SW_DOMAIN_PAN
        /* Restore the user access mask */
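
The __always_inline conversions above are not cosmetic: these helpers are called from __get_user_check(), which passes its arguments in fixed registers, so an out-of-line copy (possible once CONFIG_OPTIMIZE_INLINING lets the compiler decide) would break that register assignment. The helpers are always used as a bracketing pair around the actual user access; a minimal sketch of the idiom, the real call sites being the uaccess macros in this header:

        unsigned int __ua_flags = uaccess_save_and_enable();
        /* ... perform the user-space load or store here ... */
        uaccess_restore(__ua_flags);
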
diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h
deleted file mode 100644 (file)
index ec154e7..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_XEN_OPS_H
-#define _ASM_XEN_OPS_H
-
-void xen_efi_runtime_setup(void);
-
-#endif /* _ASM_XEN_OPS_H */
index a7810be..4a39828 100644 (file)
@@ -68,7 +68,7 @@ ENDPROC(__vet_atags)
  * The following fragment of code is executed with the MMU on in MMU mode,
  * and uses absolute addresses; this is not position independent.
  *
- *  r0  = cp#15 control register
+ *  r0  = cp#15 control register (exc_ret for M-class)
  *  r1  = machine ID
  *  r2  = atags/dtb pointer
  *  r9  = processor ID
@@ -137,7 +137,8 @@ __mmap_switched_data:
 #ifdef CONFIG_CPU_CP15
        .long   cr_alignment                    @ r3
 #else
-       .long   0                               @ r3
+M_CLASS(.long  exc_ret)                        @ r3
+AR_CLASS(.long 0)                              @ r3
 #endif
        .size   __mmap_switched_data, . - __mmap_switched_data
 
index afa350f..0fc814b 100644 (file)
@@ -201,6 +201,8 @@ M_CLASS(streq       r3, [r12, #PMSAv8_MAIR1])
        bic     r0, r0, #V7M_SCB_CCR_IC
 #endif
        str     r0, [r12, V7M_SCB_CCR]
+       /* Pass exc_ret to __mmap_switched */
+       mov     r0, r10
 #endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
        ret     lr
 ENDPROC(__after_proc_init)
index f934a67..9485acc 100644 (file)
@@ -319,11 +319,6 @@ unsigned long get_wchan(struct task_struct *p)
        return 0;
 }
 
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       return randomize_page(mm->brk, 0x02000000);
-}
-
 #ifdef CONFIG_MMU
 #ifdef CONFIG_KUSER_HELPERS
 /*
index 56007b0..e8d6e99 100644 (file)
@@ -26,7 +26,6 @@ config MACH_ASPEED_G4
 config MACH_ASPEED_G5
        bool "Aspeed SoC 5th Generation"
        depends on ARCH_MULTI_V6
-       select CPU_V6
        select PINCTRL_ASPEED_G5
        select FTTMR010_TIMER
        help
index 8062412..9fc5c73 100644 (file)
@@ -462,8 +462,8 @@ static s8 dm365_queue_priority_mapping[][2] = {
 };
 
 static const struct dma_slave_map dm365_edma_map[] = {
-       { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) },
-       { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) },
+       { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) },
+       { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) },
        { "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) },
        { "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) },
        { "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) },
index dd939e1..29fd136 100644 (file)
@@ -763,7 +763,8 @@ static struct omap_hwmod_class_sysconfig am33xx_timer_sysc = {
        .rev_offs       = 0x0000,
        .sysc_offs      = 0x0010,
        .syss_offs      = 0x0014,
-       .sysc_flags     = (SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
+       .sysc_flags     = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
+                         SYSC_HAS_RESET_STATUS,
        .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
                          SIDLE_SMART_WKUP),
        .sysc_fields    = &omap_hwmod_sysc_type2,
index 2bcb634..5452477 100644 (file)
@@ -231,8 +231,9 @@ static struct omap_hwmod am33xx_control_hwmod = {
 static struct omap_hwmod_class_sysconfig lcdc_sysc = {
        .rev_offs       = 0x0,
        .sysc_offs      = 0x54,
-       .sysc_flags     = (SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE),
-       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
+       .sysc_flags     = SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE,
+       .idlemodes      = SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+                         MSTANDBY_FORCE | MSTANDBY_NO | MSTANDBY_SMART,
        .sysc_fields    = &omap_hwmod_sysc_type2,
 };
 
index 6c6f8fc..2efd18e 100644 (file)
@@ -89,6 +89,13 @@ static struct iommu_platform_data omap3_iommu_pdata = {
        .reset_name = "mmu",
        .assert_reset = omap_device_assert_hardreset,
        .deassert_reset = omap_device_deassert_hardreset,
+       .device_enable = omap_device_enable,
+       .device_idle = omap_device_idle,
+};
+
+static struct iommu_platform_data omap3_iommu_isp_pdata = {
+       .device_enable = omap_device_enable,
+       .device_idle = omap_device_idle,
 };
 
 static int omap3_sbc_t3730_twl_callback(struct device *dev,
@@ -424,6 +431,8 @@ static struct iommu_platform_data omap4_iommu_pdata = {
        .reset_name = "mmu_cache",
        .assert_reset = omap_device_assert_hardreset,
        .deassert_reset = omap_device_deassert_hardreset,
+       .device_enable = omap_device_enable,
+       .device_idle = omap_device_idle,
 };
 #endif
 
@@ -491,11 +500,11 @@ static int ti_sysc_clkdm_init(struct device *dev,
                              struct clk *fck, struct clk *ick,
                              struct ti_sysc_cookie *cookie)
 {
-       if (fck)
+       if (!IS_ERR(fck))
                cookie->clkdm = ti_sysc_find_one_clockdomain(fck);
        if (cookie->clkdm)
                return 0;
-       if (ick)
+       if (!IS_ERR(ick))
                cookie->clkdm = ti_sysc_find_one_clockdomain(ick);
        if (cookie->clkdm)
                return 0;
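
The IS_ERR() conversion is the substance of this hunk: the clock framework reports failure through ERR_PTR()-encoded pointers, never NULL, so the old `if (fck)` test was also true for error values and could hand an error pointer to ti_sysc_find_one_clockdomain(). The general idiom, as a sketch (the "fck" clock name is only illustrative):

        struct clk *clk = clk_get(dev, "fck");

        if (IS_ERR(clk))
                return PTR_ERR(clk);    /* failure is an encoded errno, not NULL */
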
@@ -617,6 +626,8 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = {
 #ifdef CONFIG_ARCH_OMAP3
        OF_DEV_AUXDATA("ti,omap2-iommu", 0x5d000000, "5d000000.mmu",
                       &omap3_iommu_pdata),
+       OF_DEV_AUXDATA("ti,omap2-iommu", 0x480bd400, "480bd400.mmu",
+                      &omap3_iommu_isp_pdata),
        OF_DEV_AUXDATA("ti,omap3-smartreflex-core", 0x480cb000,
                       "480cb000.smartreflex", &omap_sr_pdata[OMAP_SR_CORE]),
        OF_DEV_AUXDATA("ti,omap3-smartreflex-mpu-iva", 0x480c9000,
index 1fde1bf..7ac9af5 100644 (file)
@@ -74,83 +74,6 @@ int omap_pm_clkdms_setup(struct clockdomain *clkdm, void *unused)
        return 0;
 }
 
-/*
- * This API is to be called during init to set the various voltage
- * domains to the voltage as per the opp table. Typically we boot up
- * at the nominal voltage. So this function finds out the rate of
- * the clock associated with the voltage domain, finds out the correct
- * opp entry and sets the voltage domain to the voltage specified
- * in the opp entry
- */
-static int __init omap2_set_init_voltage(char *vdd_name, char *clk_name,
-                                        const char *oh_name)
-{
-       struct voltagedomain *voltdm;
-       struct clk *clk;
-       struct dev_pm_opp *opp;
-       unsigned long freq, bootup_volt;
-       struct device *dev;
-
-       if (!vdd_name || !clk_name || !oh_name) {
-               pr_err("%s: invalid parameters\n", __func__);
-               goto exit;
-       }
-
-       if (!strncmp(oh_name, "mpu", 3))
-               /* 
-                * All current OMAPs share voltage rail and clock
-                * source, so CPU0 is used to represent the MPU-SS.
-                */
-               dev = get_cpu_device(0);
-       else
-               dev = omap_device_get_by_hwmod_name(oh_name);
-
-       if (IS_ERR(dev)) {
-               pr_err("%s: Unable to get dev pointer for hwmod %s\n",
-                       __func__, oh_name);
-               goto exit;
-       }
-
-       voltdm = voltdm_lookup(vdd_name);
-       if (!voltdm) {
-               pr_err("%s: unable to get vdd pointer for vdd_%s\n",
-                       __func__, vdd_name);
-               goto exit;
-       }
-
-       clk =  clk_get(NULL, clk_name);
-       if (IS_ERR(clk)) {
-               pr_err("%s: unable to get clk %s\n", __func__, clk_name);
-               goto exit;
-       }
-
-       freq = clk_get_rate(clk);
-       clk_put(clk);
-
-       opp = dev_pm_opp_find_freq_ceil(dev, &freq);
-       if (IS_ERR(opp)) {
-               pr_err("%s: unable to find boot up OPP for vdd_%s\n",
-                       __func__, vdd_name);
-               goto exit;
-       }
-
-       bootup_volt = dev_pm_opp_get_voltage(opp);
-       dev_pm_opp_put(opp);
-
-       if (!bootup_volt) {
-               pr_err("%s: unable to find voltage corresponding to the bootup OPP for vdd_%s\n",
-                      __func__, vdd_name);
-               goto exit;
-       }
-
-       voltdm_scale(voltdm, bootup_volt);
-       return 0;
-
-exit:
-       pr_err("%s: unable to set vdd_%s\n", __func__, vdd_name);
-       return -EINVAL;
-}
-
 #ifdef CONFIG_SUSPEND
 static int omap_pm_enter(suspend_state_t suspend_state)
 {
@@ -208,25 +131,6 @@ void omap_common_suspend_init(void *pm_suspend)
 }
 #endif /* CONFIG_SUSPEND */
 
-static void __init omap3_init_voltages(void)
-{
-       if (!soc_is_omap34xx())
-               return;
-
-       omap2_set_init_voltage("mpu_iva", "dpll1_ck", "mpu");
-       omap2_set_init_voltage("core", "l3_ick", "l3_main");
-}
-
-static void __init omap4_init_voltages(void)
-{
-       if (!soc_is_omap44xx())
-               return;
-
-       omap2_set_init_voltage("mpu", "dpll_mpu_ck", "mpu");
-       omap2_set_init_voltage("core", "l3_div_ck", "l3_main_1");
-       omap2_set_init_voltage("iva", "dpll_iva_m5x2_ck", "iva");
-}
-
 int __maybe_unused omap_pm_nop_init(void)
 {
        return 0;
@@ -246,10 +150,6 @@ int __init omap2_common_pm_late_init(void)
        omap4_twl_init();
        omap_voltage_late_init();
 
-       /* Initialize the voltages */
-       omap3_init_voltages();
-       omap4_init_voltages();
-
        /* Smartreflex device init */
        omap_devinit_smartreflex();
 
index 04b3643..788c5cf 100644 (file)
@@ -324,7 +324,7 @@ union offset_union {
        __put32_unaligned_check("strbt", val, addr)
 
 static void
-do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset)
+do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, union offset_union offset)
 {
        if (!LDST_U_BIT(instr))
                offset.un = -offset.un;
@@ -337,7 +337,7 @@ do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs
 }
 
 static int
-do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+do_alignment_ldrhstrh(unsigned long addr, u32 instr, struct pt_regs *regs)
 {
        unsigned int rd = RD_BITS(instr);
 
@@ -386,8 +386,7 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r
 }
 
 static int
-do_alignment_ldrdstrd(unsigned long addr, unsigned long instr,
-                     struct pt_regs *regs)
+do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs)
 {
        unsigned int rd = RD_BITS(instr);
        unsigned int rd2;
@@ -449,7 +448,7 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr,
 }
 
 static int
-do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+do_alignment_ldrstr(unsigned long addr, u32 instr, struct pt_regs *regs)
 {
        unsigned int rd = RD_BITS(instr);
 
@@ -498,7 +497,7 @@ do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *reg
  * PU = 10             A                    B
  */
 static int
-do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs)
 {
        unsigned int rd, rn, correction, nr_regs, regbits;
        unsigned long eaddr, newaddr;
@@ -539,7 +538,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg
         * processor for us.
         */
        if (addr != eaddr) {
-               pr_err("LDMSTM: PC = %08lx, instr = %08lx, "
+               pr_err("LDMSTM: PC = %08lx, instr = %08x, "
                        "addr = %08lx, eaddr = %08lx\n",
                         instruction_pointer(regs), instr, addr, eaddr);
                show_regs(regs);
@@ -716,10 +715,10 @@ thumb2arm(u16 tinstr)
  * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt)
  */
 static void *
-do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
+do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs,
                            union offset_union *poffset)
 {
-       unsigned long instr = *pinstr;
+       u32 instr = *pinstr;
        u16 tinst1 = (instr >> 16) & 0xffff;
        u16 tinst2 = instr & 0xffff;
 
@@ -767,17 +766,48 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
        return NULL;
 }
 
+static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst)
+{
+       u32 instr = 0;
+       int fault;
+
+       if (user_mode(regs))
+               fault = get_user(instr, ip);
+       else
+               fault = probe_kernel_address(ip, instr);
+
+       *inst = __mem_to_opcode_arm(instr);
+
+       return fault;
+}
+
+static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst)
+{
+       u16 instr = 0;
+       int fault;
+
+       if (user_mode(regs))
+               fault = get_user(instr, ip);
+       else
+               fault = probe_kernel_address(ip, instr);
+
+       *inst = __mem_to_opcode_thumb16(instr);
+
+       return fault;
+}
+
 static int
 do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
        union offset_union uninitialized_var(offset);
-       unsigned long instr = 0, instrptr;
-       int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
+       unsigned long instrptr;
+       int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs);
        unsigned int type;
-       unsigned int fault;
+       u32 instr = 0;
        u16 tinstr = 0;
        int isize = 4;
        int thumb2_32b = 0;
+       int fault;
 
        if (interrupts_enabled(regs))
                local_irq_enable();
@@ -786,15 +816,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
        if (thumb_mode(regs)) {
                u16 *ptr = (u16 *)(instrptr & ~1);
-               fault = probe_kernel_address(ptr, tinstr);
-               tinstr = __mem_to_opcode_thumb16(tinstr);
+
+               fault = alignment_get_thumb(regs, ptr, &tinstr);
                if (!fault) {
                        if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
                            IS_T32(tinstr)) {
                                /* Thumb-2 32-bit */
-                               u16 tinst2 = 0;
-                               fault = probe_kernel_address(ptr + 1, tinst2);
-                               tinst2 = __mem_to_opcode_thumb16(tinst2);
+                               u16 tinst2;
+                               fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
                                instr = __opcode_thumb32_compose(tinstr, tinst2);
                                thumb2_32b = 1;
                        } else {
@@ -803,8 +832,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
                        }
                }
        } else {
-               fault = probe_kernel_address((void *)instrptr, instr);
-               instr = __mem_to_opcode_arm(instr);
+               fault = alignment_get_arm(regs, (void *)instrptr, &instr);
        }
 
        if (fault) {
@@ -926,7 +954,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         * Oops, we didn't handle the instruction.
         */
        pr_err("Alignment trap: not handling instruction "
-               "%0*lx at [<%08lx>]\n",
+               "%0*x at [<%08lx>]\n",
                isize << 1,
                isize == 2 ? tinstr : instr, instrptr);
        ai_skipped += 1;
@@ -936,7 +964,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        ai_user += 1;
 
        if (ai_usermode & UM_WARN)
-               printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
+               printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*x "
                       "Address=0x%08lx FSR 0x%03x\n", current->comm,
                        task_pid_nr(current), instrptr,
                        isize << 1,
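
The alignment_get_arm()/alignment_get_thumb() helpers added earlier in this file carry the actual fix: the faulting instruction must be fetched with get_user() when the trap came from user space, since probe_kernel_address() is meant for kernel pointers and cannot be relied on for user addresses. Condensed, both helpers implement the same dispatch:

        if (user_mode(regs))
                fault = get_user(instr, ip);             /* faulting PC is a user address */
        else
                fault = probe_kernel_address(ip, instr); /* kernel-space PC */
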
index 6ecbda8..6d89db7 100644 (file)
@@ -204,18 +204,17 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
         * coherent with the kernels mapping.
         */
        if (!PageHighMem(page)) {
-               size_t page_size = PAGE_SIZE << compound_order(page);
-               __cpuc_flush_dcache_area(page_address(page), page_size);
+               __cpuc_flush_dcache_area(page_address(page), page_size(page));
        } else {
                unsigned long i;
                if (cache_is_vipt_nonaliasing()) {
-                       for (i = 0; i < (1 << compound_order(page)); i++) {
+                       for (i = 0; i < compound_nr(page); i++) {
                                void *addr = kmap_atomic(page + i);
                                __cpuc_flush_dcache_area(addr, PAGE_SIZE);
                                kunmap_atomic(addr);
                        }
                } else {
-                       for (i = 0; i < (1 << compound_order(page)); i++) {
+                       for (i = 0; i < compound_nr(page); i++) {
                                void *addr = kmap_high_get(page + i);
                                if (addr) {
                                        __cpuc_flush_dcache_area(addr, PAGE_SIZE);
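
Both loops are mechanical conversions to helpers that had just been added to <linux/mm.h>; a sketch of the equivalences being relied on, for a possibly compound page:

        unsigned long bytes = page_size(page);   /* == PAGE_SIZE << compound_order(page) */
        unsigned long nr    = compound_nr(page); /* == 1UL << compound_order(page) */
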
index f866870..b8d912a 100644 (file)
        ((((addr)+SHMLBA-1)&~(SHMLBA-1)) +      \
         (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
 
-/* gap between mmap and stack */
-#define MIN_GAP (128*1024*1024UL)
-#define MAX_GAP ((TASK_SIZE)/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-       if (current->personality & ADDR_COMPAT_LAYOUT)
-               return 1;
-
-       if (rlim_stack->rlim_cur == RLIM_INFINITY)
-               return 1;
-
-       return sysctl_legacy_va_layout;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
-       unsigned long gap = rlim_stack->rlim_cur;
-
-       if (gap < MIN_GAP)
-               gap = MIN_GAP;
-       else if (gap > MAX_GAP)
-               gap = MAX_GAP;
-
-       return PAGE_ALIGN(TASK_SIZE - gap - rnd);
-}
-
 /*
  * We need to ensure that shared mappings are correctly aligned to
  * avoid aliasing issues with VIPT caches.  We need to ensure that
@@ -171,31 +144,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        return addr;
 }
 
-unsigned long arch_mmap_rnd(void)
-{
-       unsigned long rnd;
-
-       rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-
-       return rnd << PAGE_SHIFT;
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
-       unsigned long random_factor = 0UL;
-
-       if (current->flags & PF_RANDOMIZE)
-               random_factor = arch_mmap_rnd();
-
-       if (mmap_is_legacy(rlim_stack)) {
-               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-               mm->get_unmapped_area = arch_get_unmapped_area;
-       } else {
-               mm->mmap_base = mmap_base(random_factor, rlim_stack);
-               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-       }
-}
-
 /*
  * You really shouldn't be using read() or write() on /dev/mem.  This
  * might go away in the future.
index 25da9b2..48c2888 100644 (file)
@@ -731,7 +731,7 @@ static void *__init late_alloc(unsigned long sz)
 {
        void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
 
-       if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+       if (!ptr || !pgtable_pte_page_ctor(virt_to_page(ptr)))
                BUG();
        return ptr;
 }
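
pgtable_page_ctor()/pgtable_page_dtor() were renamed to pgtable_pte_page_ctor()/pgtable_pte_page_dtor() to make explicit that they apply only to PTE-level tables; the __pte_free_tlb() hunk above is the matching destructor side. The constructor can fail because it may allocate a split page-table lock, so the pairing looks roughly like this sketch (error label elided):

        struct page *page = alloc_page(GFP_PGTABLE_KERNEL);

        if (!page || !pgtable_pte_page_ctor(page))      /* ctor may fail: ptlock allocation */
                goto fail;
        /* ... page is used as a PTE table, then torn down: */
        pgtable_pte_page_dtor(page);
        __free_page(page);
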
index 1448f14..1a49d50 100644 (file)
@@ -132,13 +132,11 @@ __v7m_setup_cont:
        dsb
        mov     r6, lr                  @ save LR
        ldr     sp, =init_thread_union + THREAD_START_SP
-       stmia   sp, {r0-r3, r12}
        cpsie   i
        svc     #0
 1:     cpsid   i
-       ldr     r0, =exc_ret
-       orr     lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK
-       str     lr, [r0]
+       /* Calculate exc_ret */
+       orr     r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK
        ldmia   sp, {r0-r3, r12}
        str     r5, [r12, #11 * 4]      @ restore the original SVC vector entry
        mov     lr, r6                  @ restore LR
index 7ed2898..c32d047 100644 (file)
@@ -1,3 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y          := enlighten.o hypercall.o grant-table.o p2m.o mm.o
-obj-$(CONFIG_XEN_EFI) += efi.o
diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c
deleted file mode 100644 (file)
index d687a73..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2015, Linaro Limited, Shannon Zhao
- */
-
-#include <linux/efi.h>
-#include <xen/xen-ops.h>
-#include <asm/xen/xen-ops.h>
-
-/* Set XEN EFI runtime services function pointers. Other fields of struct efi,
- * e.g. efi.systab, will be set like normal EFI.
- */
-void __init xen_efi_runtime_setup(void)
-{
-       efi.get_time                 = xen_efi_get_time;
-       efi.set_time                 = xen_efi_set_time;
-       efi.get_wakeup_time          = xen_efi_get_wakeup_time;
-       efi.set_wakeup_time          = xen_efi_set_wakeup_time;
-       efi.get_variable             = xen_efi_get_variable;
-       efi.get_next_variable        = xen_efi_get_next_variable;
-       efi.set_variable             = xen_efi_set_variable;
-       efi.query_variable_info      = xen_efi_query_variable_info;
-       efi.update_capsule           = xen_efi_update_capsule;
-       efi.query_capsule_caps       = xen_efi_query_capsule_caps;
-       efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
-       efi.reset_system             = xen_efi_reset_system;
-}
-EXPORT_SYMBOL_GPL(xen_efi_runtime_setup);
index 1e57692..dd6804a 100644 (file)
@@ -15,7 +15,6 @@
 #include <xen/xen-ops.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
-#include <asm/xen/xen-ops.h>
 #include <asm/system_misc.h>
 #include <asm/efi.h>
 #include <linux/interrupt.h>
@@ -437,7 +436,7 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
-EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw);
 EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
 EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
 EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op);
index 2b2c208..38fa917 100644 (file)
@@ -28,7 +28,10 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order)
 
        for_each_memblock(memory, reg) {
                if (reg->base < (phys_addr_t)0xffffffff) {
-                       flags |= __GFP_DMA;
+                       if (IS_ENABLED(CONFIG_ZONE_DMA32))
+                               flags |= __GFP_DMA32;
+                       else
+                               flags |= __GFP_DMA;
                        break;
                }
        }
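
The run-time zone selection above is needed because the architectures sharing this code keep low memory in different zones: arm64 places the first 4 GiB in ZONE_DMA32, while 32-bit ARM only has ZONE_DMA, so a hard-coded __GFP_DMA effectively fell back to ZONE_NORMAL on arm64. The pattern, as a sketch (base flags elided):

        gfp_t flags = 0;

        if (IS_ENABLED(CONFIG_ZONE_DMA32))
                flags |= __GFP_DMA32;   /* arm64: low memory is in ZONE_DMA32 */
        else
                flags |= __GFP_DMA;     /* 32-bit ARM: ZONE_DMA */
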
index 37c6109..3f047af 100644 (file)
@@ -15,7 +15,6 @@ config ARM64
        select ARCH_HAS_DMA_COHERENT_TO_PFN
        select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
-       select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
@@ -71,6 +70,7 @@ config ARM64
        select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
        select ARCH_SUPPORTS_NUMA_BALANCING
        select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
        select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -110,7 +110,6 @@ config ARM64
        select GENERIC_STRNLEN_USER
        select GENERIC_TIME_VSYSCALL
        select GENERIC_GETTIMEOFDAY
-       select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT)
        select HANDLE_DOMAIN_IRQ
        select HARDIRQS_SW_RESEND
        select HAVE_PCI
@@ -617,6 +616,23 @@ config CAVIUM_ERRATUM_30115
 
          If unsure, say Y.
 
+config CAVIUM_TX2_ERRATUM_219
+       bool "Cavium ThunderX2 erratum 219: PRFM between TTBR change and ISB fails"
+       default y
+       help
+         On Cavium ThunderX2, a load, store or prefetch instruction between a
+         TTBR update and the corresponding context synchronizing operation can
+         cause a spurious Data Abort to be delivered to any hardware thread in
+         the CPU core.
+
+         Work around the issue by avoiding the problematic code sequence and
+         trapping KVM guest TTBRx_EL1 writes to EL2 when SMT is enabled. The
+         trap handler performs the corresponding register access, skips the
+         instruction and ensures context synchronization by virtue of the
+         exception return.
+
+         If unsure, say Y.
+
 config QCOM_FALKOR_ERRATUM_1003
        bool "Falkor E1003: Incorrect translation due to ASID change"
        default y
@@ -982,7 +998,7 @@ config KEXEC_FILE
          for kernel and initramfs as opposed to list of segments as
          accepted by previous system call.
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
        bool "Verify kernel signature during kexec_file_load() syscall"
        depends on KEXEC_FILE
        help
@@ -997,13 +1013,13 @@ config KEXEC_VERIFY_SIG
 config KEXEC_IMAGE_VERIFY_SIG
        bool "Enable Image signature verification support"
        default y
-       depends on KEXEC_VERIFY_SIG
+       depends on KEXEC_SIG
        depends on EFI && SIGNED_PE_FILE_VERIFICATION
        help
          Enable Image signature verification support.
 
 comment "Support for PE file signature verification disabled"
-       depends on KEXEC_VERIFY_SIG
+       depends on KEXEC_SIG
        depends on !EFI || !SIGNED_PE_FILE_VERIFICATION
 
 config CRASH_DUMP
@@ -1159,7 +1175,7 @@ menuconfig COMPAT
 if COMPAT
 
 config KUSER_HELPERS
-       bool "Enable kuser helpers page for 32 bit applications"
+       bool "Enable kuser helpers page for 32-bit applications"
        default y
        help
          Warning: disabling this option may break 32-bit user programs.
@@ -1185,6 +1201,18 @@ config KUSER_HELPERS
          Say N here only if you are absolutely certain that you do not
          need these helpers; otherwise, the safe option is to say Y.
 
+config COMPAT_VDSO
+       bool "Enable vDSO for 32-bit applications"
+       depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != ""
+       select GENERIC_COMPAT_VDSO
+       default y
+       help
+         Place in the process address space of 32-bit applications an
+         ELF shared object providing fast implementations of gettimeofday
+         and clock_gettime.
+
+         You must have a 32-bit build of glibc 2.22 or later for programs
+         to seamlessly take advantage of this.
 
 menuconfig ARMV8_DEPRECATED
        bool "Emulate deprecated/obsolete ARMv8 instructions"
index 84a3d50..2c0238c 100644 (file)
@@ -53,22 +53,6 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable)
   endif
 endif
 
-ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
-  CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%)
-
-  ifeq ($(CONFIG_CC_IS_CLANG), y)
-    $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built)
-  else ifeq ($(strip $(CROSS_COMPILE_COMPAT)),)
-    $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built)
-  else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),)
-    $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT)
-  else
-    export CROSS_COMPILE_COMPAT
-    export CONFIG_COMPAT_VDSO := y
-    compat_vdso := -DCONFIG_COMPAT_VDSO=1
-  endif
-endif
-
 KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr) $(brokengasinst)     \
                   $(compat_vdso) $(cc_has_k_constraint)
 KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
index 24f1aac..d5b6e81 100644 (file)
                reg = <1>;
        };
 };
+
+&reg_dc1sw {
+       /*
+        * Ethernet PHY needs 30ms to properly power up and some more
+        * to initialize. 100ms should be plenty of time to finish
+        * whole process.
+        */
+       regulator-enable-ramp-delay = <100000>;
+};
index 2b6345d..78c82a6 100644 (file)
 
 &ehci0 {
        phys = <&usbphy 0>;
+       phy-names = "usb";
        status = "okay";
 };
 
 
 &ohci0 {
        phys = <&usbphy 0>;
+       phy-names = "usb";
        status = "okay";
 };
 
index e6fb968..2509920 100644 (file)
 };
 
 &reg_dc1sw {
+       /*
+        * The Ethernet PHY needs 30ms to properly power up and some more
+        * to initialize. 100ms should be plenty of time to finish
+        * the whole process.
+        */
+       regulator-enable-ramp-delay = <100000>;
        regulator-name = "vcc-phy";
 };
 
index 69128a6..70f4cce 100644 (file)
                clock-output-names = "ext-osc32k";
        };
 
-       pmu {
-               compatible = "arm,cortex-a53-pmu";
-               interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
-               interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
-       };
-
        psci {
                compatible = "arm,psci-0.2";
                method = "smc";
                        resets = <&ccu RST_BUS_OHCI1>,
                                 <&ccu RST_BUS_EHCI1>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&ccu CLK_USB_OHCI1>;
                        resets = <&ccu RST_BUS_OHCI1>;
                        phys = <&usbphy 1>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index 4020a1a..0d5ea19 100644 (file)
                        resets = <&ccu RST_BUS_OHCI3>,
                                 <&ccu RST_BUS_EHCI3>;
                        phys = <&usb2phy 3>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
                                 <&ccu CLK_USB_OHCI3>;
                        resets = <&ccu RST_BUS_OHCI3>;
                        phys = <&usb2phy 3>;
+                       phy-names = "usb";
                        status = "disabled";
                };
 
index 8a3a770..56789cc 100644 (file)
 
                pinmux: pinmux@14029c {
                        compatible = "pinctrl-single";
-                       reg = <0x0014029c 0x250>;
+                       reg = <0x0014029c 0x26c>;
                        #address-cells = <1>;
                        #size-cells = <1>;
                        pinctrl-single,register-width = <32>;
                        pinctrl-single,function-mask = <0xf>;
                        pinctrl-single,gpio-range = <
-                               &range 0 154 MODE_GPIO
+                               &range 0  91 MODE_GPIO
+                               &range 95 60 MODE_GPIO
                                >;
                        range: gpio-range {
                                #pinctrl-single,gpio-range-cells = <3>;
index 71e2e34..0098dfd 100644 (file)
                                        <&pinmux 108 16 27>,
                                        <&pinmux 135 77 6>,
                                        <&pinmux 141 67 4>,
-                                       <&pinmux 145 149 6>,
-                                       <&pinmux 151 91 4>;
+                                       <&pinmux 145 149 6>;
                };
 
                i2c1: i2c@e0000 {
index 124a7e2..3379193 100644 (file)
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
-                       num-lanes = <4>;
                        num-viewport = <2>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
index 71d9ed9..c084c7a 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x50 0x00010000 0x0 0x00010000   /* downstream I/O */
index b0ef08b..d4c1da3 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <8>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
                        reg-names = "regs", "addr_space";
                        num-ib-windows = <6>;
                        num-ob-windows = <8>;
-                       num-lanes = <2>;
                        status = "disabled";
                };
 
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <8>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
                        reg-names = "regs", "addr_space";
                        num-ib-windows = <6>;
                        num-ob-windows = <8>;
-                       num-lanes = <2>;
                        status = "disabled";
                };
 
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <8>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x50 0x00010000 0x0 0x00010000   /* downstream I/O */
                        reg-names = "regs", "addr_space";
                        num-ib-windows = <6>;
                        num-ob-windows = <8>;
-                       num-lanes = <2>;
                        status = "disabled";
                };
 
index d1469b0..c676d07 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <256>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x20 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x28 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <8>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x30 0x00010000 0x0 0x00010000   /* downstream I/O */
index 64101c9..7a0be8e 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <8>;
                        num-viewport = <256>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
index 408e0ec..b032f38 100644 (file)
@@ -33,7 +33,7 @@
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster0_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@1 {
@@ -49,7 +49,7 @@
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster0_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@100 {
@@ -65,7 +65,7 @@
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster1_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@101 {
@@ -81,7 +81,7 @@
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster1_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@200 {
@@ -97,7 +97,7 @@
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster2_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@201 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster2_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@300 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster3_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@301 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster3_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@400 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster4_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@401 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster4_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@500 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster5_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@501 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster5_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@600 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster6_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@601 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster6_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@700 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster7_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cpu@701 {
                        i-cache-line-size = <64>;
                        i-cache-sets = <192>;
                        next-level-cache = <&cluster7_l2>;
-                       cpu-idle-states = <&cpu_pw20>;
+                       cpu-idle-states = <&cpu_pw15>;
                };
 
                cluster0_l2: l2-cache0 {
                        cache-level = <2>;
                };
 
-               cpu_pw20: cpu-pw20 {
+               cpu_pw15: cpu-pw15 {
                        compatible = "arm,idle-state";
-                       idle-state-name = "PW20";
+                       idle-state-name = "PW15";
                        arm,psci-suspend-param = <0x0>;
                        entry-latency-us = <2000>;
                        exit-latency-us = <2000>;
index 5f9d0da..58b8cd0 100644 (file)
                                compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc";
                                reg = <0x30b40000 0x10000>;
                                interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MM_CLK_DUMMY>,
+                               clocks = <&clk IMX8MM_CLK_IPG_ROOT>,
                                         <&clk IMX8MM_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MM_CLK_USDHC1_ROOT>;
                                clock-names = "ipg", "ahb", "per";
                                compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc";
                                reg = <0x30b50000 0x10000>;
                                interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MM_CLK_DUMMY>,
+                               clocks = <&clk IMX8MM_CLK_IPG_ROOT>,
                                         <&clk IMX8MM_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MM_CLK_USDHC2_ROOT>;
                                clock-names = "ipg", "ahb", "per";
                                compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc";
                                reg = <0x30b60000 0x10000>;
                                interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MM_CLK_DUMMY>,
+                               clocks = <&clk IMX8MM_CLK_IPG_ROOT>,
                                         <&clk IMX8MM_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MM_CLK_USDHC3_ROOT>;
                                clock-names = "ipg", "ahb", "per";
index 785f4c4..98496f5 100644 (file)
                                compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc";
                                reg = <0x30b40000 0x10000>;
                                interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MN_CLK_DUMMY>,
+                               clocks = <&clk IMX8MN_CLK_IPG_ROOT>,
                                         <&clk IMX8MN_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MN_CLK_USDHC1_ROOT>;
                                clock-names = "ipg", "ahb", "per";
                                compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc";
                                reg = <0x30b50000 0x10000>;
                                interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MN_CLK_DUMMY>,
+                               clocks = <&clk IMX8MN_CLK_IPG_ROOT>,
                                         <&clk IMX8MN_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MN_CLK_USDHC2_ROOT>;
                                clock-names = "ipg", "ahb", "per";
                                compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc";
                                reg = <0x30b60000 0x10000>;
                                interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MN_CLK_DUMMY>,
+                               clocks = <&clk IMX8MN_CLK_IPG_ROOT>,
                                         <&clk IMX8MN_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MN_CLK_USDHC3_ROOT>;
                                clock-names = "ipg", "ahb", "per";
index af99473..087b5b6 100644 (file)
@@ -89,8 +89,8 @@
                regulator-min-microvolt = <900000>;
                regulator-max-microvolt = <1000000>;
                gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
-               states = <1000000 0x0
-                          900000 0x1>;
+               states = <1000000 0x1
+                          900000 0x0>;
                regulator-always-on;
        };
 };
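
The states property of a regulator-gpio node lists <microvolts gpio-level> pairs; the hunk above swaps the pairing so that 1.0 V is selected with the vsel GPIO driven high and 0.9 V with it driven low. A minimal C sketch of that lookup, simplified for illustration rather than taken from drivers/regulator/gpio-regulator.c:

    #include <stdio.h>

    /* Each entry mirrors one <microvolts gpio-level> pair from the
     * device tree "states" property above. */
    struct vsel_state { int uv; int gpio; };

    static const struct vsel_state states[] = {
            { 1000000, 0x1 },      /* 1.0 V <- GPIO driven high */
            {  900000, 0x0 },      /* 0.9 V <- GPIO driven low  */
    };

    static int gpio_for_voltage(int uv)
    {
            for (size_t i = 0; i < sizeof(states) / sizeof(states[0]); i++)
                    if (states[i].uv == uv)
                            return states[i].gpio;
            return -1;             /* voltage not in the table */
    }

    int main(void)
    {
            printf("1.0 V -> gpio level %d\n", gpio_for_voltage(1000000));
            printf("0.9 V -> gpio level %d\n", gpio_for_voltage(900000));
            return 0;
    }
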
index 04115ca..55a3d1c 100644 (file)
                                             "fsl,imx7d-usdhc";
                                reg = <0x30b40000 0x10000>;
                                interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MQ_CLK_DUMMY>,
+                               clocks = <&clk IMX8MQ_CLK_IPG_ROOT>,
                                         <&clk IMX8MQ_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MQ_CLK_USDHC1_ROOT>;
                                clock-names = "ipg", "ahb", "per";
                                             "fsl,imx7d-usdhc";
                                reg = <0x30b50000 0x10000>;
                                interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MQ_CLK_DUMMY>,
+                               clocks = <&clk IMX8MQ_CLK_IPG_ROOT>,
                                         <&clk IMX8MQ_CLK_NAND_USDHC_BUS>,
                                         <&clk IMX8MQ_CLK_USDHC2_ROOT>;
                                clock-names = "ipg", "ahb", "per";
index d105986..5f350cc 100644 (file)
                gpio = <&gpiosb 0 GPIO_ACTIVE_HIGH>;
        };
 
-       usb3_phy: usb3-phy {
-               compatible = "usb-nop-xceiv";
-               vcc-supply = <&exp_usb3_vbus>;
-       };
-
        vsdc_reg: vsdc-reg {
                compatible = "regulator-gpio";
                regulator-name = "vsdc";
        status = "okay";
 };
 
+&comphy2 {
+       connector {
+               compatible = "usb-a-connector";
+               phy-supply = <&exp_usb3_vbus>;
+       };
+};
+
 &usb3 {
        status = "okay";
        phys = <&comphy2 0>;
-       usb-phy = <&usb3_phy>;
 };
 
 &mdio {
index 62e07e1..4c38426 100644 (file)
                        gpio = <&gpio TEGRA194_MAIN_GPIO(A, 3) GPIO_ACTIVE_HIGH>;
                        enable-active-high;
                };
+
+               vdd_3v3_pcie: regulator@2 {
+                       compatible = "regulator-fixed";
+                       reg = <2>;
+
+                       regulator-name = "PEX_3V3";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&gpio TEGRA194_MAIN_GPIO(Z, 2) GPIO_ACTIVE_HIGH>;
+                       regulator-boot-on;
+                       enable-active-high;
+               };
+
+               vdd_12v_pcie: regulator@3 {
+                       compatible = "regulator-fixed";
+                       reg = <3>;
+
+                       regulator-name = "VDD_12V";
+                       regulator-min-microvolt = <1200000>;
+                       regulator-max-microvolt = <1200000>;
+                       gpio = <&gpio TEGRA194_MAIN_GPIO(A, 1) GPIO_ACTIVE_LOW>;
+                       regulator-boot-on;
+                       enable-active-low;
+               };
        };
 };
index 23597d5..d47cd8c 100644 (file)
        };
 
        pcie@141a0000 {
-               status = "disabled";
+               status = "okay";
 
                vddio-pex-ctl-supply = <&vdd_1v8ao>;
+               vpcie3v3-supply = <&vdd_3v3_pcie>;
+               vpcie12v-supply = <&vdd_12v_pcie>;
 
                phys = <&p2u_nvhs_0>, <&p2u_nvhs_1>, <&p2u_nvhs_2>,
                       <&p2u_nvhs_3>, <&p2u_nvhs_4>, <&p2u_nvhs_5>,
index adebbbf..3c0cf54 100644 (file)
@@ -3,8 +3,9 @@
 #include <dt-bindings/gpio/tegra194-gpio.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/mailbox/tegra186-hsp.h>
-#include <dt-bindings/reset/tegra194-reset.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
 #include <dt-bindings/power/tegra194-powergate.h>
+#include <dt-bindings/reset/tegra194-reset.h>
 #include <dt-bindings/thermal/tegra194-bpmp-thermal.h>
 
 / {
                        };
                };
 
+               pinmux: pinmux@2430000 {
+                       compatible = "nvidia,tegra194-pinmux";
+                       reg = <0x2430000 0x17000
+                              0xc300000 0x4000>;
+
+                       status = "okay";
+
+                       pex_rst_c5_out_state: pex_rst_c5_out {
+                               pex_rst {
+                                       nvidia,pins = "pex_l5_rst_n_pgg1";
+                                       nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                                       nvidia,lpdr = <TEGRA_PIN_ENABLE>;
+                                       nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                                       nvidia,io-high-voltage = <TEGRA_PIN_ENABLE>;
+                                       nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                                       nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               };
+                       };
+
+                       clkreq_c5_bi_dir_state: clkreq_c5_bi_dir {
+                               clkreq {
+                                       nvidia,pins = "pex_l5_clkreq_n_pgg0";
+                                       nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                                       nvidia,lpdr = <TEGRA_PIN_ENABLE>;
+                                       nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                                       nvidia,io-high-voltage = <TEGRA_PIN_ENABLE>;
+                                       nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                                       nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               };
+                       };
+               };
+
                uarta: serial@3100000 {
                        compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
                        reg = <0x03100000 0x40>;
                num-viewport = <8>;
                linux,pci-domain = <5>;
 
+               pinctrl-names = "default";
+               pinctrl-0 = <&pex_rst_c5_out_state>, <&clkreq_c5_bi_dir_state>;
+
                clocks = <&bpmp TEGRA194_CLK_PEX1_CORE_5>,
                        <&bpmp TEGRA194_CLK_PEX1_CORE_5M>;
                clock-names = "core", "core_m";
index e152b0c..b806686 100644 (file)
@@ -44,7 +44,7 @@
                power-supply = <&pp3300_disp>;
 
                panel-timing {
-                       clock-frequency = <266604720>;
+                       clock-frequency = <266666667>;
                        hactive = <2400>;
                        hfront-porch = <48>;
                        hback-porch = <84>;
index 0d1f5f9..c133e8d 100644 (file)
        status = "okay";
 
        u2phy0_host: host-port {
-               phy-supply = <&vcc5v0_host>;
+               phy-supply = <&vcc5v0_typec>;
                status = "okay";
        };
 
 
 &usbdrd_dwc3_0 {
        status = "okay";
-       dr_mode = "otg";
+       dr_mode = "host";
 };
 
 &usbdrd3_1 {
index 0401d4e..e544deb 100644 (file)
                regulator-always-on;
                regulator-boot-on;
                regulator-min-microvolt = <800000>;
-               regulator-max-microvolt = <1400000>;
+               regulator-max-microvolt = <1700000>;
                vin-supply = <&vcc5v0_sys>;
        };
 };
        rk808: pmic@1b {
                compatible = "rockchip,rk808";
                reg = <0x1b>;
-               interrupt-parent = <&gpio1>;
-               interrupts = <21 IRQ_TYPE_LEVEL_LOW>;
+               interrupt-parent = <&gpio3>;
+               interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
                #clock-cells = <1>;
                clock-output-names = "xin32k", "rk808-clkout2";
                pinctrl-names = "default";
 
        pmic {
                pmic_int_l: pmic-int-l {
-                       rockchip,pins = <1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>;
+                       rockchip,pins = <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up>;
                };
 
                vsel1_gpio: vsel1-gpio {
 
 &sdmmc {
        bus-width = <4>;
-       cap-mmc-highspeed;
        cap-sd-highspeed;
        cd-gpios = <&gpio0 7 GPIO_ACTIVE_LOW>;
        disable-wp;
 
 &sdhci {
        bus-width = <8>;
-       mmc-hs400-1_8v;
-       mmc-hs400-enhanced-strobe;
+       mmc-hs200-1_8v;
        non-removable;
        status = "okay";
 };
index 8e05c39..c9a867a 100644 (file)
@@ -723,7 +723,7 @@ CONFIG_TEGRA_IOMMU_SMMU=y
 CONFIG_ARM_SMMU=y
 CONFIG_ARM_SMMU_V3=y
 CONFIG_QCOM_IOMMU=y
-CONFIG_REMOTEPROC=m
+CONFIG_REMOTEPROC=y
 CONFIG_QCOM_Q6V5_MSS=m
 CONFIG_QCOM_Q6V5_PAS=m
 CONFIG_QCOM_SYSMON=m
index f74909b..5bf9638 100644 (file)
@@ -78,10 +78,9 @@ alternative_else_nop_endif
 /*
  * Remove the address tag from a virtual address, if present.
  */
-       .macro  clear_address_tag, dst, addr
-       tst     \addr, #(1 << 55)
-       bic     \dst, \addr, #(0xff << 56)
-       csel    \dst, \dst, \addr, eq
+       .macro  untagged_addr, dst, addr
+       sbfx    \dst, \addr, #0, #56
+       and     \dst, \dst, \addr
        .endm
 
 #endif
index c6bd87d..574808b 100644 (file)
@@ -321,7 +321,8 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
 }
 
 #define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...)                    \
-static inline u##sz __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
+static __always_inline u##sz                                           \
+__lse__cmpxchg_case_##name##sz(volatile void *ptr,                     \
                                              u##sz old,                \
                                              u##sz new)                \
 {                                                                      \
@@ -362,7 +363,8 @@ __CMPXCHG_CASE(x,  ,  mb_, 64, al, "memory")
 #undef __CMPXCHG_CASE
 
 #define __CMPXCHG_DBL(name, mb, cl...)                                 \
-static inline long __lse__cmpxchg_double##name(unsigned long old1,     \
+static __always_inline long                                            \
+__lse__cmpxchg_double##name(unsigned long old1,                                \
                                         unsigned long old2,            \
                                         unsigned long new1,            \
                                         unsigned long new2,            \
index f19fe4b..ac1dbca 100644 (file)
@@ -52,7 +52,9 @@
 #define ARM64_HAS_IRQ_PRIO_MASKING             42
 #define ARM64_HAS_DCPODP                       43
 #define ARM64_WORKAROUND_1463225               44
+#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM    45
+#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM   46
 
-#define ARM64_NCAPS                            45
+#define ARM64_NCAPS                            47
 
 #endif /* __ASM_CPUCAPS_H */
index b1454d1..aca07c2 100644 (file)
@@ -79,6 +79,7 @@
 #define CAVIUM_CPU_PART_THUNDERX_83XX  0x0A3
 #define CAVIUM_CPU_PART_THUNDERX2      0x0AF
 
+#define BRCM_CPU_PART_BRAHMA_B53       0x100
 #define BRCM_CPU_PART_VULCAN           0x516
 
 #define QCOM_CPU_PART_FALKOR_V1                0x800
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
 #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
+#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
 #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
 #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
index 86825aa..97f21cc 100644 (file)
 #define read_sysreg_el2(r)     read_sysreg_elx(r, _EL2, _EL1)
 #define write_sysreg_el2(v,r)  write_sysreg_elx(v, r, _EL2, _EL1)
 
-/**
- * hyp_alternate_select - Generates patchable code sequences that are
- * used to switch between two implementations of a function, depending
- * on the availability of a feature.
- *
- * @fname: a symbol name that will be defined as a function returning a
- * function pointer whose type will match @orig and @alt
- * @orig: A pointer to the default function, as returned by @fname when
- * @cond doesn't hold
- * @alt: A pointer to the alternate function, as returned by @fname
- * when @cond holds
- * @cond: a CPU feature (as described in asm/cpufeature.h)
- */
-#define hyp_alternate_select(fname, orig, alt, cond)                   \
-typeof(orig) * __hyp_text fname(void)                                  \
-{                                                                      \
-       typeof(alt) *val = orig;                                        \
-       asm volatile(ALTERNATIVE("nop           \n",                    \
-                                "mov   %0, %1  \n",                    \
-                                cond)                                  \
-                    : "+r" (val) : "r" (alt));                         \
-       return val;                                                     \
-}
-
 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
index b61b50b..c23c473 100644 (file)
@@ -215,12 +215,18 @@ static inline unsigned long kaslr_offset(void)
  * up with a tagged userland pointer. Clear the tag to get a sane pointer to
  * pass on to access_ok(), for instance.
  */
-#define untagged_addr(addr)    \
+#define __untagged_addr(addr)  \
        ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
 
+#define untagged_addr(addr)    ({                                      \
+       u64 __addr = (__force u64)addr;                                 \
+       __addr &= __untagged_addr(__addr);                              \
+       (__force __typeof__(addr))__addr;                               \
+})
+
 #ifdef CONFIG_KASAN_SW_TAGS
 #define __tag_shifted(tag)     ((u64)(tag) << 56)
-#define __tag_reset(addr)      untagged_addr(addr)
+#define __tag_reset(addr)      __untagged_addr(addr)
 #define __tag_get(addr)                (__u8)((u64)(addr) >> 56)
 #else
 #define __tag_shifted(tag)     0UL
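
The new untagged_addr() above only strips the tag from TTBR0 (user) addresses: sign_extend64(addr, 55) smears bit 55 across bits 63..56, and ANDing that back into the original address zeroes the top byte exactly when bit 55 is clear, leaving kernel addresses untouched. The entry.S hunk earlier implements the same trick in assembly (sbfx plus and). A standalone sketch of the arithmetic, with illustrative pointer values:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t sign_extend64(uint64_t value, int index)
    {
            int shift = 63 - index;
            /* replicate bit 'index' into all higher bits */
            return (uint64_t)(((int64_t)(value << shift)) >> shift);
    }

    static uint64_t untagged_addr(uint64_t addr)
    {
            return addr & sign_extend64(addr, 55);
    }

    int main(void)
    {
            /* tagged user pointer: the 0x5a tag byte is cleared */
            printf("%016llx\n",
                   (unsigned long long)untagged_addr(0x5a007fff12345678ULL));
            /* kernel pointer (bit 55 set): passes through unchanged */
            printf("%016llx\n",
                   (unsigned long long)untagged_addr(0xffff8000deadbeefULL));
            return 0;
    }
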
index 14d0bc4..172d76f 100644 (file)
@@ -15,8 +15,6 @@
 
 #include <asm-generic/pgalloc.h>       /* for pte_{alloc,free}_one */
 
-#define check_pgt_cache()              do { } while (0)
-
 #define PGD_SIZE       (PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
index 9a21b84..8dc6c5c 100644 (file)
 #define PROT_DEFAULT           (_PROT_DEFAULT | PTE_MAYBE_NG)
 #define PROT_SECT_DEFAULT      (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
 
-#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL       (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -80,8 +80,9 @@
 #define PAGE_S2_DEVICE         __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
 
 #define PAGE_NONE              __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED            __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC       __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
+#define PAGE_SHARED            __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC       __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
 #define PAGE_READONLY          __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_READONLY_EXEC     __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
 #define PAGE_EXECONLY          __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
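
The comment added above encodes the arm64 dirty-tracking convention: PTE_WRITE doubles as the hardware DBM bit, so a writable-but-clean page keeps PTE_RDONLY set and only loses it on the first write (cleared by hardware with DBM, or by the fault handler without it). A sketch of the resulting state check, roughly mirroring the kernel's pte_hw_dirty(); the bit positions happen to match arm64 but are incidental here:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PTE_WRITE   (1ULL << 51)   /* also the DBM bit */
    #define PTE_RDONLY  (1ULL << 7)    /* AP[2]            */

    static bool pte_hw_dirty(uint64_t pte)
    {
            /* writable and no longer read-only means "has been written" */
            return (pte & PTE_WRITE) && !(pte & PTE_RDONLY);
    }

    int main(void)
    {
            uint64_t pte = PTE_WRITE | PTE_RDONLY;   /* shared, still clean */
            printf("clean mapping dirty? %d\n", pte_hw_dirty(pte));
            pte &= ~PTE_RDONLY;                      /* first write access  */
            printf("after write   dirty? %d\n", pte_hw_dirty(pte));
            return 0;
    }
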
index 57427d1..8330810 100644 (file)
@@ -861,8 +861,6 @@ extern int kern_addr_valid(unsigned long addr);
 
 #include <asm-generic/pgtable.h>
 
-static inline void pgtable_cache_init(void) { }
-
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
  */
@@ -878,9 +876,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
-#define kc_vaddr_to_offset(v)  ((v) & ~PAGE_END)
-#define kc_offset_to_vaddr(o)  ((o) | PAGE_END)
-
 #ifdef CONFIG_ARM64_PA_BITS_52
 #define phys_to_ttbr(addr)     (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
 #else
index c67848c..5623685 100644 (file)
@@ -280,8 +280,6 @@ static inline void spin_lock_prefetch(const void *ptr)
                     "nop") : : "p" (ptr));
 }
 
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
 extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
 extern void __init minsigstksz_setup(void);
 
index 972d196..6e919fa 100644 (file)
 #define SYS_FAR_EL1                    sys_reg(3, 0, 6, 0, 0)
 #define SYS_PAR_EL1                    sys_reg(3, 0, 7, 4, 0)
 
-#define SYS_PAR_EL1_F                  BIT(1)
+#define SYS_PAR_EL1_F                  BIT(0)
 #define SYS_PAR_EL1_FST                        GENMASK(6, 1)
 
 /*** Statistical Profiling Extension ***/
index a95d1fc..b76df82 100644 (file)
@@ -44,7 +44,7 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
                                  unsigned long addr)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        tlb_remove_table(tlb, pte);
 }
 
index fb60a88..3fd8fd6 100644 (file)
@@ -20,7 +20,7 @@
 
 #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
 
-#if __LINUX_ARM_ARCH__ >= 8
+#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD)
 #define aarch32_smp_mb()       dmb(ish)
 #define aarch32_smp_rmb()      dmb(ishld)
 #define aarch32_smp_wmb()      dmb(ishst)
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
deleted file mode 100644 (file)
index 1f38bf3..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 ARM Limited
- */
-#ifndef __ASM_VDSO_DATAPAGE_H
-#define __ASM_VDSO_DATAPAGE_H
-
-#ifndef __ASSEMBLY__
-
-struct vdso_data {
-       __u64 cs_cycle_last;    /* Timebase at clocksource init */
-       __u64 raw_time_sec;     /* Raw time */
-       __u64 raw_time_nsec;
-       __u64 xtime_clock_sec;  /* Kernel time */
-       __u64 xtime_clock_nsec;
-       __u64 xtime_coarse_sec; /* Coarse time */
-       __u64 xtime_coarse_nsec;
-       __u64 wtm_clock_sec;    /* Wall to monotonic time */
-       __u64 wtm_clock_nsec;
-       __u32 tb_seq_count;     /* Timebase sequence counter */
-       /* cs_* members must be adjacent and in this order (ldp accesses) */
-       __u32 cs_mono_mult;     /* NTP-adjusted clocksource multiplier */
-       __u32 cs_shift;         /* Clocksource shift (mono = raw) */
-       __u32 cs_raw_mult;      /* Raw clocksource multiplier */
-       __u32 tz_minuteswest;   /* Whacky timezone stuff */
-       __u32 tz_dsttime;
-       __u32 use_syscall;
-       __u32 hrtimer_res;
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h
deleted file mode 100644 (file)
index e6e7840..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_XEN_OPS_H
-#define _ASM_XEN_OPS_H
-
-void xen_efi_runtime_setup(void);
-
-#endif /* _ASM_XEN_OPS_H */
index 2ec09de..ca158be 100644 (file)
@@ -174,6 +174,9 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops)
        struct insn_emulation *insn;
 
        insn = kzalloc(sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return;
+
        insn->ops = ops;
        insn->min = INSN_UNDEF;
 
@@ -233,6 +236,8 @@ static void __init register_insn_emulation_sysctl(void)
 
        insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl),
                               GFP_KERNEL);
+       if (!insns_sysctl)
+               return;
 
        raw_spin_lock_irqsave(&insn_emulation_lock, flags);
        list_for_each_entry(insn, &insn_emulation, node) {
index 1e43ba5..93f34b4 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
+#include <asm/smp_plat.h>
 
 static bool __maybe_unused
 is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
@@ -128,8 +129,8 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
        int cpu, slot = -1;
 
        /*
-        * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs
-        * start/end if we're a guest. Skip the hyp-vectors work.
+        * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
+        * we're a guest. Skip the hyp-vectors work.
         */
        if (!hyp_vecs_start) {
                __this_cpu_write(bp_hardening_data.fn, fn);
@@ -488,6 +489,7 @@ static const struct midr_range arm64_ssb_cpus[] = {
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+       MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
        {},
 };
 
@@ -572,6 +574,7 @@ static const struct midr_range spectre_v2_safe_list[] = {
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+       MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
        { /* sentinel */ }
 };
 
@@ -623,6 +626,30 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
        return (need_wa > 0);
 }
 
+static const __maybe_unused struct midr_range tx2_family_cpus[] = {
+       MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+       MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+       {},
+};
+
+static bool __maybe_unused
+needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry,
+                        int scope)
+{
+       int i;
+
+       if (!is_affected_midr_range_list(entry, scope) ||
+           !is_hyp_mode_available())
+               return false;
+
+       for_each_possible_cpu(i) {
+               if (MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0) != 0)
+                       return true;
+       }
+
+       return false;
+}
+
 #ifdef CONFIG_HARDEN_EL2_VECTORS
 
 static const struct midr_range arm64_harden_el2_vectors[] = {
@@ -634,17 +661,23 @@ static const struct midr_range arm64_harden_el2_vectors[] = {
 #endif
 
 #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
-
-static const struct midr_range arm64_repeat_tlbi_cpus[] = {
+static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
-       MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0),
+       {
+               ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0)
+       },
+       {
+               .midr_range.model = MIDR_QCOM_KRYO,
+               .matches = is_kryo_midr,
+       },
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_1286807
-       MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+       {
+               ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+       },
 #endif
        {},
 };
-
 #endif
 
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
@@ -712,6 +745,33 @@ static const struct midr_range erratum_1418040_list[] = {
 };
 #endif
 
+#ifdef CONFIG_ARM64_ERRATUM_845719
+static const struct midr_range erratum_845719_list[] = {
+       /* Cortex-A53 r0p[01234] */
+       MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
+       /* Brahma-B53 r0p[0] */
+       MIDR_REV(MIDR_BRAHMA_B53, 0, 0),
+       {},
+};
+#endif
+
+#ifdef CONFIG_ARM64_ERRATUM_843419
+static const struct arm64_cpu_capabilities erratum_843419_list[] = {
+       {
+               /* Cortex-A53 r0p[01234] */
+               .matches = is_affected_midr_range,
+               ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
+               MIDR_FIXED(0x4, BIT(8)),
+       },
+       {
+               /* Brahma-B53 r0p[0] */
+               .matches = is_affected_midr_range,
+               ERRATA_MIDR_REV(MIDR_BRAHMA_B53, 0, 0),
+       },
+       {},
+};
+#endif
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
        {
@@ -743,19 +803,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_843419
        {
-       /* Cortex-A53 r0p[01234] */
                .desc = "ARM erratum 843419",
                .capability = ARM64_WORKAROUND_843419,
-               ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
-               MIDR_FIXED(0x4, BIT(8)),
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               .matches = cpucap_multi_entry_cap_matches,
+               .match_list = erratum_843419_list,
        },
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_845719
        {
-       /* Cortex-A53 r0p[01234] */
                .desc = "ARM erratum 845719",
                .capability = ARM64_WORKAROUND_845719,
-               ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
+               ERRATA_MIDR_RANGE_LIST(erratum_845719_list),
        },
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
@@ -791,6 +850,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
        {
                .desc = "Qualcomm Technologies Falkor/Kryo erratum 1003",
                .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
                .matches = cpucap_multi_entry_cap_matches,
                .match_list = qcom_erratum_1003_list,
        },
@@ -799,7 +859,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
        {
                .desc = "Qualcomm erratum 1009, ARM erratum 1286807",
                .capability = ARM64_WORKAROUND_REPEAT_TLBI,
-               ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus),
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               .matches = cpucap_multi_entry_cap_matches,
+               .match_list = arm64_repeat_tlbi_list,
        },
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_858921
@@ -851,6 +913,19 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
                .matches = has_cortex_a76_erratum_1463225,
        },
+#endif
+#ifdef CONFIG_CAVIUM_TX2_ERRATUM_219
+       {
+               .desc = "Cavium ThunderX2 erratum 219 (KVM guest sysreg trapping)",
+               .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_TVM,
+               ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
+               .matches = needs_tx2_tvm_workaround,
+       },
+       {
+               .desc = "Cavium ThunderX2 erratum 219 (PRFM removal)",
+               .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM,
+               ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
+       },
 #endif
        {
        }
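
needs_tx2_tvm_workaround() earlier in this file's diff keys off MPIDR_EL1 affinity level 0, which numbers the hardware thread within a core: any possible CPU with a non-zero level-0 field implies SMT is enabled, the configuration erratum 219 cares about. A sketch of that check with made-up MPIDR values (levels 0 to 2 sit at byte offsets 0, 8 and 16; level 3, not needed here, sits at bit 32):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t mpidr_affinity_level(uint64_t mpidr, int level)
    {
            return (mpidr >> (level * 8)) & 0xff;   /* levels 0-2 only */
    }

    int main(void)
    {
            /* two cores, two threads each: aff0 distinguishes the threads */
            uint64_t cpus[] = { 0x0000, 0x0001, 0x0100, 0x0101 };
            bool smt = false;

            for (size_t i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++)
                    if (mpidr_affinity_level(cpus[i], 0) != 0)
                            smt = true;

            printf("SMT detected: %d\n", smt);
            return 0;
    }
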
index 9323bcc..80f459a 100644 (file)
@@ -136,6 +136,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 
 static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
                       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
@@ -175,11 +176,16 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
index 84a8227..cf3bd29 100644 (file)
@@ -604,7 +604,7 @@ el1_da:
         */
        mrs     x3, far_el1
        inherit_daif    pstate=x23, tmp=x2
-       clear_address_tag x0, x3
+       untagged_addr   x0, x3
        mov     x2, sp                          // struct pt_regs
        bl      do_mem_abort
 
@@ -680,7 +680,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
        orr     x24, x24, x0
 alternative_else_nop_endif
        cbnz    x24, 1f                         // preempt count != 0 || NMI return path
-       bl      preempt_schedule_irq            // irq en/disable is done inside
+       bl      arm64_preempt_schedule_irq      // irq en/disable is done inside
 1:
 #endif
 
@@ -775,6 +775,7 @@ el0_sync_compat:
        b.ge    el0_dbg
        b       el0_inv
 el0_svc_compat:
+       gic_prio_kentry_setup tmp=x1
        mov     x0, sp
        bl      el0_svc_compat_handler
        b       ret_to_user
@@ -807,7 +808,7 @@ el0_da:
        mrs     x26, far_el1
        ct_user_exit_irqoff
        enable_daif
-       clear_address_tag x0, x26
+       untagged_addr   x0, x26
        mov     x1, x25
        mov     x2, sp
        bl      do_mem_abort
@@ -1070,7 +1071,9 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
 #else
        ldr     x30, =vectors
 #endif
+alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
        prfm    plil1strm, [x30, #(1b - tramp_vectors)]
+alternative_else_nop_endif
        msr     vbar_el1, x30
        add     x30, x30, #(1b - tramp_vectors)
        isb
index 1717732..06e56b4 100644 (file)
@@ -121,10 +121,16 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
                        /*
                         * Ensure updated trampoline is visible to instruction
-                        * fetch before we patch in the branch.
+                        * fetch before we patch in the branch. Although the
+                        * architecture doesn't require an IPI in this case,
+                        * Neoverse-N1 erratum #1542419 does require one
+                        * if the TLB maintenance in module_enable_ro() is
+                        * skipped due to rodata_enabled. It doesn't seem worth
+                        * it to make it conditional given that this is
+                        * certainly not a fast-path.
                         */
-                       __flush_icache_range((unsigned long)&dst[0],
-                                            (unsigned long)&dst[1]);
+                       flush_icache_range((unsigned long)&dst[0],
+                                          (unsigned long)&dst[1]);
                }
                addr = (unsigned long)dst;
 #else /* CONFIG_ARM64_MODULE_PLTS */
index e0a7fce..a96b292 100644 (file)
@@ -201,6 +201,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
                                 gfp_t mask)
 {
        int rc = 0;
+       pgd_t *trans_pgd;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
@@ -215,7 +216,13 @@ static int create_safe_exec_page(void *src_start, size_t length,
        memcpy((void *)dst, src_start, length);
        __flush_icache_range(dst, dst + length);
 
-       pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+       trans_pgd = allocator(mask);
+       if (!trans_pgd) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       pgdp = pgd_offset_raw(trans_pgd, dst_addr);
        if (pgd_none(READ_ONCE(*pgdp))) {
                pudp = allocator(mask);
                if (!pudp) {
index 03689c0..71f788c 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/kernel.h>
+#include <linux/lockdep.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
@@ -44,6 +45,7 @@
 #include <asm/alternative.h>
 #include <asm/arch_gicv3.h>
 #include <asm/compat.h>
+#include <asm/cpufeature.h>
 #include <asm/cacheflush.h>
 #include <asm/exec.h>
 #include <asm/fpsimd.h>
@@ -332,22 +334,27 @@ void arch_release_task_struct(struct task_struct *tsk)
        fpsimd_release_task(tsk);
 }
 
-/*
- * src and dst may temporarily have aliased sve_state after task_struct
- * is copied.  We cannot fix this properly here, because src may have
- * live SVE state and dst's thread_info may not exist yet, so tweaking
- * either src's or dst's TIF_SVE is not safe.
- *
- * The unaliasing is done in copy_thread() instead.  This works because
- * dst is not schedulable or traceable until both of these functions
- * have been called.
- */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
        if (current->mm)
                fpsimd_preserve_current_state();
        *dst = *src;
 
+       /* We rely on the above assignment to initialize dst's thread_flags: */
+       BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
+
+       /*
+        * Detach src's sve_state (if any) from dst so that it does not
+        * get erroneously used or freed prematurely.  dst's sve_state
+        * will be allocated on demand later on if dst uses SVE.
+        * For consistency, also clear TIF_SVE here: this could be done
+        * later in copy_process(), but to avoid tripping up future
+        * maintainers it is best not to leave TIF_SVE and sve_state in
+        * an inconsistent state, even temporarily.
+        */
+       dst->thread.sve_state = NULL;
+       clear_tsk_thread_flag(dst, TIF_SVE);
+
        return 0;
 }
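
The hunk above moves the sve_state unaliasing from copy_thread() into arch_dup_task_struct() itself, so the copied task never holds a pointer into the parent's buffer, even transiently. A generic userspace sketch of the bug class and the fix; names are illustrative, not the kernel's:

    #include <stdlib.h>

    struct task {
            int flags;
            void *sve_state;       /* lazily allocated per-task buffer */
    };

    static void dup_task(struct task *dst, const struct task *src)
    {
            *dst = *src;           /* copies the pointer too...              */
            dst->sve_state = NULL; /* ...so detach it; reallocated on demand */
            dst->flags &= ~1;      /* and drop the "has SVE state" flag      */
    }

    int main(void)
    {
            struct task parent = { .flags = 1, .sve_state = malloc(64) };
            struct task child;

            dup_task(&child, &parent);
            free(parent.sve_state);    /* safe: child no longer aliases it */
            free(child.sve_state);     /* free(NULL) is a no-op            */
            return 0;
    }
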
 
@@ -360,13 +367,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
        memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
 
-       /*
-        * Unalias p->thread.sve_state (if any) from the parent task
-        * and disable discard SVE state for p:
-        */
-       clear_tsk_thread_flag(p, TIF_SVE);
-       p->thread.sve_state = NULL;
-
        /*
         * In case p was allocated the same task_struct pointer as some
         * other recently-exited task, make sure p is disassociated from
@@ -557,14 +557,6 @@ unsigned long arch_align_stack(unsigned long sp)
        return sp & ~0xf;
 }
 
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       if (is_compat_task())
-               return randomize_page(mm->brk, SZ_32M);
-       else
-               return randomize_page(mm->brk, SZ_1G);
-}
-
 /*
  * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
  */
@@ -641,3 +633,19 @@ static int __init tagged_addr_init(void)
 
 core_initcall(tagged_addr_init);
 #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
+
+asmlinkage void __sched arm64_preempt_schedule_irq(void)
+{
+       lockdep_assert_irqs_disabled();
+
+       /*
+        * Preempting a task from an IRQ means we leave copies of PSTATE
+        * on the stack. cpufeature's enable calls may modify PSTATE, but
+        * resuming one of these preempted tasks would undo those changes.
+        *
+        * Only allow a task to be preempted once cpufeatures have been
+        * enabled.
+        */
+       if (static_branch_likely(&arm64_const_caps_ready))
+               preempt_schedule_irq();
+}
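
arm64_preempt_schedule_irq() above is a thin gate: preemption is refused until the arm64_const_caps_ready static key is set, because cpufeature enable callbacks may rewrite PSTATE in ways that resuming a stale stacked copy would undo. A sketch of the gating pattern with a plain flag standing in for the static key:

    #include <stdbool.h>
    #include <stdio.h>

    static bool caps_ready;                 /* stand-in for the static key */

    static void preempt_schedule_irq(void) { puts("preempting"); }

    static void arch_preempt_schedule_irq(void)
    {
            /* refuse to preempt until the one-way readiness flag flips */
            if (caps_ready)
                    preempt_schedule_irq();
    }

    int main(void)
    {
            arch_preempt_schedule_irq();    /* too early: silently skipped */
            caps_ready = true;
            arch_preempt_schedule_irq();    /* now allowed */
            return 0;
    }
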
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
deleted file mode 100644 (file)
index e69de29..0000000
index 1fba077..76b327f 100644 (file)
@@ -8,15 +8,21 @@
 ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
 include $(srctree)/lib/vdso/Makefile
 
-COMPATCC := $(CROSS_COMPILE_COMPAT)gcc
+# Same as cc-*option, but using CC_COMPAT instead of CC
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+CC_COMPAT ?= $(CC)
+else
+CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
+endif
 
-# Same as cc-*option, but using COMPATCC instead of CC
 cc32-option = $(call try-run,\
-        $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+        $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
 cc32-disable-warning = $(call try-run,\
-       $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+       $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 cc32-ldoption = $(call try-run,\
-        $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+        $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+cc32-as-instr = $(call try-run,\
+       printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
 
 # We cannot use the global flags to compile the vDSO files, the main reason
 # being that the 32-bit compiler may be older than the main (64-bit) compiler
@@ -25,22 +31,21 @@ cc32-ldoption = $(call try-run,\
 # arm64 one.
 # As a result we set our own flags here.
 
-# From top-level Makefile
-# NOSTDINC_FLAGS
-VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include)
+# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
+VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
 VDSO_CPPFLAGS += $(LINUXINCLUDE)
-VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS)
 
 # Common C and assembly flags
 # From top-level Makefile
 VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),)
+VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
+endif
+
 VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
 ifdef CONFIG_DEBUG_INFO
 VDSO_CAFLAGS += -g
 endif
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y)
-VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
-endif
 
 # From arm Makefile
 VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
@@ -55,6 +60,7 @@ endif
 VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
 VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
 
+
 # Try to compile for ARMv8. If the compiler is too old and doesn't support it,
 # fall back to v7. There is no easy way to check for what architecture the code
 # is being compiled, so define a macro specifying that (see arch/arm/Makefile).
@@ -91,6 +97,12 @@ VDSO_CFLAGS += -Wno-int-to-pointer-cast
 VDSO_AFLAGS := $(VDSO_CAFLAGS)
 VDSO_AFLAGS += -D__ASSEMBLY__
 
+# Check for binutils support for dmb ishld
+dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1)
+
+VDSO_CFLAGS += $(dmbinstr)
+VDSO_AFLAGS += $(dmbinstr)
+
 VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
 # From arm vDSO Makefile
 VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
@@ -159,14 +171,14 @@ quiet_cmd_vdsold_and_vdso_check = LD32    $@
       cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
 
 quiet_cmd_vdsold = LD32    $@
-      cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
+      cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
                    -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
 quiet_cmd_vdsocc = CC32    $@
-      cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
+      cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
 quiet_cmd_vdsocc_gettimeofday = CC32    $@
-      cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
+      cmd_vdsocc_gettimeofday = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
 quiet_cmd_vdsoas = AS32    $@
-      cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
+      cmd_vdsoas = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
 
 quiet_cmd_vdsomunge = MUNGE   $@
       cmd_vdsomunge = $(obj)/$(munge) $< $@
index bd978ad..799e84a 100644 (file)
@@ -124,6 +124,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 {
        u64 hcr = vcpu->arch.hcr_el2;
 
+       if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+               hcr |= HCR_TVM;
+
        write_sysreg(hcr, hcr_el2);
 
        if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
@@ -174,8 +177,10 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
         * the crucial bit is "On taking a vSError interrupt,
         * HCR_EL2.VSE is cleared to 0."
         */
-       if (vcpu->arch.hcr_el2 & HCR_VSE)
-               vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
+       if (vcpu->arch.hcr_el2 & HCR_VSE) {
+               vcpu->arch.hcr_el2 &= ~HCR_VSE;
+               vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+       }
 
        if (has_vhe())
                deactivate_traps_vhe();
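
The two-line replacement above narrows the resync to a single bit: rather than reloading the whole cached hcr_el2 from hardware, only VSE is copied back, so unrelated cached bits cannot be clobbered by transient hardware state. The bit-merge idiom in isolation (HCR_EL2.VSE is bit 8):

    #include <stdint.h>
    #include <stdio.h>

    #define HCR_VSE (1ULL << 8)    /* virtual SError pending */

    static uint64_t merge_vse(uint64_t cached, uint64_t hw)
    {
            cached &= ~HCR_VSE;            /* clear the cached bit...       */
            cached |= hw & HCR_VSE;        /* ...and adopt the hardware one */
            return cached;
    }

    int main(void)
    {
            uint64_t cached = HCR_VSE | 0x30;  /* we injected a vSError        */
            uint64_t hw = 0x30;                /* guest took it: VSE now clear */
            printf("%#llx\n", (unsigned long long)merge_vse(cached, hw));
            return 0;
    }
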
@@ -229,20 +234,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
        }
 }
 
-static bool __hyp_text __true_value(void)
-{
-       return true;
-}
-
-static bool __hyp_text __false_value(void)
-{
-       return false;
-}
-
-static hyp_alternate_select(__check_arm_834220,
-                           __false_value, __true_value,
-                           ARM64_WORKAROUND_834220);
-
 static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
 {
        u64 par, tmp;
@@ -298,7 +289,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
         * resolve the IPA using the AT instruction.
         */
        if (!(esr & ESR_ELx_S1PTW) &&
-           (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+           (cpus_have_const_cap(ARM64_WORKAROUND_834220) ||
+            (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
                if (!__translate_far_to_hpfar(far, &hpfar))
                        return false;
        } else {
@@ -393,6 +385,61 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
        return true;
 }
 
+static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+       u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
+       int rt = kvm_vcpu_sys_get_rt(vcpu);
+       u64 val = vcpu_get_reg(vcpu, rt);
+
+       /*
+        * The normal sysreg handling code expects to see the traps,
+        * let's not do anything here.
+        */
+       if (vcpu->arch.hcr_el2 & HCR_TVM)
+               return false;
+
+       switch (sysreg) {
+       case SYS_SCTLR_EL1:
+               write_sysreg_el1(val, SYS_SCTLR);
+               break;
+       case SYS_TTBR0_EL1:
+               write_sysreg_el1(val, SYS_TTBR0);
+               break;
+       case SYS_TTBR1_EL1:
+               write_sysreg_el1(val, SYS_TTBR1);
+               break;
+       case SYS_TCR_EL1:
+               write_sysreg_el1(val, SYS_TCR);
+               break;
+       case SYS_ESR_EL1:
+               write_sysreg_el1(val, SYS_ESR);
+               break;
+       case SYS_FAR_EL1:
+               write_sysreg_el1(val, SYS_FAR);
+               break;
+       case SYS_AFSR0_EL1:
+               write_sysreg_el1(val, SYS_AFSR0);
+               break;
+       case SYS_AFSR1_EL1:
+               write_sysreg_el1(val, SYS_AFSR1);
+               break;
+       case SYS_MAIR_EL1:
+               write_sysreg_el1(val, SYS_MAIR);
+               break;
+       case SYS_AMAIR_EL1:
+               write_sysreg_el1(val, SYS_AMAIR);
+               break;
+       case SYS_CONTEXTIDR_EL1:
+               write_sysreg_el1(val, SYS_CONTEXTIDR);
+               break;
+       default:
+               return false;
+       }
+
+       __kvm_skip_instr(vcpu);
+       return true;
+}
+
 /*
  * Return true when we were able to fixup the guest exit and should return to
  * the guest, false when we should restore the host state and return to the
@@ -412,6 +459,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
        if (*exit_code != ARM_EXCEPTION_TRAP)
                goto exit;
 
+       if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+           kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+           handle_tx2_tvm(vcpu))
+               return true;
+
        /*
         * We trap the first access to the FP/SIMD to save the host context
         * and restore the guest context lazily.
index c466060..eb0efc5 100644 (file)
@@ -67,10 +67,14 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
        isb();
 }
 
-static hyp_alternate_select(__tlb_switch_to_guest,
-                           __tlb_switch_to_guest_nvhe,
-                           __tlb_switch_to_guest_vhe,
-                           ARM64_HAS_VIRT_HOST_EXTN);
+static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
+                                            struct tlb_inv_context *cxt)
+{
+       if (has_vhe())
+               __tlb_switch_to_guest_vhe(kvm, cxt);
+       else
+               __tlb_switch_to_guest_nvhe(kvm, cxt);
+}
 
 static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
                                                struct tlb_inv_context *cxt)
@@ -98,10 +102,14 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
        write_sysreg(0, vttbr_el2);
 }
 
-static hyp_alternate_select(__tlb_switch_to_host,
-                           __tlb_switch_to_host_nvhe,
-                           __tlb_switch_to_host_vhe,
-                           ARM64_HAS_VIRT_HOST_EXTN);
+static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
+                                           struct tlb_inv_context *cxt)
+{
+       if (has_vhe())
+               __tlb_switch_to_host_vhe(kvm, cxt);
+       else
+               __tlb_switch_to_host_nvhe(kvm, cxt);
+}
 
 void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
@@ -111,7 +119,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 
        /* Switch to requested VMID */
        kvm = kern_hyp_va(kvm);
-       __tlb_switch_to_guest()(kvm, &cxt);
+       __tlb_switch_to_guest(kvm, &cxt);
 
        /*
         * We could do so much better if we had the VA as well.
@@ -154,7 +162,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
        if (!has_vhe() && icache_is_vpipt())
                __flush_icache_all();
 
-       __tlb_switch_to_host()(kvm, &cxt);
+       __tlb_switch_to_host(kvm, &cxt);
 }
 
 void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -165,13 +173,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 
        /* Switch to requested VMID */
        kvm = kern_hyp_va(kvm);
-       __tlb_switch_to_guest()(kvm, &cxt);
+       __tlb_switch_to_guest(kvm, &cxt);
 
        __tlbi(vmalls12e1is);
        dsb(ish);
        isb();
 
-       __tlb_switch_to_host()(kvm, &cxt);
+       __tlb_switch_to_host(kvm, &cxt);
 }
 
 void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -180,13 +188,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
        struct tlb_inv_context cxt;
 
        /* Switch to requested VMID */
-       __tlb_switch_to_guest()(kvm, &cxt);
+       __tlb_switch_to_guest(kvm, &cxt);
 
        __tlbi(vmalle1);
        dsb(nsh);
        isb();
 
-       __tlb_switch_to_host()(kvm, &cxt);
+       __tlb_switch_to_host(kvm, &cxt);
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
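
The conversions above replace hyp_alternate_select() with an ordinary has_vhe() branch, trading a patched function-pointer indirection (note the double call, __tlb_switch_to_guest()(kvm, &cxt), in the old callers) for a direct conditional. A sketch of the before/after shape with stand-in functions, not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    static bool has_vhe(void) { return true; }    /* stand-in predicate */

    static void switch_to_guest_vhe(void)  { puts("vhe path");  }
    static void switch_to_guest_nvhe(void) { puts("nvhe path"); }

    /* old style: a selector returning a function pointer... */
    typedef void (*switch_fn)(void);
    static switch_fn tlb_switch_to_guest_sel(void)
    {
            return has_vhe() ? switch_to_guest_vhe : switch_to_guest_nvhe;
    }

    /* ...new style: just branch at the call site */
    static void tlb_switch_to_guest(void)
    {
            if (has_vhe())
                    switch_to_guest_vhe();
            else
                    switch_to_guest_nvhe();
    }

    int main(void)
    {
            tlb_switch_to_guest_sel()();   /* old: note the double call */
            tlb_switch_to_guest();         /* new */
            return 0;
    }
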
index 2071260..46822af 100644 (file)
@@ -632,6 +632,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
         */
        val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
               | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+       if (!system_supports_32bit_el0())
+               val |= ARMV8_PMU_PMCR_LC;
        __vcpu_sys_reg(vcpu, r->reg) = val;
 }
 
@@ -682,6 +684,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                val = __vcpu_sys_reg(vcpu, PMCR_EL0);
                val &= ~ARMV8_PMU_PMCR_MASK;
                val |= p->regval & ARMV8_PMU_PMCR_MASK;
+               if (!system_supports_32bit_el0())
+                       val |= ARMV8_PMU_PMCR_LC;
                __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
                kvm_pmu_handle_pmcr(vcpu, val);
                kvm_vcpu_pmu_restore_guest(vcpu);
index 115d7a0..9fc6db0 100644 (file)
@@ -113,6 +113,15 @@ static inline bool is_ttbr1_addr(unsigned long addr)
        return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
 }
 
+static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm)
+{
+       /* Either init_pg_dir or swapper_pg_dir */
+       if (mm == &init_mm)
+               return __pa_symbol(mm->pgd);
+
+       return (unsigned long)virt_to_phys(mm->pgd);
+}
+
 /*
  * Dump out the page tables associated with 'addr' in the currently active mm.
  */
@@ -141,7 +150,7 @@ static void show_pte(unsigned long addr)
 
        pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n",
                 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
-                vabits_actual, (unsigned long)virt_to_phys(mm->pgd));
+                vabits_actual, mm_to_pgd_phys(mm));
        pgdp = pgd_offset(mm, addr);
        pgd = READ_ONCE(*pgdp);
        pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
@@ -259,14 +268,18 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
        par = read_sysreg(par_el1);
        local_irq_restore(flags);
 
+       /*
+        * If we now have a valid translation, treat the translation fault as
+        * spurious.
+        */
        if (!(par & SYS_PAR_EL1_F))
-               return false;
+               return true;
 
        /*
         * If we got a different type of fault from the AT instruction,
         * treat the translation fault as spurious.
         */
-       dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par);
+       dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
        return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
 }
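
The dfsc fix above swaps FIELD_PREP(), which packs a value into a field
position, for FIELD_GET(), which extracts a field from a register value; with
FIELD_PREP the already-positioned PAR_EL1 bits were shifted up a second time
instead of being extracted. A stand-alone illustration, using an assumed mask
in place of SYS_PAR_EL1_FST:

	#include <stdint.h>
	#include <stdio.h>

	#define MASK  0x7eULL	/* assumed field mask: bits [6:1] */
	#define SHIFT 1

	static uint64_t field_prep(uint64_t v) { return (v << SHIFT) & MASK; }	/* pack   */
	static uint64_t field_get(uint64_t r)  { return (r & MASK) >> SHIFT; }	/* unpack */

	int main(void)
	{
		uint64_t par = field_prep(0x25);	/* pretend PAR_EL1 holds FST = 0x25 */

		/* field_prep(par) here would shift the register value again,
		 * which is the bug being fixed; field_get() recovers the field. */
		printf("dfsc = %#llx\n", (unsigned long long)field_get(par));
		return 0;
	}
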
 
index dc19300..ac48516 100644 (file)
@@ -56,8 +56,7 @@ void __sync_icache_dcache(pte_t pte)
        struct page *page = pte_page(pte);
 
        if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-               sync_icache_aliases(page_address(page),
-                                   PAGE_SIZE << compound_order(page));
+               sync_icache_aliases(page_address(page), page_size(page));
 }
 EXPORT_SYMBOL_GPL(__sync_icache_dcache);
 
index b050641..3028bac 100644 (file)
 
 #include <asm/cputype.h>
 
-/*
- * Leave enough space between the mmap area and the stack to honour ulimit in
- * the face of randomisation.
- */
-#define MIN_GAP (SZ_128M)
-#define MAX_GAP        (STACK_TOP/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-       if (current->personality & ADDR_COMPAT_LAYOUT)
-               return 1;
-
-       if (rlim_stack->rlim_cur == RLIM_INFINITY)
-               return 1;
-
-       return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
-       unsigned long rnd;
-
-#ifdef CONFIG_COMPAT
-       if (test_thread_flag(TIF_32BIT))
-               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-       else
-#endif
-               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-       return rnd << PAGE_SHIFT;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
-       unsigned long gap = rlim_stack->rlim_cur;
-       unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
-
-       /* Values close to RLIM_INFINITY can overflow. */
-       if (gap + pad > gap)
-               gap += pad;
-
-       if (gap < MIN_GAP)
-               gap = MIN_GAP;
-       else if (gap > MAX_GAP)
-               gap = MAX_GAP;
-
-       return PAGE_ALIGN(STACK_TOP - gap - rnd);
-}
-
-/*
- * This function, called very early during the creation of a new process VM
- * image, sets up which VM layout function to use:
- */
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
-       unsigned long random_factor = 0UL;
-
-       if (current->flags & PF_RANDOMIZE)
-               random_factor = arch_mmap_rnd();
-
-       /*
-        * Fall back to the standard layout if the personality bit is set, or
-        * if the expected stack growth is unlimited:
-        */
-       if (mmap_is_legacy(rlim_stack)) {
-               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-               mm->get_unmapped_area = arch_get_unmapped_area;
-       } else {
-               mm->mmap_base = mmap_base(random_factor, rlim_stack);
-               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-       }
-}
-
 /*
  * You really shouldn't be using read() or write() on /dev/mem.  This might go
  * away in the future.
index 53dc6f2..60c929f 100644 (file)
@@ -384,7 +384,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift)
         * folded, and if so pgtable_pmd_page_ctor() becomes nop.
         */
        if (shift == PAGE_SHIFT)
-               BUG_ON(!pgtable_page_ctor(phys_to_page(pa)));
+               BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa)));
        else if (shift == PMD_SHIFT)
                BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa)));
 
index 7548f9c..4a64089 100644 (file)
@@ -35,7 +35,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
                kmem_cache_free(pgd_cache, pgd);
 }
 
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
 {
        if (PGD_SIZE == PAGE_SIZE)
                return;
index a4fc65f..b66215e 100644 (file)
@@ -1,4 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 xen-arm-y      += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o)
 obj-y          := xen-arm.o hypercall.o
-obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o)
index 0bd8059..0b6919c 100644 (file)
@@ -59,11 +59,6 @@ extern unsigned long empty_zero_page;
 
 #define swapper_pg_dir ((pgd_t *) 0)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /*
  * c6x is !MMU, so define the simplest implementation
  */
index 27ef5b2..cb2a0d9 100644 (file)
@@ -5,8 +5,10 @@
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
 
-static int align_enable = 1;
-static int align_count;
+static int align_kern_enable = 1;
+static int align_usr_enable = 1;
+static int align_kern_count = 0;
+static int align_usr_count = 0;
 
 static inline uint32_t get_ptreg(struct pt_regs *regs, uint32_t rx)
 {
@@ -32,9 +34,6 @@ static int ldb_asm(uint32_t addr, uint32_t *valp)
        uint32_t val;
        int err;
 
-       if (!access_ok((void *)addr, 1))
-               return 1;
-
        asm volatile (
                "movi   %0, 0\n"
                "1:\n"
@@ -67,9 +66,6 @@ static int stb_asm(uint32_t addr, uint32_t val)
 {
        int err;
 
-       if (!access_ok((void *)addr, 1))
-               return 1;
-
        asm volatile (
                "movi   %0, 0\n"
                "1:\n"
@@ -203,8 +199,6 @@ static int stw_c(struct pt_regs *regs, uint32_t rz, uint32_t addr)
        if (stb_asm(addr, byte3))
                return 1;
 
-       align_count++;
-
        return 0;
 }
 
@@ -226,7 +220,14 @@ void csky_alignment(struct pt_regs *regs)
        uint32_t addr   = 0;
 
        if (!user_mode(regs))
+               goto kernel_area;
+
+       if (!align_usr_enable) {
+               pr_err("%s user disabled.\n", __func__);
                goto bad_area;
+       }
+
+       align_usr_count++;
 
        ret = get_user(tmp, (uint16_t *)instruction_pointer(regs));
        if (ret) {
@@ -234,6 +235,19 @@ void csky_alignment(struct pt_regs *regs)
                goto bad_area;
        }
 
+       goto good_area;
+
+kernel_area:
+       if (!align_kern_enable) {
+               pr_err("%s kernel disabled.\n", __func__);
+               goto bad_area;
+       }
+
+       align_kern_count++;
+
+       tmp = *(uint16_t *)instruction_pointer(regs);
+
+good_area:
        opcode = (uint32_t)tmp;
 
        rx  = opcode & 0xf;
@@ -286,18 +300,32 @@ bad_area:
        force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
 }
 
-static struct ctl_table alignment_tbl[4] = {
+static struct ctl_table alignment_tbl[5] = {
+       {
+               .procname = "kernel_enable",
+               .data = &align_kern_enable,
+               .maxlen = sizeof(align_kern_enable),
+               .mode = 0666,
+               .proc_handler = &proc_dointvec
+       },
+       {
+               .procname = "user_enable",
+               .data = &align_usr_enable,
+               .maxlen = sizeof(align_usr_enable),
+               .mode = 0666,
+               .proc_handler = &proc_dointvec
+       },
        {
-               .procname = "enable",
-               .data = &align_enable,
-               .maxlen = sizeof(align_enable),
+               .procname = "kernel_count",
+               .data = &align_kern_count,
+               .maxlen = sizeof(align_kern_count),
                .mode = 0666,
                .proc_handler = &proc_dointvec
        },
        {
-               .procname = "count",
-               .data = &align_count,
-               .maxlen = sizeof(align_count),
+               .procname = "user_count",
+               .data = &align_usr_count,
+               .maxlen = sizeof(align_usr_count),
                .mode = 0666,
                .proc_handler = &proc_dointvec
        },
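
For completeness, once the expanded table is registered (the registration call
is outside this hunk) the four knobs behave like any other sysctl file. A
sketch of reading one from user space; the /proc path here is an assumption,
not something this diff shows:

	#include <stdio.h>

	int main(void)
	{
		/* path assumed; it depends on how the table is registered */
		FILE *f = fopen("/proc/sys/csky/alignment/kernel_enable", "r");
		int v;

		if (f && fscanf(f, "%d", &v) == 1)
			printf("kernel alignment fixups: %s\n", v ? "on" : "off");
		if (f)
			fclose(f);
		return 0;
	}
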
index 10af8b6..9f1fe80 100644 (file)
 #include <asm/cacheflush.h>
 #include <asm/cachectl.h>
 
+#define PG_dcache_clean                PG_arch_1
+
 void flush_dcache_page(struct page *page)
 {
-       struct address_space *mapping = page_mapping(page);
-       unsigned long addr;
+       struct address_space *mapping;
 
-       if (mapping && !mapping_mapped(mapping)) {
-               set_bit(PG_arch_1, &(page)->flags);
+       if (page == ZERO_PAGE(0))
                return;
-       }
 
-       /*
-        * We could delay the flush for the !page_mapping case too.  But that
-        * case is for exec env/arg pages and those are %99 certainly going to
-        * get faulted into the tlb (and thus flushed) anyways.
-        */
-       addr = (unsigned long) page_address(page);
-       dcache_wb_range(addr, addr + PAGE_SIZE);
+       mapping = page_mapping_file(page);
+
+       if (mapping && !page_mapcount(page))
+               clear_bit(PG_dcache_clean, &page->flags);
+       else {
+               dcache_wbinv_all();
+               if (mapping)
+                       icache_inv_all();
+               set_bit(PG_dcache_clean, &page->flags);
+       }
 }
+EXPORT_SYMBOL(flush_dcache_page);
 
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-                     pte_t *pte)
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
+       pte_t *ptep)
 {
-       unsigned long addr;
+       unsigned long pfn = pte_pfn(*ptep);
        struct page *page;
-       unsigned long pfn;
 
-       pfn = pte_pfn(*pte);
-       if (unlikely(!pfn_valid(pfn)))
+       if (!pfn_valid(pfn))
                return;
 
        page = pfn_to_page(pfn);
-       addr = (unsigned long) page_address(page);
+       if (page == ZERO_PAGE(0))
+               return;
+
+       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+               dcache_wbinv_all();
 
-       if (vma->vm_flags & VM_EXEC ||
-           pages_do_alias(addr, address & PAGE_MASK))
-               cache_wbinv_all();
+       if (page_mapping_file(page)) {
+               if (vma->vm_flags & VM_EXEC)
+                       icache_inv_all();
+       }
+}
+
+void flush_kernel_dcache_page(struct page *page)
+{
+       struct address_space *mapping;
+
+       mapping = page_mapping_file(page);
+
+       if (!mapping || mapping_mapped(mapping))
+               dcache_wbinv_all();
+}
+EXPORT_SYMBOL(flush_kernel_dcache_page);
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+       unsigned long end)
+{
+       dcache_wbinv_all();
 
-       clear_bit(PG_arch_1, &(page)->flags);
+       if (vma->vm_flags & VM_EXEC)
+               icache_inv_all();
 }
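
The rewrite above is the usual deferred-flush idiom: flush_dcache_page() only
marks the page by clearing PG_dcache_clean, and the expensive maintenance runs
in update_mmu_cache() when the page is actually mapped into user space. A toy
user-space model of the idiom (not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	struct page { bool dcache_clean; };

	static void dcache_wbinv_all(void) { puts("writeback+invalidate dcache"); }

	/* Kernel dirtied the page: just mark it, defer the flush. */
	static void my_flush_dcache_page(struct page *p)
	{
		p->dcache_clean = false;
	}

	/* Page is being mapped into user space: flush if still dirty. */
	static void my_update_mmu_cache(struct page *p)
	{
		if (!p->dcache_clean) {
			dcache_wbinv_all();
			p->dcache_clean = true;
		}
	}

	int main(void)
	{
		struct page pg = { .dcache_clean = true };

		my_flush_dcache_page(&pg);
		my_update_mmu_cache(&pg);	/* one flush */
		my_update_mmu_cache(&pg);	/* already clean: no flush */
		return 0;
	}
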
index 5f663ae..79ef9e8 100644 (file)
@@ -4,46 +4,63 @@
 #ifndef __ABI_CSKY_CACHEFLUSH_H
 #define __ABI_CSKY_CACHEFLUSH_H
 
-#include <linux/compiler.h>
+#include <linux/mm.h>
 #include <asm/string.h>
 #include <asm/cache.h>
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-#define flush_cache_mm(mm)                     cache_wbinv_all()
+#define flush_cache_mm(mm)                     dcache_wbinv_all()
 #define flush_cache_page(vma, page, pfn)       cache_wbinv_all()
 #define flush_cache_dup_mm(mm)                 cache_wbinv_all()
 
+#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+extern void flush_kernel_dcache_page(struct page *);
+
+#define flush_dcache_mmap_lock(mapping)                xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)      xa_unlock_irq(&mapping->i_pages)
+
+static inline void flush_kernel_vmap_range(void *addr, int size)
+{
+       dcache_wbinv_all();
+}
+static inline void invalidate_kernel_vmap_range(void *addr, int size)
+{
+       dcache_wbinv_all();
+}
+
+#define ARCH_HAS_FLUSH_ANON_PAGE
+static inline void flush_anon_page(struct vm_area_struct *vma,
+                        struct page *page, unsigned long vmaddr)
+{
+       if (PageAnon(page))
+               cache_wbinv_all();
+}
+
 /*
  * cache_wbinv_range(start, end) is broken when current_mm != vma->mm.
  * Use cache_wbinv_all() here for now; this should be improved in future.
  */
-#define flush_cache_range(vma, start, end)     cache_wbinv_all()
-#define flush_cache_vmap(start, end)           cache_wbinv_range(start, end)
-#define flush_cache_vunmap(start, end)         cache_wbinv_range(start, end)
+extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+#define flush_cache_vmap(start, end)           cache_wbinv_all()
+#define flush_cache_vunmap(start, end)         cache_wbinv_all()
 
-#define flush_icache_page(vma, page)           cache_wbinv_all()
+#define flush_icache_page(vma, page)           do {} while (0)
 #define flush_icache_range(start, end)         cache_wbinv_range(start, end)
 
-#define flush_icache_user_range(vma, pg, adr, len) \
-                               cache_wbinv_range(adr, adr + len)
+#define flush_icache_user_range(vma,page,addr,len) \
+       flush_dcache_page(page)
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 do { \
-       cache_wbinv_all(); \
        memcpy(dst, src, len); \
-       cache_wbinv_all(); \
 } while (0)
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
 do { \
-       cache_wbinv_all(); \
        memcpy(dst, src, len); \
        cache_wbinv_all(); \
 } while (0)
 
-#define flush_dcache_mmap_lock(mapping)                do {} while (0)
-#define flush_dcache_mmap_unlock(mapping)      do {} while (0)
-
 #endif /* __ABI_CSKY_CACHEFLUSH_H */
index 6336e92..c864519 100644 (file)
@@ -1,13 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
-extern unsigned long shm_align_mask;
+#include <asm/shmparam.h>
+
 extern void flush_dcache_page(struct page *page);
 
 static inline unsigned long pages_do_alias(unsigned long addr1,
                                           unsigned long addr2)
 {
-       return (addr1 ^ addr2) & shm_align_mask;
+       return (addr1 ^ addr2) & (SHMLBA-1);
 }
 
 static inline void clear_user_page(void *addr, unsigned long vaddr,
index b462fd5..6792aca 100644 (file)
@@ -9,58 +9,63 @@
 #include <linux/random.h>
 #include <linux/io.h>
 
-unsigned long shm_align_mask = (0x4000 >> 1) - 1;   /* Sane caches */
+#define COLOUR_ALIGN(addr,pgoff)               \
+       ((((addr)+SHMLBA-1)&~(SHMLBA-1)) +      \
+        (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
 
-#define COLOUR_ALIGN(addr, pgoff) \
-       ((((addr) + shm_align_mask) & ~shm_align_mask) + \
-        (((pgoff) << PAGE_SHIFT) & shm_align_mask))
-
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+/*
+ * We need to ensure that shared mappings are correctly aligned to
+ * avoid aliasing issues with VIPT caches.  We need to ensure that
+ * a specific page of an object is always mapped at a multiple of
+ * SHMLBA bytes.
+ *
+ * We unconditionally provide this function for all cases.
+ */
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
 {
-       struct vm_area_struct *vmm;
-       int do_color_align;
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       int do_align = 0;
+       struct vm_unmapped_area_info info;
+
+       /*
+        * We only need to do colour alignment if either the I or D
+        * caches alias.
+        */
+       do_align = filp || (flags & MAP_SHARED);
 
+       /*
+        * We enforce the MAP_FIXED case.
+        */
        if (flags & MAP_FIXED) {
-               /*
-                * We do not accept a shared mapping if it would violate
-                * cache aliasing constraints.
-                */
-               if ((flags & MAP_SHARED) &&
-                       ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+               if (flags & MAP_SHARED &&
+                   (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
                        return -EINVAL;
                return addr;
        }
 
        if (len > TASK_SIZE)
                return -ENOMEM;
-       do_color_align = 0;
-       if (filp || (flags & MAP_SHARED))
-               do_color_align = 1;
+
        if (addr) {
-               if (do_color_align)
+               if (do_align)
                        addr = COLOUR_ALIGN(addr, pgoff);
                else
                        addr = PAGE_ALIGN(addr);
-               vmm = find_vma(current->mm, addr);
+
+               vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                               (!vmm || addr + len <= vmm->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
-       addr = TASK_UNMAPPED_BASE;
-       if (do_color_align)
-               addr = COLOUR_ALIGN(addr, pgoff);
-       else
-               addr = PAGE_ALIGN(addr);
 
-       for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-               /* At this point: (!vmm || addr < vmm->vm_end). */
-               if (TASK_SIZE - len < addr)
-                       return -ENOMEM;
-               if (!vmm || addr + len <= vmm->vm_start)
-                       return addr;
-               addr = vmm->vm_end;
-               if (do_color_align)
-                       addr = COLOUR_ALIGN(addr, pgoff);
-       }
+       info.flags = 0;
+       info.length = len;
+       info.low_limit = mm->mmap_base;
+       info.high_limit = TASK_SIZE;
+       info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+       info.align_offset = pgoff << PAGE_SHIFT;
+       return vm_unmapped_area(&info);
 }
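
COLOUR_ALIGN() rounds the address hint up to an SHMLBA boundary and then adds
the page offset's colour within SHMLBA, so every mapping of a given file page
shares a cache colour and VIPT aliases line up. A worked example, assuming a
16 KiB SHMLBA:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define SHMLBA     0x4000UL	/* assumed: 16 KiB colour granule */

	#define COLOUR_ALIGN(addr, pgoff)				\
		((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) +		\
		 (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))

	int main(void)
	{
		/* 0x1234 rounds up to 0x4000; pgoff 1 adds colour 0x1000 */
		printf("%#lx\n", COLOUR_ALIGN(0x1234UL, 1UL));	/* 0x5000 */
		return 0;
	}
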
index 476eb78..a430e7f 100644 (file)
@@ -9,11 +9,12 @@
 #define nop()  asm volatile ("nop\n":::"memory")
 
 /*
- * sync:        completion barrier
- * sync.s:      completion barrier and shareable to other cores
- * sync.i:      completion barrier with flush cpu pipeline
- * sync.is:     completion barrier with flush cpu pipeline and shareable to
- *             other cores
+ * sync:        completion barrier; guarantees that the last bus
+ *              response for ld/st instructions issued before the
+ *              sync has been received
+ * sync.s:      as sync, but also shareable to other cores
+ * sync.i:      as sync, but also flushes the cpu pipeline
+ * sync.is:     combines sync.i and sync.s
  *
  * bar.brwarw:  ordering barrier for all load/store instructions before it
  * bar.brwarws: ordering barrier for all load/store instructions before it
@@ -27,9 +28,7 @@
  */
 
 #ifdef CONFIG_CPU_HAS_CACHEV2
-#define mb()           asm volatile ("bar.brwarw\n":::"memory")
-#define rmb()          asm volatile ("bar.brar\n":::"memory")
-#define wmb()          asm volatile ("bar.bwaw\n":::"memory")
+#define mb()           asm volatile ("sync.s\n":::"memory")
 
 #ifdef CONFIG_SMP
 #define __smp_mb()     asm volatile ("bar.brwarws\n":::"memory")
index d683734..1d5fc2f 100644 (file)
@@ -24,6 +24,7 @@ void cache_wbinv_range(unsigned long start, unsigned long end);
 void cache_wbinv_all(void);
 
 void dma_wbinv_range(unsigned long start, unsigned long end);
+void dma_inv_range(unsigned long start, unsigned long end);
 void dma_wb_range(unsigned long start, unsigned long end);
 
 #endif
index c1dfa9c..80d071e 100644 (file)
@@ -4,17 +4,10 @@
 #ifndef __ASM_CSKY_IO_H
 #define __ASM_CSKY_IO_H
 
-#include <abi/pgtable-bits.h>
+#include <asm/pgtable.h>
 #include <linux/types.h>
 #include <linux/version.h>
 
-extern void __iomem *ioremap(phys_addr_t offset, size_t size);
-
-extern void iounmap(void *addr);
-
-extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
-               size_t size, unsigned long flags);
-
 /*
  * I/O memory access primitives. Reads are ordered relative to any
  * following Normal memory access. Writes are ordered relative to any prior
@@ -40,9 +33,17 @@ extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
 #define writel(v,c)            ({ wmb(); writel_relaxed((v),(c)); mb(); })
 #endif
 
-#define ioremap_nocache(phy, sz)       ioremap(phy, sz)
-#define ioremap_wc ioremap_nocache
-#define ioremap_wt ioremap_nocache
+/*
+ * I/O memory mapping functions.
+ */
+extern void __iomem *ioremap_cache(phys_addr_t addr, size_t size);
+extern void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot);
+extern void iounmap(void *addr);
+
+#define ioremap(addr, size)            __ioremap((addr), (size), pgprot_noncached(PAGE_KERNEL))
+#define ioremap_wc(addr, size)         __ioremap((addr), (size), pgprot_writecombine(PAGE_KERNEL))
+#define ioremap_nocache(addr, size)    ioremap((addr), (size))
+#define ioremap_cache                  ioremap_cache
 
 #include <asm-generic/io.h>
 
index 98c5716..c7c1ed2 100644 (file)
@@ -71,12 +71,10 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 #define __pte_free_tlb(tlb, pte, address)              \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page(tlb, pte);                      \
 } while (0)
 
-#define check_pgt_cache()      do {} while (0)
-
 extern void pagetable_init(void);
 extern void pre_mmu_init(void);
 extern void pre_trap_init(void);
index c429a6f..7c21985 100644 (file)
@@ -258,6 +258,16 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
 {
        unsigned long prot = pgprot_val(_prot);
 
+       prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED | _PAGE_SO;
+
+       return __pgprot(prot);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
+{
+       unsigned long prot = pgprot_val(_prot);
+
        prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED;
 
        return __pgprot(prot);
@@ -296,11 +306,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define kern_addr_valid(addr)  (1)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do {} while (0)
-
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
        remap_pfn_range(vma, vaddr, pfn, size, prot)
 
index a7e84cc..a7a5b67 100644 (file)
 #define PTE_INDX_SHIFT  10
 #define _PGDIR_SHIFT    22
 
+.macro zero_fp
+#ifdef CONFIG_STACKTRACE
+       movi    r8, 0
+#endif
+.endm
+
 .macro tlbop_begin name, val0, val1, val2
 ENTRY(csky_\name)
        mtcr    a3, ss2
@@ -96,6 +102,7 @@ ENTRY(csky_\name)
        SAVE_ALL 0
 .endm
 .macro tlbop_end is_write
+       zero_fp
        RD_MEH  a2
        psrset  ee, ie
        mov     a0, sp
@@ -120,6 +127,7 @@ tlbop_end 1
 
 ENTRY(csky_systemcall)
        SAVE_ALL TRAP0_SIZE
+       zero_fp
 
        psrset  ee, ie
 
@@ -136,9 +144,9 @@ ENTRY(csky_systemcall)
        mov     r9, sp
        bmaski  r10, THREAD_SHIFT
        andn    r9, r10
-       ldw     r8, (r9, TINFO_FLAGS)
-       ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
-       cmpnei  r8, 0
+       ldw     r12, (r9, TINFO_FLAGS)
+       ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+       cmpnei  r12, 0
        bt      csky_syscall_trace
 #if defined(__CSKYABIV2__)
        subi    sp, 8
@@ -180,7 +188,7 @@ csky_syscall_trace:
 
 ENTRY(ret_from_kernel_thread)
        jbsr    schedule_tail
-       mov     a0, r8
+       mov     a0, r10
        jsr     r9
        jbsr    ret_from_exception
 
@@ -189,9 +197,9 @@ ENTRY(ret_from_fork)
        mov     r9, sp
        bmaski  r10, THREAD_SHIFT
        andn    r9, r10
-       ldw     r8, (r9, TINFO_FLAGS)
-       ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
-       cmpnei  r8, 0
+       ldw     r12, (r9, TINFO_FLAGS)
+       ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+       cmpnei  r12, 0
        bf      ret_from_exception
        mov     a0, sp                  /* sp = pt_regs pointer */
        jbsr    syscall_trace_exit
@@ -209,9 +217,9 @@ ret_from_exception:
        bmaski  r10, THREAD_SHIFT
        andn    r9, r10
 
-       ldw     r8, (r9, TINFO_FLAGS)
-       andi    r8, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
-       cmpnei  r8, 0
+       ldw     r12, (r9, TINFO_FLAGS)
+       andi    r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+       cmpnei  r12, 0
        bt      exit_work
 1:
        RESTORE_ALL
@@ -220,11 +228,11 @@ exit_work:
        lrw     syscallid, ret_from_exception
        mov     lr, syscallid
 
-       btsti   r8, TIF_NEED_RESCHED
+       btsti   r12, TIF_NEED_RESCHED
        bt      work_resched
 
        mov     a0, sp
-       mov     a1, r8
+       mov     a1, r12
        jmpi    do_notify_resume
 
 work_resched:
@@ -232,6 +240,7 @@ work_resched:
 
 ENTRY(csky_trap)
        SAVE_ALL 0
+       zero_fp
        psrset  ee
        mov     a0, sp                 /* Push Stack pointer arg */
        jbsr    trap_c                 /* Call C-level trap handler */
@@ -265,6 +274,7 @@ ENTRY(csky_get_tls)
 
 ENTRY(csky_irq)
        SAVE_ALL 0
+       zero_fp
        psrset  ee
 
 #ifdef CONFIG_PREEMPT
@@ -276,27 +286,23 @@ ENTRY(csky_irq)
         * Get task_struct->stack.preempt_count for current,
         * and increase 1.
         */
-       ldw     r8, (r9, TINFO_PREEMPT)
-       addi    r8, 1
-       stw     r8, (r9, TINFO_PREEMPT)
+       ldw     r12, (r9, TINFO_PREEMPT)
+       addi    r12, 1
+       stw     r12, (r9, TINFO_PREEMPT)
 #endif
 
        mov     a0, sp
        jbsr    csky_do_IRQ
 
 #ifdef CONFIG_PREEMPT
-       subi    r8, 1
-       stw     r8, (r9, TINFO_PREEMPT)
-       cmpnei  r8, 0
+       subi    r12, 1
+       stw     r12, (r9, TINFO_PREEMPT)
+       cmpnei  r12, 0
        bt      2f
-       ldw     r8, (r9, TINFO_FLAGS)
-       btsti   r8, TIF_NEED_RESCHED
+       ldw     r12, (r9, TINFO_FLAGS)
+       btsti   r12, TIF_NEED_RESCHED
        bf      2f
-1:
        jbsr    preempt_schedule_irq    /* irq en/disable is done inside */
-       ldw     r7, (r9, TINFO_FLAGS)   /* get new tasks TI_FLAGS */
-       btsti   r7, TIF_NEED_RESCHED
-       bt      1b                      /* go again */
 #endif
 2:
        jmpi    ret_from_exception
index 4c1a193..1a29f11 100644 (file)
@@ -1306,7 +1306,7 @@ int csky_pmu_device_probe(struct platform_device *pdev,
                                 &csky_pmu.count_width)) {
                csky_pmu.count_width = DEFAULT_COUNT_WIDTH;
        }
-       csky_pmu.max_period = BIT(csky_pmu.count_width) - 1;
+       csky_pmu.max_period = BIT_ULL(csky_pmu.count_width) - 1;
 
        csky_pmu.plat_device = pdev;
 
@@ -1337,7 +1337,7 @@ int csky_pmu_device_probe(struct platform_device *pdev,
        return ret;
 }
 
-const static struct of_device_id csky_pmu_of_device_ids[] = {
+static const struct of_device_id csky_pmu_of_device_ids[] = {
        {.compatible = "csky,csky-pmu"},
        {},
 };
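
The BIT() to BIT_ULL() change matters because csky is 32-bit: for a counter
width of 32 or more, shifting a 32-bit unsigned long is undefined behaviour,
so max_period came out wrong. A stand-alone illustration:

	#include <stdio.h>

	#define BIT_ULL(n) (1ULL << (n))	/* 64-bit shift is safe */

	int main(void)
	{
		int width = 48;	/* counter wider than a 32-bit unsigned long */

		/* (1UL << 48) would be undefined with a 32-bit long;
		 * the 64-bit variant yields the intended mask. */
		printf("max_period = %#llx\n", BIT_ULL(width) - 1);
		return 0;
	}
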
index e555740..f320d92 100644 (file)
@@ -55,7 +55,7 @@ int copy_thread(unsigned long clone_flags,
        if (unlikely(p->flags & PF_KTHREAD)) {
                memset(childregs, 0, sizeof(struct pt_regs));
                childstack->r15 = (unsigned long) ret_from_kernel_thread;
-               childstack->r8 = kthread_arg;
+               childstack->r10 = kthread_arg;
                childstack->r9 = usp;
                childregs->sr = mfcr("psr");
        } else {
index b8a75cc..494ec91 100644 (file)
@@ -120,7 +120,12 @@ void dma_wbinv_range(unsigned long start, unsigned long end)
        cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1);
 }
 
+void dma_inv_range(unsigned long start, unsigned long end)
+{
+       cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1);
+}
+
 void dma_wb_range(unsigned long start, unsigned long end)
 {
-       cache_op_range(start, end, DATA_CACHE|CACHE_INV, 1);
+       cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1);
 }
index baaf05d..b61be65 100644 (file)
@@ -69,11 +69,20 @@ void dma_wbinv_range(unsigned long start, unsigned long end)
        sync_is();
 }
 
+void dma_inv_range(unsigned long start, unsigned long end)
+{
+       unsigned long i = start & ~(L1_CACHE_BYTES - 1);
+
+       for (; i < end; i += L1_CACHE_BYTES)
+               asm volatile("dcache.iva %0\n"::"r"(i):"memory");
+       sync_is();
+}
+
 void dma_wb_range(unsigned long start, unsigned long end)
 {
        unsigned long i = start & ~(L1_CACHE_BYTES - 1);
 
        for (; i < end; i += L1_CACHE_BYTES)
-               asm volatile("dcache.civa %0\n"::"r"(i):"memory");
+               asm volatile("dcache.cva %0\n"::"r"(i):"memory");
        sync_is();
 }
index 602a60d..06e85b5 100644 (file)
 #include <linux/version.h>
 #include <asm/cache.h>
 
-void arch_dma_prep_coherent(struct page *page, size_t size)
-{
-       if (PageHighMem(page)) {
-               unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-               do {
-                       void *ptr = kmap_atomic(page);
-                       size_t _size = (size < PAGE_SIZE) ? size : PAGE_SIZE;
-
-                       memset(ptr, 0, _size);
-                       dma_wbinv_range((unsigned long)ptr,
-                                       (unsigned long)ptr + _size);
-
-                       kunmap_atomic(ptr);
-
-                       page++;
-                       size -= PAGE_SIZE;
-                       count--;
-               } while (count);
-       } else {
-               void *ptr = page_address(page);
-
-               memset(ptr, 0, size);
-               dma_wbinv_range((unsigned long)ptr, (unsigned long)ptr + size);
-       }
-}
-
 static inline void cache_op(phys_addr_t paddr, size_t size,
                            void (*fn)(unsigned long start, unsigned long end))
 {
-       struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
-       unsigned int offset = paddr & ~PAGE_MASK;
-       size_t left = size;
-       unsigned long start;
+       struct page *page    = phys_to_page(paddr);
+       void *start          = __va(page_to_phys(page));
+       unsigned long offset = offset_in_page(paddr);
+       size_t left          = size;
 
        do {
                size_t len = left;
 
+               if (offset + len > PAGE_SIZE)
+                       len = PAGE_SIZE - offset;
+
                if (PageHighMem(page)) {
-                       void *addr;
+                       start = kmap_atomic(page);
 
-                       if (offset + len > PAGE_SIZE) {
-                               if (offset >= PAGE_SIZE) {
-                                       page += offset >> PAGE_SHIFT;
-                                       offset &= ~PAGE_MASK;
-                               }
-                               len = PAGE_SIZE - offset;
-                       }
+                       fn((unsigned long)start + offset,
+                                       (unsigned long)start + offset + len);
 
-                       addr = kmap_atomic(page);
-                       start = (unsigned long)(addr + offset);
-                       fn(start, start + len);
-                       kunmap_atomic(addr);
+                       kunmap_atomic(start);
                } else {
-                       start = (unsigned long)phys_to_virt(paddr);
-                       fn(start, start + size);
+                       fn((unsigned long)start + offset,
+                                       (unsigned long)start + offset + len);
                }
                offset = 0;
+
                page++;
+               start += PAGE_SIZE;
                left -= len;
        } while (left);
 }
 
+static void dma_wbinv_set_zero_range(unsigned long start, unsigned long end)
+{
+       memset((void *)start, 0, end - start);
+       dma_wbinv_range(start, end);
+}
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+       cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range);
+}
+
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
                              size_t size, enum dma_data_direction dir)
 {
@@ -98,11 +79,10 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 {
        switch (dir) {
        case DMA_TO_DEVICE:
-               cache_op(paddr, size, dma_wb_range);
-               break;
+               return;
        case DMA_FROM_DEVICE:
        case DMA_BIDIRECTIONAL:
-               cache_op(paddr, size, dma_wbinv_range);
+               cache_op(paddr, size, dma_inv_range);
                break;
        default:
                BUG();
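
The arch_sync_dma_for_cpu() change encodes the usual DMA ownership rules: when
the device hands a buffer back to the CPU, DMA_FROM_DEVICE (and BIDIRECTIONAL)
needs an invalidate before the CPU reads, while DMA_TO_DEVICE needs nothing at
this point since the device only read the buffer. A schematic sketch of that
mapping (illustrative, not the kernel API):

	#include <stdio.h>

	enum dir { TO_DEVICE, FROM_DEVICE, BIDIRECTIONAL };

	static void sync_for_cpu(enum dir d)
	{
		switch (d) {
		case TO_DEVICE:
			return;	/* device only read; CPU caches still valid */
		case FROM_DEVICE:
		case BIDIRECTIONAL:
			puts("invalidate range");	/* drop stale lines */
			return;
		}
	}

	int main(void)
	{
		sync_for_cpu(FROM_DEVICE);
		return 0;
	}
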
index eb0dc9e..d4c2292 100644 (file)
@@ -60,22 +60,6 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-       if (start < end)
-               pr_info("Freeing initrd memory: %ldk freed\n",
-                       (end - start) >> 10);
-
-       for (; start < end; start += PAGE_SIZE) {
-               ClearPageReserved(virt_to_page(start));
-               init_page_count(virt_to_page(start));
-               free_page(start);
-               totalram_pages_inc();
-       }
-}
-#endif
-
 extern char __init_begin[], __init_end[];
 
 void free_initmem(void)
index 8473b6b..e13cd34 100644 (file)
@@ -8,12 +8,12 @@
 
 #include <asm/pgtable.h>
 
-void __iomem *ioremap(phys_addr_t addr, size_t size)
+static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size,
+                                     pgprot_t prot, void *caller)
 {
        phys_addr_t last_addr;
        unsigned long offset, vaddr;
        struct vm_struct *area;
-       pgprot_t prot;
 
        last_addr = addr + size - 1;
        if (!size || last_addr < addr)
@@ -23,15 +23,12 @@ void __iomem *ioremap(phys_addr_t addr, size_t size)
        addr &= PAGE_MASK;
        size = PAGE_ALIGN(size + offset);
 
-       area = get_vm_area_caller(size, VM_ALLOC, __builtin_return_address(0));
+       area = get_vm_area_caller(size, VM_IOREMAP, caller);
        if (!area)
                return NULL;
 
        vaddr = (unsigned long)area->addr;
 
-       prot = __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE |
-                       _PAGE_GLOBAL | _CACHE_UNCACHED | _PAGE_SO);
-
        if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
                free_vm_area(area);
                return NULL;
@@ -39,7 +36,20 @@ void __iomem *ioremap(phys_addr_t addr, size_t size)
 
        return (void __iomem *)(vaddr + offset);
 }
-EXPORT_SYMBOL(ioremap);
+
+void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
+{
+       return __ioremap_caller(phys_addr, size, prot,
+                               __builtin_return_address(0));
+}
+EXPORT_SYMBOL(__ioremap);
+
+void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
+{
+       return __ioremap_caller(phys_addr, size, PAGE_KERNEL,
+                               __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
 
 void iounmap(void __iomem *addr)
 {
@@ -51,10 +61,9 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                              unsigned long size, pgprot_t vma_prot)
 {
        if (!pfn_valid(pfn)) {
-               vma_prot.pgprot |= _PAGE_SO;
                return pgprot_noncached(vma_prot);
        } else if (file->f_flags & O_SYNC) {
-               return pgprot_noncached(vma_prot);
+               return pgprot_writecombine(vma_prot);
        }
 
        return vma_prot;
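
The refactor funnels ioremap(), ioremap_wc() and ioremap_cache() into a single
__ioremap_caller() that records each mapping's creator via
__builtin_return_address(0), which get_vm_area_caller() keeps for later
inspection. The wrapper shape, reduced to a user-space sketch with made-up
types and names:

	#include <stdio.h>

	/* internal worker: takes the caller's return address for bookkeeping */
	static void *remap_caller(unsigned long phys, unsigned long size,
				  int prot, void *caller)
	{
		printf("map %#lx (+%#lx) prot=%d for caller %p\n",
		       phys, size, prot, caller);
		return (void *)phys;	/* stand-in for the mapped address */
	}

	void *my_ioremap(unsigned long phys, unsigned long size)
	{
		/* each public entry point captures its own call site */
		return remap_caller(phys, size, /* NOCACHE */ 0,
				    __builtin_return_address(0));
	}

	int main(void)
	{
		my_ioremap(0x10000000UL, 0x1000UL);
		return 0;
	}
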
index a99caa4..4d00152 100644 (file)
@@ -4,7 +4,6 @@
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #include <asm-generic/pgtable.h>
-#define pgtable_cache_init()   do { } while (0)
 extern void paging_init(void);
 #define PAGE_NONE              __pgprot(0)    /* these mean nothing to NO_MM */
 #define PAGE_SHARED            __pgprot(0)    /* these mean nothing to NO_MM */
@@ -34,11 +33,6 @@ static inline int pte_file(pte_t pte) { return 0; }
 extern unsigned int kobjsize(const void *objp);
 extern int is_in_rom(unsigned long);
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
index d6544dc..cc9be51 100644 (file)
@@ -13,8 +13,6 @@
 
 #include <asm-generic/pgalloc.h>       /* for pte_{alloc,free}_one */
 
-#define check_pgt_cache() do {} while (0)
-
 extern unsigned long long kmap_generation;
 
 /*
@@ -96,7 +94,7 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 
 #define __pte_free_tlb(tlb, pte, addr)         \
 do {                                           \
-       pgtable_page_dtor((pte));               \
+       pgtable_pte_page_dtor((pte));           \
        tlb_remove_page((tlb), (pte));          \
 } while (0)
 
index a3ff6d2..2fec20a 100644 (file)
@@ -431,9 +431,6 @@ static inline int pte_exec(pte_t pte)
 
 #define __pte_offset(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-/*  I think this is in case we have page table caches; needed by init/main.c  */
-#define pgtable_cache_init()    do { } while (0)
-
 /*
  * Swap/file PTE definitions.  If _PAGE_PRESENT is zero, the rest of the PTE is
  * interpreted as swap information.  The remaining free bits are interpreted as
index 1894263..8938384 100644 (file)
@@ -3,5 +3,5 @@
 # Makefile for Hexagon memory management subsystem
 #
 
-obj-y := init.o pgalloc.o ioremap.o uaccess.o vm_fault.o cache.o
+obj-y := init.o ioremap.o uaccess.o vm_fault.o cache.o
 obj-y += copy_to_user.o copy_from_user.o strnlen_user.o vm_tlb.o
index f1f6ebd..c961773 100644 (file)
@@ -71,19 +71,6 @@ void __init mem_init(void)
        init_mm.context.ptbase = __pa(init_mm.pgd);
 }
 
-/*
- * free_initrd_mem - frees...  initrd memory.
- * @start - start of init memory
- * @end - end of init memory
- *
- * Apparently has to be passed the address of the initrd memory.
- *
- * Wrapped by #ifdef CONFIG_BLKDEV_INITRD
- */
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-}
-
 void sync_icache_dcache(pte_t pte)
 {
        unsigned long addr;
diff --git a/arch/hexagon/mm/pgalloc.c b/arch/hexagon/mm/pgalloc.c
deleted file mode 100644 (file)
index 4d43161..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/init.h>
-
-void __init pgtable_cache_init(void)
-{
-}
index 685a3df..1671447 100644 (file)
@@ -72,10 +72,6 @@ config 64BIT
 config ZONE_DMA32
        def_bool y
 
-config QUICKLIST
-       bool
-       default y
-
 config MMU
        bool
        default y
index c9e4810..f4c4910 100644 (file)
 #include <linux/mm.h>
 #include <linux/page-flags.h>
 #include <linux/threads.h>
-#include <linux/quicklist.h>
+
+#include <asm-generic/pgalloc.h>
 
 #include <asm/mmu_context.h>
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
+       return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-       quicklist_free(0, NULL, pgd);
+       free_page((unsigned long)pgd);
 }
 
 #if CONFIG_PGTABLE_LEVELS == 4
@@ -42,12 +43,12 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
+       return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-       quicklist_free(0, NULL, pud);
+       free_page((unsigned long)pud);
 }
 #define __pud_free_tlb(tlb, pud, address)      pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
@@ -60,12 +61,12 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
+       return (pmd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-       quicklist_free(0, NULL, pmd);
+       free_page((unsigned long)pmd);
 }
 
 #define __pmd_free_tlb(tlb, pmd, address)      pmd_free((tlb)->mm, pmd)
@@ -83,43 +84,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
        pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-       struct page *page;
-       void *pg;
-
-       pg = quicklist_alloc(0, GFP_KERNEL, NULL);
-       if (!pg)
-               return NULL;
-       page = virt_to_page(pg);
-       if (!pgtable_page_ctor(page)) {
-               quicklist_free(0, NULL, pg);
-               return NULL;
-       }
-       return page;
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-       pgtable_page_dtor(pte);
-       quicklist_free_page(0, NULL, pte);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-       quicklist_free(0, NULL, pte);
-}
-
-static inline void check_pgt_cache(void)
-{
-       quicklist_trim(0, NULL, 25, 16);
-}
-
 #define __pte_free_tlb(tlb, pte, address)      pte_free((tlb)->mm, pte)
 
 #endif                         /* _ASM_IA64_PGALLOC_H */
index b1e7468..d602e7c 100644 (file)
@@ -566,11 +566,6 @@ extern struct page *zero_page_memmap_ptr;
 #define KERNEL_TR_PAGE_SHIFT   _PAGE_SIZE_64M
 #define KERNEL_TR_PAGE_SIZE    (1 << KERNEL_TR_PAGE_SHIFT)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /* These tell get_user_pages() that the first gate page is accessible from user-level.  */
 #define FIXADDR_USER_START     GATE_ADDR
 #ifdef HAVE_BUGGY_SEGREL
index f102084..8e91c86 100644 (file)
@@ -583,6 +583,7 @@ void ia64_process_pending_intr(void)
 static irqreturn_t dummy_handler (int irq, void *dev_id)
 {
        BUG();
+       return IRQ_NONE;
 }
 
 static struct irqaction ipi_irqaction = {
index db09a69..5b00dc3 100644 (file)
@@ -108,7 +108,6 @@ setup_per_cpu_areas(void)
        struct pcpu_group_info *gi;
        unsigned int cpu;
        ssize_t static_size, reserved_size, dyn_size;
-       int rc;
 
        ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
        if (!ai)
index 219fc64..4f33f6e 100644 (file)
@@ -186,7 +186,7 @@ void __init setup_per_cpu_areas(void)
        unsigned long base_offset;
        unsigned int cpu;
        ssize_t static_size, reserved_size, dyn_size;
-       int node, prev_node, unit, nr_units, rc;
+       int node, prev_node, unit, nr_units;
 
        ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
        if (!ai)
index 678b98a..bf9df26 100644 (file)
@@ -64,7 +64,7 @@ __ia64_sync_icache_dcache (pte_t pte)
        if (test_bit(PG_arch_1, &page->flags))
                return;                         /* i-cache is already coherent with d-cache */
 
-       flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
+       flush_icache_range(addr, addr + page_size(page));
        set_bit(PG_arch_1, &page->flags);       /* mark page as clean */
 }
 
index 4399d71..b34d44d 100644 (file)
@@ -41,7 +41,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
                                  unsigned long address)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
@@ -54,7 +54,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 
        if (!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -73,7 +73,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 
 static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
index d04d9ba..acab315 100644 (file)
@@ -36,7 +36,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
        page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
        if(!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -51,7 +51,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        cache_page(kmap(page));
        kunmap(page);
        __free_page(page);
@@ -60,7 +60,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
                                  unsigned long address)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        cache_page(kmap(page));
        kunmap(page);
        __free_page(page);
index fde4534..646c174 100644 (file)
@@ -176,11 +176,4 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot);
 #include <asm-generic/pgtable.h>
 #endif /* !__ASSEMBLY__ */
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _M68K_PGTABLE_H */
index fc3a96c..c18165b 100644 (file)
@@ -44,11 +44,6 @@ extern void paging_init(void);
  */
 #define ZERO_PAGE(vaddr)       (virt_to_page(0))
 
-/*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
@@ -60,6 +55,4 @@ extern void paging_init(void);
 
 #include <asm-generic/pgtable.h>
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _M68KNOMMU_PGTABLE_H */
index 1a8ddbd..8561211 100644 (file)
@@ -21,7 +21,7 @@ extern const char bad_pmd_string[];
 
 #define __pte_free_tlb(tlb,pte,addr)                   \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb), pte);                    \
 } while (0)
 
index 632c947..c9c4be8 100644 (file)
@@ -5,15 +5,18 @@ config MICROBLAZE
        select ARCH_NO_SWAP
        select ARCH_HAS_BINFMT_FLAT if !MMU
        select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
+       select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+       select ARCH_HAS_UNCACHED_SEGMENT if !MMU
        select ARCH_MIGHT_HAVE_PC_PARPORT
        select ARCH_WANT_IPC_PARSE_VERSION
        select BUILDTIME_EXTABLE_SORT
        select TIMER_OF
        select CLONE_BACKWARDS3
        select COMMON_CLK
+       select DMA_DIRECT_REMAP if MMU
        select GENERIC_ATOMIC64
        select GENERIC_CLOCKEVENTS
        select GENERIC_CPU_DEVICES
index 5a8a9d0..5b23652 100644 (file)
@@ -18,7 +18,6 @@
        #address-cells = <1>;
        #size-cells = <1>;
        compatible = "xlnx,microblaze";
-       hard-reset-gpios = <&LEDs_8Bit 2 1>;
        model = "testing";
        DDR2_SDRAM: memory@90000000 {
                device_type = "memory";
                                gpios = <&LEDs_8Bit 7 1>;
                        };
                } ;
+
+               gpio-restart {
+                       compatible = "gpio-restart";
+                       /*
+                        * FIXME: is this active low or active high?
+                        * The current flag (1) indicates active low.
+                        * The delay values are placeholders; adjust them per
+                        * the datasheet or by trial and error on real hardware.
+                        */
+                       gpios = <&LEDs_8Bit 2 1>;
+                       active-delay = <100>;
+                       inactive-delay = <10>;
+                       wait-delay = <100>;
+               };
+
                RS232_Uart_1: serial@84000000 {
                        clock-frequency = <125000000>;
                        compatible = "xlnx,xps-uartlite-1.00.a";
index 92fd4e9..654edfd 100644 (file)
@@ -5,15 +5,10 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_KALLSYMS_ALL=y
 # CONFIG_BASE_FULL is not set
+CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_EFI_PARTITION is not set
 CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
@@ -25,14 +20,19 @@ CONFIG_MMU=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE_FORCE=y
 CONFIG_HIGHMEM=y
-CONFIG_PCI=y
 CONFIG_PCI_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_EFI_PARTITION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
 CONFIG_BRIDGE=m
+CONFIG_PCI=y
 CONFIG_MTD=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -41,6 +41,7 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_NETDEVICES=y
 CONFIG_XILINX_EMACLITE=y
+CONFIG_XILINX_AXI_EMAC=y
 CONFIG_XILINX_LL_TEMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
@@ -59,6 +60,8 @@ CONFIG_SPI_XILINX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_XILINX=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_XILINX_WATCHDOG=y
@@ -74,8 +77,8 @@ CONFIG_CRAMFS=y
 CONFIG_ROMFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_CIFS=y
-CONFIG_CIFS_STATS=y
 CONFIG_CIFS_STATS2=y
+CONFIG_ENCRYPTED_KEYS=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_SLAB=y
 CONFIG_DETECT_HUNG_TASK=y
@@ -83,6 +86,3 @@ CONFIG_DEBUG_SPINLOCK=y
 CONFIG_KGDB=y
 CONFIG_KGDB_TESTS=y
 CONFIG_KGDB_KDB=y
-CONFIG_EARLY_PRINTK=y
-CONFIG_KEYS=y
-CONFIG_ENCRYPTED_KEYS=y
index 06d69a6..377de39 100644 (file)
@@ -7,15 +7,10 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_KALLSYMS_ALL=y
 # CONFIG_BASE_FULL is not set
+CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_EFI_PARTITION is not set
 CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
@@ -25,13 +20,18 @@ CONFIG_XILINX_MICROBLAZE0_USE_FPU=2
 CONFIG_HZ_100=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE_FORCE=y
-CONFIG_PCI=y
 CONFIG_PCI_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_EFI_PARTITION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_PCI=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
@@ -62,6 +62,8 @@ CONFIG_SPI_XILINX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_XILINX=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_XILINX_WATCHDOG=y
@@ -75,11 +77,6 @@ CONFIG_ROMFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NLS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_SLAB=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_EARLY_PRINTK=y
 CONFIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=y
 CONFIG_CRYPTO_ECB=y
@@ -87,3 +84,7 @@ CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_ARC4=y
 CONFIG_CRYPTO_DES=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_SPINLOCK=y
index c796813..86c95b2 100644 (file)
@@ -40,7 +40,6 @@ extern void iounmap(volatile void __iomem *addr);
 
 extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
 #define ioremap_nocache(addr, size)            ioremap((addr), (size))
-#define ioremap_fullcache(addr, size)          ioremap((addr), (size))
 #define ioremap_wc(addr, size)                 ioremap((addr), (size))
 #define ioremap_wt(addr, size)                 ioremap((addr), (size))
 
index 21ddba9..7c4dc5d 100644 (file)
@@ -66,8 +66,6 @@ extern pgprot_t       pci_phys_mem_access_prot(struct file *file,
                                         unsigned long size,
                                         pgprot_t prot);
 
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
 /* This part of code was originally in xilinx-pci.h */
 #ifdef CONFIG_PCI_XILINX
 extern void __init xilinx_pci_init(void);
index f4cc9ff..7ecb05b 100644 (file)
 #include <asm/cache.h>
 #include <asm/pgtable.h>
 
-#define PGDIR_ORDER    0
-
-/*
- * This is handled very differently on MicroBlaze since out page tables
- * are all 0's and I want to be able to use these zero'd pages elsewhere
- * as well - it gives us quite a speedup.
- * -- Cort
- */
-extern struct pgtable_cache_struct {
-       unsigned long *pgd_cache;
-       unsigned long *pte_cache;
-       unsigned long pgtable_cache_sz;
-} quicklists;
-
-#define pgd_quicklist          (quicklists.pgd_cache)
-#define pmd_quicklist          ((unsigned long *)0)
-#define pte_quicklist          (quicklists.pte_cache)
-#define pgtable_cache_size     (quicklists.pgtable_cache_sz)
-
-extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */
-extern atomic_t zero_sz; /* # currently pre-zero'd pages */
-extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */
-extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */
-extern atomic_t zerototal; /* # pages zero'd over time */
-
-#define zero_quicklist         (zero_cache)
-#define zero_cache_sz          (zero_sz)
-#define zero_cache_calls       (zeropage_calls)
-#define zero_cache_hits                (zeropage_hits)
-#define zero_cache_total       (zerototal)
-
-/*
- * return a pre-zero'd page from the list,
- * return NULL if none available -- Cort
- */
-extern unsigned long get_zero_page_fast(void);
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h>
 
 extern void __bad_pte(pmd_t *pmd);
 
-static inline pgd_t *get_pgd_slow(void)
+static inline pgd_t *get_pgd(void)
 {
-       pgd_t *ret;
-
-       ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER);
-       if (ret != NULL)
-               clear_page(ret);
-       return ret;
+       return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
 }
 
-static inline pgd_t *get_pgd_fast(void)
-{
-       unsigned long *ret;
-
-       ret = pgd_quicklist;
-       if (ret != NULL) {
-               pgd_quicklist = (unsigned long *)(*ret);
-               ret[0] = 0;
-               pgtable_cache_size--;
-       } else
-               ret = (unsigned long *)get_pgd_slow();
-       return (pgd_t *)ret;
-}
-
-static inline void free_pgd_fast(pgd_t *pgd)
-{
-       *(unsigned long **)pgd = pgd_quicklist;
-       pgd_quicklist = (unsigned long *) pgd;
-       pgtable_cache_size++;
-}
-
-static inline void free_pgd_slow(pgd_t *pgd)
+static inline void free_pgd(pgd_t *pgd)
 {
        free_page((unsigned long)pgd);
 }
 
-#define pgd_free(mm, pgd)        free_pgd_fast(pgd)
-#define pgd_alloc(mm)          get_pgd_fast()
+#define pgd_free(mm, pgd)      free_pgd(pgd)
+#define pgd_alloc(mm)          get_pgd()
 
 #define pmd_pgtable(pmd)       pmd_page(pmd)
 
@@ -110,50 +50,6 @@ static inline void free_pgd_slow(pgd_t *pgd)
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-       struct page *ptepage;
-
-#ifdef CONFIG_HIGHPTE
-       int flags = GFP_KERNEL | __GFP_HIGHMEM;
-#else
-       int flags = GFP_KERNEL;
-#endif
-
-       ptepage = alloc_pages(flags, 0);
-       if (!ptepage)
-               return NULL;
-       clear_highpage(ptepage);
-       if (!pgtable_page_ctor(ptepage)) {
-               __free_page(ptepage);
-               return NULL;
-       }
-       return ptepage;
-}
-
-static inline void pte_free_fast(pte_t *pte)
-{
-       *(unsigned long **)pte = pte_quicklist;
-       pte_quicklist = (unsigned long *) pte;
-       pgtable_cache_size++;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-       free_page((unsigned long)pte);
-}
-
-static inline void pte_free_slow(struct page *ptepage)
-{
-       __free_page(ptepage);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
-{
-       pgtable_page_dtor(ptepage);
-       __free_page(ptepage);
-}
-
 #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte))
 
 #define pmd_populate(mm, pmd, pte) \
@@ -171,10 +67,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
 #define __pmd_free_tlb(tlb, x, addr)   pmd_free((tlb)->mm, x)
 #define pgd_populate(mm, pmd, pte)     BUG()
 
-extern int do_check_pgt_cache(int, int);
-
 #endif /* CONFIG_MMU */
 
-#define check_pgt_cache()              do { } while (0)
-
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
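
The quicklist machinery removed above is replaced by the generic page-table
allocators pulled in through <asm-generic/pgalloc.h>; the arch keeps only its
own pte_alloc_one_kernel() (hence the __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
define). For orientation, the generic pte_alloc_one() that now backs this
header looks roughly like the sketch below (simplified and renamed, not the
verbatim generic code):

	/* Sketch of the generic helper now behind pte_alloc_one() */
	static inline pgtable_t pte_alloc_one_sketch(struct mm_struct *mm)
	{
		struct page *pte = alloc_page(GFP_KERNEL | __GFP_ZERO);

		if (!pte)
			return NULL;
		if (!pgtable_pte_page_ctor(pte)) {	/* accounting + ptl init */
			__free_page(pte);
			return NULL;
		}
		return pte;
	}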
index 142d3f0..954b69a 100644 (file)
@@ -46,8 +46,6 @@ extern int mem_init_done;
 
 #define swapper_pg_dir ((pgd_t *) NULL)
 
-#define pgtable_cache_init()   do {} while (0)
-
 #define arch_enter_lazy_cpu_mode()     do {} while (0)
 
 #define pgprot_noncached_wc(prot)      prot
@@ -526,11 +524,6 @@ extern unsigned long iopa(unsigned long addr);
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define kern_addr_valid(addr)  (1)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 void do_page_fault(struct pt_regs *regs, unsigned long address,
                   unsigned long error_code);
 
index bff2a71..a1f206b 100644 (file)
@@ -163,44 +163,15 @@ extern long __user_bad(void);
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define get_user(x, ptr)                                               \
-       __get_user_check((x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_check(x, ptr, size)                                 \
-({                                                                     \
-       unsigned long __gu_val = 0;                                     \
-       const typeof(*(ptr)) __user *__gu_addr = (ptr);                 \
-       int __gu_err = 0;                                               \
-                                                                       \
-       if (access_ok(__gu_addr, size)) {                       \
-               switch (size) {                                         \
-               case 1:                                                 \
-                       __get_user_asm("lbu", __gu_addr, __gu_val,      \
-                                      __gu_err);                       \
-                       break;                                          \
-               case 2:                                                 \
-                       __get_user_asm("lhu", __gu_addr, __gu_val,      \
-                                      __gu_err);                       \
-                       break;                                          \
-               case 4:                                                 \
-                       __get_user_asm("lw", __gu_addr, __gu_val,       \
-                                      __gu_err);                       \
-                       break;                                          \
-               default:                                                \
-                       __gu_err = __user_bad();                        \
-                       break;                                          \
-               }                                                       \
-       } else {                                                        \
-               __gu_err = -EFAULT;                                     \
-       }                                                               \
-       x = (__force typeof(*(ptr)))__gu_val;                           \
-       __gu_err;                                                       \
+#define get_user(x, ptr) ({                            \
+       const typeof(*(ptr)) __user *__gu_ptr = (ptr);  \
+       access_ok(__gu_ptr, sizeof(*__gu_ptr)) ?        \
+               __get_user(x, __gu_ptr) : -EFAULT;      \
 })
 
 #define __get_user(x, ptr)                                             \
 ({                                                                     \
        unsigned long __gu_val = 0;                                     \
-       /*unsigned long __gu_ptr = (unsigned long)(ptr);*/              \
        long __gu_err;                                                  \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
@@ -212,6 +183,11 @@ extern long __user_bad(void);
        case 4:                                                         \
                __get_user_asm("lw", (ptr), __gu_val, __gu_err);        \
                break;                                                  \
+       case 8:                                                         \
+               __gu_err = __copy_from_user(&__gu_val, ptr, 8);         \
+               if (__gu_err)                                           \
+                       __gu_err = -EFAULT;                             \
+               break;                                                  \
        default:                                                        \
                /* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\
        }                                                               \
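
The new case 8 arm above means get_user() now also handles 64-bit reads,
falling back to __copy_from_user() for the double-word load. A hypothetical
caller that this enables (example_ioctl() and do_something() are
placeholders, not code from this series):

	static long example_ioctl(struct file *file, unsigned int cmd,
				  unsigned long arg)
	{
		u64 val;

		if (get_user(val, (u64 __user *)arg))	/* size 8 path */
			return -EFAULT;
		return do_something(val);		/* placeholder consumer */
	}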
index fcbe1da..5f47229 100644 (file)
@@ -8,83 +8,9 @@
  */
 
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/of_platform.h>
-
-/* Trigger specific functions */
-#ifdef CONFIG_GPIOLIB
-
-#include <linux/of_gpio.h>
-
-static int handle; /* reset pin handle */
-static unsigned int reset_val;
-
-static int of_platform_reset_gpio_probe(void)
-{
-       int ret;
-       handle = of_get_named_gpio(of_find_node_by_path("/"),
-                                  "hard-reset-gpios", 0);
-
-       if (!gpio_is_valid(handle)) {
-               pr_info("Skipping unavailable RESET gpio %d (%s)\n",
-                               handle, "reset");
-               return -ENODEV;
-       }
-
-       ret = gpio_request(handle, "reset");
-       if (ret < 0) {
-               pr_info("GPIO pin is already allocated\n");
-               return ret;
-       }
-
-       /* get current setup value */
-       reset_val = gpio_get_value(handle);
-       /* FIXME maybe worth to perform any action */
-       pr_debug("Reset: Gpio output state: 0x%x\n", reset_val);
-
-       /* Setup GPIO as output */
-       ret = gpio_direction_output(handle, 0);
-       if (ret < 0)
-               goto err;
-
-       /* Setup output direction */
-       gpio_set_value(handle, 0);
-
-       pr_info("RESET: Registered gpio device: %d, current val: %d\n",
-                                                       handle, reset_val);
-       return 0;
-err:
-       gpio_free(handle);
-       return ret;
-}
-device_initcall(of_platform_reset_gpio_probe);
-
-
-static void gpio_system_reset(void)
-{
-       if (gpio_is_valid(handle))
-               gpio_set_value(handle, 1 - reset_val);
-       else
-               pr_notice("Reset GPIO unavailable - halting!\n");
-}
-#else
-static void gpio_system_reset(void)
-{
-       pr_notice("No reset GPIO present - halting!\n");
-}
-
-void of_platform_reset_gpio_probe(void)
-{
-       return;
-}
-#endif
-
-void machine_restart(char *cmd)
-{
-       pr_notice("Machine restart...\n");
-       gpio_system_reset();
-       while (1)
-               ;
-}
+#include <linux/reboot.h>
 
 void machine_shutdown(void)
 {
@@ -106,3 +32,12 @@ void machine_power_off(void)
        while (1)
                ;
 }
+
+void machine_restart(char *cmd)
+{
+       do_kernel_restart(cmd);
+       /* Give the restart hook 1 s to take us down */
+       mdelay(1000);
+       pr_emerg("Reboot failed -- System halted\n");
+       while (1);
+}
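
machine_restart() now defers to the generic restart-handler chain that
do_kernel_restart() walks; the gpio-restart driver selected by
CONFIG_POWER_RESET_GPIO_RESTART in the defconfig hunk above registers such a
handler. A simplified sketch of what one looks like (names and priority are
illustrative):

	static int example_restart(struct notifier_block *nb,
				   unsigned long mode, void *cmd)
	{
		/* drive the board's reset line here */
		return NOTIFY_DONE;
	}

	static struct notifier_block example_restart_nb = {
		.notifier_call	= example_restart,
		.priority	= 128,	/* default restart priority */
	};

	/* at driver probe time: */
	register_restart_handler(&example_restart_nb);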
index bc70422..8c5f0c3 100644 (file)
  * Copyright (C) 2010 Michal Simek <monstr@monstr.eu>
  * Copyright (C) 2010 PetaLogix
  * Copyright (C) 2005 John Williams <jwilliams@itee.uq.edu.au>
- *
- * Based on PowerPC version derived from arch/arm/mm/consistent.c
- * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
- * Copyright (C) 2000 Russell King
  */
 
-#include <linux/export.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
 #include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/memblock.h>
-#include <linux/highmem.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/gfp.h>
 #include <linux/dma-noncoherent.h>
-
-#include <asm/pgalloc.h>
-#include <linux/io.h>
-#include <linux/hardirq.h>
-#include <linux/mmu_context.h>
-#include <asm/mmu.h>
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
 #include <asm/cpuinfo.h>
-#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
 
-#ifndef CONFIG_MMU
-/* I have to use dcache values because I can't relate on ram size */
-# define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
-#endif
-
-/*
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core.
- * My crufty no-MMU approach is simple. In the HW platform we can optionally
- * mirror the DDR up above the processor cacheable region.  So, memory accessed
- * in this mirror region will not be cached.  It's alloced from the same
- * pool as normal memory, but the handle we return is shifted up into the
- * uncached region.  This will no doubt cause big problems if memory allocated
- * here is not also freed properly. -- JW
- */
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-       unsigned long order, vaddr;
-       void *ret;
-       unsigned int i, err = 0;
-       struct page *page, *end;
-
-#ifdef CONFIG_MMU
-       phys_addr_t pa;
-       struct vm_struct *area;
-       unsigned long va;
-#endif
-
-       if (in_interrupt())
-               BUG();
-
-       /* Only allocate page size areas. */
-       size = PAGE_ALIGN(size);
-       order = get_order(size);
-
-       vaddr = __get_free_pages(gfp | __GFP_ZERO, order);
-       if (!vaddr)
-               return NULL;
+       phys_addr_t paddr = page_to_phys(page);
 
-       /*
-        * we need to ensure that there are no cachelines in use,
-        * or worse dirty in this area.
-        */
-       flush_dcache_range(virt_to_phys((void *)vaddr),
-                                       virt_to_phys((void *)vaddr) + size);
+       flush_dcache_range(paddr, paddr + size);
+}
 
 #ifndef CONFIG_MMU
-       ret = (void *)vaddr;
-       /*
-        * Here's the magic!  Note if the uncached shadow is not implemented,
-        * it's up to the calling code to also test that condition and make
-        * other arranegments, such as manually flushing the cache and so on.
-        */
-# ifdef CONFIG_XILINX_UNCACHED_SHADOW
-       ret = (void *)((unsigned) ret | UNCACHED_SHADOW_MASK);
-# endif
-       if ((unsigned int)ret > cpuinfo.dcache_base &&
-                               (unsigned int)ret < cpuinfo.dcache_high)
-               pr_warn("ERROR: Your cache coherent area is CACHED!!!\n");
-
-       /* dma_handle is same as physical (shadowed) address */
-       *dma_handle = (dma_addr_t)ret;
+/*
+ * Consistent memory allocators. Used for DMA devices that want to share
+ * uncached memory with the processor core.  My crufty no-MMU approach is
+ * simple.  In the HW platform we can optionally mirror the DDR up above the
+ * processor cacheable region.  So, memory accessed in this mirror region will
+ * not be cached.  It's allocated from the same pool as normal memory, but the
+ * handle we return is shifted up into the uncached region.  This will no doubt
+ * cause big problems if memory allocated here is not also freed properly. -- JW
+ *
+ * I have to use dcache values because I can't rely on ram size:
+ */
+#ifdef CONFIG_XILINX_UNCACHED_SHADOW
+#define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
 #else
-       /* Allocate some common virtual space to map the new pages. */
-       area = get_vm_area(size, VM_ALLOC);
-       if (!area) {
-               free_pages(vaddr, order);
-               return NULL;
-       }
-       va = (unsigned long) area->addr;
-       ret = (void *)va;
-
-       /* This gives us the real physical address of the first page. */
-       *dma_handle = pa = __virt_to_phys(vaddr);
-#endif
-
-       /*
-        * free wasted pages.  We skip the first page since we know
-        * that it will have count = 1 and won't require freeing.
-        * We also mark the pages in use as reserved so that
-        * remap_page_range works.
-        */
-       page = virt_to_page(vaddr);
-       end = page + (1 << order);
-
-       split_page(page, order);
-
-       for (i = 0; i < size && err == 0; i += PAGE_SIZE) {
-#ifdef CONFIG_MMU
-               /* MS: This is the whole magic - use cache inhibit pages */
-               err = map_page(va + i, pa + i, _PAGE_KERNEL | _PAGE_NO_CACHE);
-#endif
+#define UNCACHED_SHADOW_MASK 0
+#endif /* CONFIG_XILINX_UNCACHED_SHADOW */
 
-               SetPageReserved(page);
-               page++;
-       }
-
-       /* Free the otherwise unused pages. */
-       while (page < end) {
-               __free_page(page);
-               page++;
-       }
-
-       if (err) {
-               free_pages(vaddr, order);
-               return NULL;
-       }
-
-       return ret;
-}
-
-#ifdef CONFIG_MMU
-static pte_t *consistent_virt_to_pte(void *vaddr)
+void *uncached_kernel_address(void *ptr)
 {
-       unsigned long addr = (unsigned long)vaddr;
-
-       return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr);
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
-               dma_addr_t dma_addr)
-{
-       pte_t *ptep = consistent_virt_to_pte(vaddr);
-
-       if (pte_none(*ptep) || !pte_present(*ptep))
-               return 0;
+       unsigned long addr = (unsigned long)ptr;
 
-       return pte_pfn(*ptep);
+       addr |= UNCACHED_SHADOW_MASK;
+       if (addr > cpuinfo.dcache_base && addr < cpuinfo.dcache_high)
+               pr_warn("ERROR: Your cache coherent area is CACHED!!!\n");
+       return (void *)addr;
 }
-#endif
 
-/*
- * free page(s) as defined by the above mapping.
- */
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_addr, unsigned long attrs)
+void *cached_kernel_address(void *ptr)
 {
-       struct page *page;
-
-       if (in_interrupt())
-               BUG();
-
-       size = PAGE_ALIGN(size);
-
-#ifndef CONFIG_MMU
-       /* Clear SHADOW_MASK bit in address, and free as per usual */
-# ifdef CONFIG_XILINX_UNCACHED_SHADOW
-       vaddr = (void *)((unsigned)vaddr & ~UNCACHED_SHADOW_MASK);
-# endif
-       page = virt_to_page(vaddr);
-
-       do {
-               __free_reserved_page(page);
-               page++;
-       } while (size -= PAGE_SIZE);
-#else
-       do {
-               pte_t *ptep = consistent_virt_to_pte(vaddr);
-               unsigned long pfn;
-
-               if (!pte_none(*ptep) && pte_present(*ptep)) {
-                       pfn = pte_pfn(*ptep);
-                       pte_clear(&init_mm, (unsigned int)vaddr, ptep);
-                       if (pfn_valid(pfn)) {
-                               page = pfn_to_page(pfn);
-                               __free_reserved_page(page);
-                       }
-               }
-               vaddr += PAGE_SIZE;
-       } while (size -= PAGE_SIZE);
+       unsigned long addr = (unsigned long)ptr;
 
-       /* flush tlb */
-       flush_tlb_all();
-#endif
+       return (void *)(addr & ~UNCACHED_SHADOW_MASK);
 }
+#endif /* !CONFIG_MMU */
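
To make the shadow arithmetic concrete with assumed values: if
cpuinfo.dcache_base is 0x80000000 and cpuinfo.dcache_high is 0x8fffffff,
UNCACHED_SHADOW_MASK works out to 0x10000000 and the two helpers simply
toggle that bit:

	void *cached   = (void *)0x803f0000;
	void *uncached = (void *)(0x803f0000 | 0x10000000);  /* 0x903f0000 */
	void *back     = (void *)(0x903f0000 & ~0x10000000); /* 0x803f0000 */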
index 8fe54fd..010bb9c 100644 (file)
@@ -44,10 +44,6 @@ unsigned long ioremap_base;
 unsigned long ioremap_bot;
 EXPORT_SYMBOL(ioremap_bot);
 
-#ifndef CONFIG_SMP
-struct pgtable_cache_struct quicklists;
-#endif
-
 static void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
                unsigned long flags)
 {
index cc8e2b1..a0bd9bd 100644 (file)
@@ -5,7 +5,6 @@ config MIPS
        select ARCH_32BIT_OFF_T if !64BIT
        select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
        select ARCH_CLOCKSOURCE_DATA
-       select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARCH_SUPPORTS_UPROBES
@@ -13,6 +12,7 @@ config MIPS
        select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_IPC_PARSE_VERSION
        select BUILDTIME_EXTABLE_SORT
        select CLONE_BACKWARDS
index 77a836e..df69eaa 100644 (file)
@@ -84,7 +84,7 @@ void __init prom_init(void)
                 * Here we will start up CPU1 in the background and ask it to
                 * reconfigure itself then go back to sleep.
                 */
-               memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20);
+               memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20);
                __sync();
                set_c0_cause(C_SW0);
                cpumask_set_cpu(1, &bmips_booted_mask);
index 63a9f33..5cfc9d3 100644 (file)
@@ -99,7 +99,7 @@
 
                        miscintc: interrupt-controller@18060010 {
                                compatible = "qca,ar7240-misc-intc";
-                               reg = <0x18060010 0x4>;
+                               reg = <0x18060010 0x8>;
 
                                interrupt-parent = <&cpuintc>;
                                interrupts = <6>;
index 16bef81..914af12 100644 (file)
@@ -571,7 +571,6 @@ CONFIG_USB_SERIAL_OMNINET=m
 CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
 CONFIG_USB_ADUTUX=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_CYPRESS_CY7C63=m
index 8762e75..2c7adea 100644 (file)
@@ -314,7 +314,6 @@ CONFIG_USB_SERIAL_SAFE_PADDED=y
 CONFIG_USB_SERIAL_CYBERJACK=m
 CONFIG_USB_SERIAL_XIRCOM=m
 CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_CYTHERM=m
index af44b35..b4328b3 100644 (file)
@@ -160,7 +160,6 @@ void __init prom_meminit(void)
 
 void __init prom_free_prom_memory(void)
 {
-       unsigned long addr;
        int i;
 
        if (prom_flags & PROM_FLAG_DONT_FREE_TEMP)
index 8772617..80112f2 100644 (file)
@@ -43,7 +43,7 @@
 
 /* O32 stack has to be 8-byte aligned. */
 static u64 o32_stk[4096];
-#define O32_STK          &o32_stk[sizeof(o32_stk)]
+#define O32_STK          (&o32_stk[ARRAY_SIZE(o32_stk)])
 
 #define __PROM_O32(fun, arg) fun arg __asm__(#fun); \
                                     __asm__(#fun " = call_o32")
index bf6a8af..581a6a3 100644 (file)
@@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void)
 #endif
 }
 
-extern char bmips_reset_nmi_vec;
-extern char bmips_reset_nmi_vec_end;
-extern char bmips_smp_movevec;
-extern char bmips_smp_int_vec;
-extern char bmips_smp_int_vec_end;
+extern char bmips_reset_nmi_vec[];
+extern char bmips_reset_nmi_vec_end[];
+extern char bmips_smp_movevec[];
+extern char bmips_smp_int_vec[];
+extern char bmips_smp_int_vec_end[];
 
 extern int bmips_smp_enabled;
 extern int bmips_cpu_offset;
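
The switch from extern char to extern char[] matters because these symbols
are defined in assembly: the array form decays to the symbol's address
without the compiler assuming a one-byte object lives there, an assumption
newer compilers can exploit when the code copies 0x20 bytes from it. A
minimal illustration (symbol names hypothetical):

	extern char _vec_start[];  /* good: address only, no size implied */
	extern char _vec_byte;     /* risky: compiler may assume 1 byte */

	void copy_vec(void *dst)
	{
		memcpy(dst, _vec_start, 0x20);	/* fine with the array form */
	}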
index 79bf34e..f613687 100644 (file)
@@ -77,8 +77,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
 extern unsigned long __xchg_small(volatile void *ptr, unsigned long val,
                                  unsigned int size);
 
-static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
-                                  int size)
+static __always_inline
+unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
 {
        switch (size) {
        case 1:
@@ -153,8 +153,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
 extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
                                     unsigned long new, unsigned int size);
 
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
-                                     unsigned long new, unsigned int size)
+static __always_inline
+unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+                       unsigned long new, unsigned int size)
 {
        switch (size) {
        case 1:
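
__xchg() and __cmpxchg() rely on the size switch being folded at compile
time: the default case calls a deliberately undefined function, so an
unsupported size becomes a link error instead of silent misbehaviour.
__always_inline guarantees that folding even when the compiler would
otherwise decline to inline. A stripped-down sketch of the pattern:

	extern unsigned long __example_bad_size(void);	/* never defined */

	static __always_inline
	unsigned long example_op(volatile void *ptr, int size)
	{
		switch (size) {
		case 4:
			return 0;	/* real 32-bit implementation here */
		default:
			return __example_bad_size();	/* link error if kept */
		}
	}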
index cbdc14b..adab7b5 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/octeon/octeon-feature.h>
 
 #include <asm/octeon/cvmx-ipd-defs.h>
+#include <asm/octeon/cvmx-pip-defs.h>
 
 enum cvmx_ipd_mode {
    CVMX_IPD_OPC_MODE_STT = 0LL,          /* All blocks DRAM, not cached in L2 */
index 4360998..6f48649 100644 (file)
@@ -108,7 +108,6 @@ extern unsigned long PCIBIOS_MIN_MEM;
 
 #define HAVE_PCI_MMAP
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
 
 /*
  * Dynamic DMA mapping stuff.
index aa16b85..1668423 100644 (file)
@@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #define __pte_free_tlb(tlb,pte,address)                        \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb), pte);                    \
 } while (0)
 
@@ -105,8 +105,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
-#define check_pgt_cache()      do { } while (0)
-
 extern void pagetable_init(void);
 
 #endif /* _ASM_PGALLOC_H */
index 4dca733..f85bd5b 100644 (file)
@@ -661,9 +661,4 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 #endif /* _ASM_PGTABLE_H */
index aca909b..fba18d4 100644 (file)
 
 extern unsigned int vced_count, vcei_count;
 
-/*
- * MIPS does have an arch_pick_mmap_layout()
- */
-#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
-
 #ifdef CONFIG_32BIT
 #ifdef CONFIG_KVM_GUEST
 /* User space process size is limited to 1GB in KVM Guest Mode */
index 071053e..5d70bab 100644 (file)
@@ -52,6 +52,7 @@
 # endif
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
 
 /* whitelists for checksyscalls */
 #define __IGNORE_fadvise64_64
index e78462e..b088255 100644 (file)
@@ -24,6 +24,8 @@
 
 #define VDSO_HAS_CLOCK_GETRES          1
 
+#define __VDSO_USE_SYSCALL             ULLONG_MAX
+
 #ifdef CONFIG_MIPS_CLOCK_VSYSCALL
 
 static __always_inline long gettimeofday_fallback(
@@ -205,7 +207,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
                break;
 #endif
        default:
-               cycle_now = 0;
+               cycle_now = __VDSO_USE_SYSCALL;
                break;
        }
 
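
Returning __VDSO_USE_SYSCALL (ULLONG_MAX) rather than 0 tells the generic
vDSO code to take the syscall fallback instead of computing a bogus time
from a zero cycle count. The common-code check is roughly the following
(paraphrased, not the verbatim generic source):

	u64 cycles = __arch_get_hw_counter(vd->clock_mode);

	if ((s64)cycles < 0)	/* ULLONG_MAX reads as negative */
		return -1;	/* caller then issues the real syscall */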
index a2aba4b..1ade1da 100644 (file)
@@ -6,5 +6,16 @@
 #define HWCAP_MIPS_R6          (1 << 0)
 #define HWCAP_MIPS_MSA         (1 << 1)
 #define HWCAP_MIPS_CRC32       (1 << 2)
+#define HWCAP_MIPS_MIPS16      (1 << 3)
+#define HWCAP_MIPS_MDMX        (1 << 4)
+#define HWCAP_MIPS_MIPS3D      (1 << 5)
+#define HWCAP_MIPS_SMARTMIPS   (1 << 6)
+#define HWCAP_MIPS_DSP         (1 << 7)
+#define HWCAP_MIPS_DSP2        (1 << 8)
+#define HWCAP_MIPS_DSP3        (1 << 9)
+#define HWCAP_MIPS_MIPS16E2    (1 << 10)
+#define HWCAP_LOONGSON_MMI     (1 << 11)
+#define HWCAP_LOONGSON_EXT     (1 << 12)
+#define HWCAP_LOONGSON_EXT2    (1 << 13)
 
 #endif /* _UAPI_ASM_HWCAP_H */
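
Userspace can probe these new capability bits through the auxiliary vector.
A hypothetical check (bit positions taken from the defines above):

	#include <sys/auxv.h>

	int main(void)
	{
		unsigned long hwcap = getauxval(AT_HWCAP);
		int has_dsp = !!(hwcap & (1UL << 7));	/* HWCAP_MIPS_DSP */
		int has_mmi = !!(hwcap & (1UL << 11));	/* HWCAP_LOONGSON_MMI */

		return !(has_dsp || has_mmi);
	}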
index c2b4096..57dc2ac 100644 (file)
@@ -95,6 +95,9 @@
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
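
MADV_COLD and MADV_PAGEOUT become plain madvise() advice values once this
UAPI header lands. A hypothetical caller, with buf/len assumed to describe
an existing private mapping:

	#include <sys/mman.h>

	static void hint_reclaim(void *buf, size_t len)
	{
		madvise(buf, len, MADV_COLD);	 /* deactivate: cheap target */
		madvise(buf, len, MADV_PAGEOUT); /* reclaim right away */
	}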
index fa62cd1..6a7afe7 100644 (file)
@@ -24,7 +24,8 @@ static char r4kwar[] __initdata =
 static char daddiwar[] __initdata =
        "Enable CPU_DADDI_WORKAROUNDS to rectify.";
 
-static inline void align_mod(const int align, const int mod)
+static __always_inline __init
+void align_mod(const int align, const int mod)
 {
        asm volatile(
                ".set   push\n\t"
@@ -38,8 +39,9 @@ static inline void align_mod(const int align, const int mod)
                : "n"(align), "n"(mod));
 }
 
-static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w,
-                                             const int align, const int mod)
+static __always_inline __init
+void mult_sh_align_mod(long *v1, long *v2, long *w,
+                      const int align, const int mod)
 {
        unsigned long flags;
        int m1, m2;
@@ -113,7 +115,7 @@ static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w,
        *w = lw;
 }
 
-static inline void check_mult_sh(void)
+static __always_inline __init void check_mult_sh(void)
 {
        long v1[8], v2[8], w[8];
        int bug, fix, i;
@@ -176,7 +178,7 @@ asmlinkage void __init do_daddi_ov(struct pt_regs *regs)
        exception_exit(prev_state);
 }
 
-static inline void check_daddi(void)
+static __init void check_daddi(void)
 {
        extern asmlinkage void handle_daddi_ov(void);
        unsigned long flags;
@@ -242,7 +244,7 @@ static inline void check_daddi(void)
 
 int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1;
 
-static inline void check_daddiu(void)
+static __init void check_daddiu(void)
 {
        long v, w, tmp;
 
index c2eb392..f521cbf 100644 (file)
@@ -2180,6 +2180,39 @@ void cpu_probe(void)
                elf_hwcap |= HWCAP_MIPS_MSA;
        }
 
+       if (cpu_has_mips16)
+               elf_hwcap |= HWCAP_MIPS_MIPS16;
+
+       if (cpu_has_mdmx)
+               elf_hwcap |= HWCAP_MIPS_MDMX;
+
+       if (cpu_has_mips3d)
+               elf_hwcap |= HWCAP_MIPS_MIPS3D;
+
+       if (cpu_has_smartmips)
+               elf_hwcap |= HWCAP_MIPS_SMARTMIPS;
+
+       if (cpu_has_dsp)
+               elf_hwcap |= HWCAP_MIPS_DSP;
+
+       if (cpu_has_dsp2)
+               elf_hwcap |= HWCAP_MIPS_DSP2;
+
+       if (cpu_has_dsp3)
+               elf_hwcap |= HWCAP_MIPS_DSP3;
+
+       if (cpu_has_mips16e2)
+               elf_hwcap |= HWCAP_MIPS_MIPS16E2;
+
+       if (cpu_has_loongson_mmi)
+               elf_hwcap |= HWCAP_LOONGSON_MMI;
+
+       if (cpu_has_loongson_ext)
+               elf_hwcap |= HWCAP_LOONGSON_EXT;
+
+       if (cpu_has_loongson_ext2)
+               elf_hwcap |= HWCAP_LOONGSON_EXT2;
+
        if (cpu_has_vz)
                cpu_probe_vz(c);
 
index b8249c2..5eec13b 100644 (file)
@@ -108,6 +108,9 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type)
                return;
        }
 
+       if (start < PHYS_OFFSET)
+               return;
+
        memblock_add(start, size);
        /* Reserve any memory except the ordinary RAM ranges. */
        switch (type) {
@@ -321,7 +324,7 @@ static void __init bootmem_init(void)
         * Reserve any memory between the start of RAM and PHYS_OFFSET
         */
        if (ramstart > PHYS_OFFSET)
-               memblock_reserve(PHYS_OFFSET, PFN_UP(ramstart) - PHYS_OFFSET);
+               memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET);
 
        if (PFN_UP(ramstart) > ARCH_PFN_OFFSET) {
                pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
index 76fae9b..712c15d 100644 (file)
@@ -464,10 +464,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end)
 
 static inline void bmips_nmi_handler_setup(void)
 {
-       bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec,
-               &bmips_reset_nmi_vec_end);
-       bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec,
-               &bmips_smp_int_vec_end);
+       bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec,
+               bmips_reset_nmi_vec_end);
+       bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec,
+               bmips_smp_int_vec_end);
 }
 
 struct reset_vec_info {
index b0e25e9..3f16f38 100644 (file)
@@ -80,6 +80,7 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len,
 
 save_static_function(sys_fork);
 save_static_function(sys_clone);
+save_static_function(sys_clone3);
 
 SYSCALL_DEFINE1(set_thread_area, unsigned long, addr)
 {
index c9c879e..e7c5ab3 100644 (file)
 432    n32     fsmount                         sys_fsmount
 433    n32     fspick                          sys_fspick
 434    n32     pidfd_open                      sys_pidfd_open
-# 435 reserved for clone3
+435    n32     clone3                          __sys_clone3
index bbce915..13cd665 100644 (file)
 432    n64     fsmount                         sys_fsmount
 433    n64     fspick                          sys_fspick
 434    n64     pidfd_open                      sys_pidfd_open
-# 435 reserved for clone3
+435    n64     clone3                          __sys_clone3
index 9653591..353539e 100644 (file)
 432    o32     fsmount                         sys_fsmount
 433    o32     fspick                          sys_fspick
 434    o32     pidfd_open                      sys_pidfd_open
-# 435 reserved for clone3
+435    o32     clone3                          __sys_clone3
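
With the three table entries wired up, clone3 becomes reachable from MIPS
userspace. A hypothetical raw invocation (glibc had no wrapper at the time;
435 is the number allocated above):

	#define _GNU_SOURCE
	#include <linux/sched.h>	/* struct clone_args */
	#include <signal.h>
	#include <string.h>
	#include <unistd.h>

	static long try_clone3(void)
	{
		struct clone_args args;

		memset(&args, 0, sizeof(args));
		args.exit_signal = SIGCHLD;
		return syscall(435, &args, sizeof(args)); /* __NR_clone3 */
	}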
index c1a4d4d..9f79908 100644 (file)
@@ -66,6 +66,10 @@ else
       $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
 endif
 
+# Some -march= flags enable MMI instructions, and GCC complains about that
+# support being enabled alongside -msoft-float. Thus explicitly disable MMI.
+cflags-y += $(call cc-option,-mno-loongson-mmi)
+
 #
 # Loongson Machines' Support
 #
index 4abb92e..4254ac4 100644 (file)
@@ -3,6 +3,7 @@
  */
 #include <linux/fs.h>
 #include <linux/fcntl.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 
 #include <asm/bootinfo.h>
@@ -64,24 +65,22 @@ void __init prom_init_memory(void)
                node_id = loongson_memmap->map[i].node_id;
                mem_type = loongson_memmap->map[i].mem_type;
 
-               if (node_id == 0) {
-                       switch (mem_type) {
-                       case SYSTEM_RAM_LOW:
-                               add_memory_region(loongson_memmap->map[i].mem_start,
-                                       (u64)loongson_memmap->map[i].mem_size << 20,
-                                       BOOT_MEM_RAM);
-                               break;
-                       case SYSTEM_RAM_HIGH:
-                               add_memory_region(loongson_memmap->map[i].mem_start,
-                                       (u64)loongson_memmap->map[i].mem_size << 20,
-                                       BOOT_MEM_RAM);
-                               break;
-                       case SYSTEM_RAM_RESERVED:
-                               add_memory_region(loongson_memmap->map[i].mem_start,
-                                       (u64)loongson_memmap->map[i].mem_size << 20,
-                                       BOOT_MEM_RESERVED);
-                               break;
-                       }
+               if (node_id != 0)
+                       continue;
+
+               switch (mem_type) {
+               case SYSTEM_RAM_LOW:
+                       memblock_add(loongson_memmap->map[i].mem_start,
+                               (u64)loongson_memmap->map[i].mem_size << 20);
+                       break;
+               case SYSTEM_RAM_HIGH:
+                       memblock_add(loongson_memmap->map[i].mem_start,
+                               (u64)loongson_memmap->map[i].mem_size << 20);
+                       break;
+               case SYSTEM_RAM_RESERVED:
+                       memblock_reserve(loongson_memmap->map[i].mem_start,
+                               (u64)loongson_memmap->map[i].mem_size << 20);
+                       break;
                }
        }
 }
index ffefc1c..98c3a7f 100644 (file)
@@ -110,7 +110,7 @@ static int __init serial_init(void)
 }
 module_init(serial_init);
 
-static void __init serial_exit(void)
+static void __exit serial_exit(void)
 {
        platform_device_unregister(&uart8250_device);
 }
index 414e97d..8f20d2c 100644 (file)
@@ -142,8 +142,6 @@ static void __init szmem(unsigned int node)
                                (u32)node_id, mem_type, mem_start, mem_size);
                        pr_info("       start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
                                start_pfn, end_pfn, num_physpages);
-                       add_memory_region((node_id << 44) + mem_start,
-                               (u64)mem_size << 20, BOOT_MEM_RAM);
                        memblock_add_node(PFN_PHYS(start_pfn),
                                PFN_PHYS(end_pfn - start_pfn), node);
                        break;
@@ -156,16 +154,12 @@ static void __init szmem(unsigned int node)
                                (u32)node_id, mem_type, mem_start, mem_size);
                        pr_info("       start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
                                start_pfn, end_pfn, num_physpages);
-                       add_memory_region((node_id << 44) + mem_start,
-                               (u64)mem_size << 20, BOOT_MEM_RAM);
                        memblock_add_node(PFN_PHYS(start_pfn),
                                PFN_PHYS(end_pfn - start_pfn), node);
                        break;
                case SYSTEM_RAM_RESERVED:
                        pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
                                (u32)node_id, mem_type, mem_start, mem_size);
-                       add_memory_region((node_id << 44) + mem_start,
-                               (u64)mem_size << 20, BOOT_MEM_RESERVED);
                        memblock_reserve(((node_id << 44) + mem_start),
                                mem_size << 20);
                        break;
@@ -191,8 +185,6 @@ static void __init node_mem_init(unsigned int node)
        NODE_DATA(node)->node_start_pfn = start_pfn;
        NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
 
-       free_bootmem_with_active_regions(node, end_pfn);
-
        if (node == 0) {
                /* kernel end address */
                unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end));
@@ -209,8 +201,6 @@ static void __init node_mem_init(unsigned int node)
                        memblock_reserve((node_addrspace_offset | 0xfe000000),
                                         32 << 20);
        }
-
-       sparse_memory_present_with_active_regions(node);
 }
 
 static __init void prom_meminit(void)
@@ -227,6 +217,7 @@ static __init void prom_meminit(void)
                        cpumask_clear(&__node_data[(node)]->cpumask);
                }
        }
+       memblocks_present();
        max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
 
        for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) {
index d79f2b4..00fe90c 100644 (file)
 unsigned long shm_align_mask = PAGE_SIZE - 1;  /* Sane caches */
 EXPORT_SYMBOL(shm_align_mask);
 
-/* gap between mmap and stack */
-#define MIN_GAP (128*1024*1024UL)
-#define MAX_GAP ((TASK_SIZE)/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-       if (current->personality & ADDR_COMPAT_LAYOUT)
-               return 1;
-
-       if (rlim_stack->rlim_cur == RLIM_INFINITY)
-               return 1;
-
-       return sysctl_legacy_va_layout;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
-       unsigned long gap = rlim_stack->rlim_cur;
-
-       if (gap < MIN_GAP)
-               gap = MIN_GAP;
-       else if (gap > MAX_GAP)
-               gap = MAX_GAP;
-
-       return PAGE_ALIGN(TASK_SIZE - gap - rnd);
-}
-
 #define COLOUR_ALIGN(addr, pgoff)                              \
        ((((addr) + shm_align_mask) & ~shm_align_mask) +        \
         (((pgoff) << PAGE_SHIFT) & shm_align_mask))
@@ -144,63 +117,6 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
                        addr0, len, pgoff, flags, DOWN);
 }
 
-unsigned long arch_mmap_rnd(void)
-{
-       unsigned long rnd;
-
-#ifdef CONFIG_COMPAT
-       if (TASK_IS_32BIT_ADDR)
-               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-       else
-#endif /* CONFIG_COMPAT */
-               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-
-       return rnd << PAGE_SHIFT;
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
-       unsigned long random_factor = 0UL;
-
-       if (current->flags & PF_RANDOMIZE)
-               random_factor = arch_mmap_rnd();
-
-       if (mmap_is_legacy(rlim_stack)) {
-               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-               mm->get_unmapped_area = arch_get_unmapped_area;
-       } else {
-               mm->mmap_base = mmap_base(random_factor, rlim_stack);
-               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-       }
-}
-
-static inline unsigned long brk_rnd(void)
-{
-       unsigned long rnd = get_random_long();
-
-       rnd = rnd << PAGE_SHIFT;
-       /* 8MB for 32bit, 256MB for 64bit */
-       if (TASK_IS_32BIT_ADDR)
-               rnd = rnd & 0x7ffffful;
-       else
-               rnd = rnd & 0xffffffful;
-
-       return rnd;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       unsigned long base = mm->brk;
-       unsigned long ret;
-
-       ret = PAGE_ALIGN(base + brk_rnd());
-
-       if (ret < mm->brk)
-               return mm->brk;
-
-       return ret;
-}
-
 bool __virt_addr_valid(const volatile void *kaddr)
 {
        unsigned long vaddr = (unsigned long)kaddr;
index e01cb33..41bb91f 100644 (file)
@@ -653,6 +653,13 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
                                   int restore_scratch)
 {
        if (restore_scratch) {
+               /*
+                * Ensure the MFC0 below observes the value written to the
+                * KScratch register by the prior MTC0.
+                */
+               if (scratch_reg >= 0)
+                       uasm_i_ehb(p);
+
                /* Reset default page size */
                if (PM_DEFAULT_MASK >> 16) {
                        uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
@@ -667,12 +674,10 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
                        uasm_i_mtc0(p, 0, C0_PAGEMASK);
                        uasm_il_b(p, r, lid);
                }
-               if (scratch_reg >= 0) {
-                       uasm_i_ehb(p);
+               if (scratch_reg >= 0)
                        UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
-               } else {
+               else
                        UASM_i_LW(p, 1, scratchpad_offset(0), 0);
-               }
        } else {
                /* Reset default page size */
                if (PM_DEFAULT_MASK >> 16) {
@@ -921,6 +926,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
        }
        if (mode != not_refill && check_for_high_segbits) {
                uasm_l_large_segbits_fault(l, *p);
+
+               if (mode == refill_scratch && scratch_reg >= 0)
+                       uasm_i_ehb(p);
+
                /*
                 * We get here if we are an xsseg address, or if we are
                 * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary.
@@ -939,12 +948,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
                uasm_i_jr(p, ptr);
 
                if (mode == refill_scratch) {
-                       if (scratch_reg >= 0) {
-                               uasm_i_ehb(p);
+                       if (scratch_reg >= 0)
                                UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
-                       } else {
+                       else
                                UASM_i_LW(p, 1, scratchpad_offset(0), 0);
-                       }
                } else {
                        uasm_i_nop(p);
                }
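
The relocated uasm_i_ehb() matters because a CP0 write is not immediately
visible to a later CP0 read of the same register. In instruction order the
emitted fix looks roughly like this (register selects illustrative):

	/*
	 *	mtc0	$k0, $31, 2	# save to a KScratch register
	 *	ehb			# execution hazard barrier
	 *	...
	 *	mfc0	$1, $31, 2	# restore -- now sees the mtc0 value
	 */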
index dfb5279..800a21b 100644 (file)
@@ -61,6 +61,7 @@ int init_debug = 1;
 /* memory blocks */
 struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS];
 
+#define MAX_PROM_MEM 5
 static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata;
 static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata;
 static unsigned int nr_prom_mem __initdata;
@@ -358,7 +359,7 @@ void __init prom_meminit(void)
                p++;
 
                if (type == BOOT_MEM_ROM_DATA) {
-                       if (nr_prom_mem >= 5) {
+                       if (nr_prom_mem >= MAX_PROM_MEM) {
                                pr_err("Too many ROM DATA regions");
                                continue;
                        }
@@ -377,7 +378,6 @@ void __init prom_free_prom_memory(void)
        char    *ptr;
        int     len = 0;
        int     i;
-       unsigned long addr;
 
        /*
         * preserve environment variables and command line from pmon/bbload
index 69cfa0a..996a934 100644 (file)
@@ -15,6 +15,7 @@ ccflags-vdso := \
        $(filter -mmicromips,$(KBUILD_CFLAGS)) \
        $(filter -march=%,$(KBUILD_CFLAGS)) \
        $(filter -m%-float,$(KBUILD_CFLAGS)) \
+       $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \
        -D__VDSO__
 
 ifdef CONFIG_CC_IS_CLANG
@@ -59,7 +60,7 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg
 ifndef CONFIG_CPU_MIPSR6
   ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
     $(warning MIPS VDSO requires binutils >= 2.25)
-    obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y))
+    obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y))
     ccflags-vdso += -DDISABLE_MIPS_VDSO
   endif
 endif
diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c
deleted file mode 100644 (file)
index e8243c7..0000000
+++ /dev/null
@@ -1,269 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2015 Imagination Technologies
- * Author: Alex Smith <alex.smith@imgtec.com>
- */
-
-#include "vdso.h"
-
-#include <linux/compiler.h>
-#include <linux/time.h>
-
-#include <asm/clocksource.h>
-#include <asm/io.h>
-#include <asm/unistd.h>
-#include <asm/vdso.h>
-
-#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
-
-static __always_inline long gettimeofday_fallback(struct timeval *_tv,
-                                         struct timezone *_tz)
-{
-       register struct timezone *tz asm("a1") = _tz;
-       register struct timeval *tv asm("a0") = _tv;
-       register long ret asm("v0");
-       register long nr asm("v0") = __NR_gettimeofday;
-       register long error asm("a3");
-
-       asm volatile(
-       "       syscall\n"
-       : "=r" (ret), "=r" (error)
-       : "r" (tv), "r" (tz), "r" (nr)
-       : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-         "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-
-       return error ? -ret : ret;
-}
-
-#endif
-
-static __always_inline long clock_gettime_fallback(clockid_t _clkid,
-                                          struct timespec *_ts)
-{
-       register struct timespec *ts asm("a1") = _ts;
-       register clockid_t clkid asm("a0") = _clkid;
-       register long ret asm("v0");
-       register long nr asm("v0") = __NR_clock_gettime;
-       register long error asm("a3");
-
-       asm volatile(
-       "       syscall\n"
-       : "=r" (ret), "=r" (error)
-       : "r" (clkid), "r" (ts), "r" (nr)
-       : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-         "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-
-       return error ? -ret : ret;
-}
-
-static __always_inline int do_realtime_coarse(struct timespec *ts,
-                                             const union mips_vdso_data *data)
-{
-       u32 start_seq;
-
-       do {
-               start_seq = vdso_data_read_begin(data);
-
-               ts->tv_sec = data->xtime_sec;
-               ts->tv_nsec = data->xtime_nsec >> data->cs_shift;
-       } while (vdso_data_read_retry(data, start_seq));
-
-       return 0;
-}
-
-static __always_inline int do_monotonic_coarse(struct timespec *ts,
-                                              const union mips_vdso_data *data)
-{
-       u32 start_seq;
-       u64 to_mono_sec;
-       u64 to_mono_nsec;
-
-       do {
-               start_seq = vdso_data_read_begin(data);
-
-               ts->tv_sec = data->xtime_sec;
-               ts->tv_nsec = data->xtime_nsec >> data->cs_shift;
-
-               to_mono_sec = data->wall_to_mono_sec;
-               to_mono_nsec = data->wall_to_mono_nsec;
-       } while (vdso_data_read_retry(data, start_seq));
-
-       ts->tv_sec += to_mono_sec;
-       timespec_add_ns(ts, to_mono_nsec);
-
-       return 0;
-}
-
-#ifdef CONFIG_CSRC_R4K
-
-static __always_inline u64 read_r4k_count(void)
-{
-       unsigned int count;
-
-       __asm__ __volatile__(
-       "       .set push\n"
-       "       .set mips32r2\n"
-       "       rdhwr   %0, $2\n"
-       "       .set pop\n"
-       : "=r" (count));
-
-       return count;
-}
-
-#endif
-
-#ifdef CONFIG_CLKSRC_MIPS_GIC
-
-static __always_inline u64 read_gic_count(const union mips_vdso_data *data)
-{
-       void __iomem *gic = get_gic(data);
-       u32 hi, hi2, lo;
-
-       do {
-               hi = __raw_readl(gic + sizeof(lo));
-               lo = __raw_readl(gic);
-               hi2 = __raw_readl(gic + sizeof(lo));
-       } while (hi2 != hi);
-
-       return (((u64)hi) << 32) + lo;
-}
-
-#endif
-
-static __always_inline u64 get_ns(const union mips_vdso_data *data)
-{
-       u64 cycle_now, delta, nsec;
-
-       switch (data->clock_mode) {
-#ifdef CONFIG_CSRC_R4K
-       case VDSO_CLOCK_R4K:
-               cycle_now = read_r4k_count();
-               break;
-#endif
-#ifdef CONFIG_CLKSRC_MIPS_GIC
-       case VDSO_CLOCK_GIC:
-               cycle_now = read_gic_count(data);
-               break;
-#endif
-       default:
-               return 0;
-       }
-
-       delta = (cycle_now - data->cs_cycle_last) & data->cs_mask;
-
-       nsec = (delta * data->cs_mult) + data->xtime_nsec;
-       nsec >>= data->cs_shift;
-
-       return nsec;
-}
-
-static __always_inline int do_realtime(struct timespec *ts,
-                                      const union mips_vdso_data *data)
-{
-       u32 start_seq;
-       u64 ns;
-
-       do {
-               start_seq = vdso_data_read_begin(data);
-
-               if (data->clock_mode == VDSO_CLOCK_NONE)
-                       return -ENOSYS;
-
-               ts->tv_sec = data->xtime_sec;
-               ns = get_ns(data);
-       } while (vdso_data_read_retry(data, start_seq));
-
-       ts->tv_nsec = 0;
-       timespec_add_ns(ts, ns);
-
-       return 0;
-}
-
-static __always_inline int do_monotonic(struct timespec *ts,
-                                       const union mips_vdso_data *data)
-{
-       u32 start_seq;
-       u64 ns;
-       u64 to_mono_sec;
-       u64 to_mono_nsec;
-
-       do {
-               start_seq = vdso_data_read_begin(data);
-
-               if (data->clock_mode == VDSO_CLOCK_NONE)
-                       return -ENOSYS;
-
-               ts->tv_sec = data->xtime_sec;
-               ns = get_ns(data);
-
-               to_mono_sec = data->wall_to_mono_sec;
-               to_mono_nsec = data->wall_to_mono_nsec;
-       } while (vdso_data_read_retry(data, start_seq));
-
-       ts->tv_sec += to_mono_sec;
-       ts->tv_nsec = 0;
-       timespec_add_ns(ts, ns + to_mono_nsec);
-
-       return 0;
-}
-
-#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
-
-/*
- * This is behind the ifdef so that we don't provide the symbol when there's no
- * possibility of there being a usable clocksource, because there's nothing we
- * can do without it. When libc fails the symbol lookup it should fall back on
- * the standard syscall path.
- */
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-       const union mips_vdso_data *data = get_vdso_data();
-       struct timespec ts;
-       int ret;
-
-       ret = do_realtime(&ts, data);
-       if (ret)
-               return gettimeofday_fallback(tv, tz);
-
-       if (tv) {
-               tv->tv_sec = ts.tv_sec;
-               tv->tv_usec = ts.tv_nsec / 1000;
-       }
-
-       if (tz) {
-               tz->tz_minuteswest = data->tz_minuteswest;
-               tz->tz_dsttime = data->tz_dsttime;
-       }
-
-       return 0;
-}
-
-#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
-
-int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
-{
-       const union mips_vdso_data *data = get_vdso_data();
-       int ret = -1;
-
-       switch (clkid) {
-       case CLOCK_REALTIME_COARSE:
-               ret = do_realtime_coarse(ts, data);
-               break;
-       case CLOCK_MONOTONIC_COARSE:
-               ret = do_monotonic_coarse(ts, data);
-               break;
-       case CLOCK_REALTIME:
-               ret = do_realtime(ts, data);
-               break;
-       case CLOCK_MONOTONIC:
-               ret = do_monotonic(ts, data);
-               break;
-       default:
-               break;
-       }
-
-       if (ret)
-               ret = clock_gettime_fallback(clkid, ts);
-
-       return ret;
-}
index e78b43d..37125e6 100644 (file)
@@ -23,8 +23,6 @@
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
-#define check_pgt_cache()              do { } while (0)
-
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
        pgtable_t pte;
index c70cc56..0588ec9 100644 (file)
@@ -403,8 +403,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  * into virtual address `from'
  */
 
-#define pgtable_cache_init()       do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASMNDS32_PGTABLE_H */
index 4bc8cf7..0b146d7 100644 (file)
@@ -41,10 +41,8 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #define __pte_free_tlb(tlb, pte, addr)                         \
        do {                                                    \
-               pgtable_page_dtor(pte);                         \
+               pgtable_pte_page_dtor(pte);                     \
                tlb_remove_page((tlb), (pte));                  \
        } while (0)
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _ASM_NIOS2_PGALLOC_H */
index 95237b7..99985d8 100644 (file)
@@ -291,8 +291,6 @@ static inline void pte_clear(struct mm_struct *mm,
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init()           do { } while (0)
-
 extern void __init paging_init(void);
 extern void __init mmu_init(void);
 
index 6bbd4ae..4cf35b0 100644 (file)
@@ -123,7 +123,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
                dtb_passed = r6;
 
                if (r7)
-                       strncpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
+                       strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
        }
 #endif
 
@@ -131,10 +131,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
 
 #ifndef CONFIG_CMDLINE_FORCE
        if (cmdline_passed[0])
-               strncpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
+               strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
 #ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB
        else
-               strncpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+               strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 #endif
 #endif
 
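
The strncpy() to strlcpy() switches close a NUL-termination hole: strncpy()
leaves the destination unterminated when the source is at least size bytes
long, while the kernel's strlcpy() always terminates, truncating if needed.
A minimal illustration:

	char dst[8];

	strncpy(dst, "0123456789", sizeof(dst)); /* NOT NUL-terminated */
	strlcpy(dst, "0123456789", sizeof(dst)); /* "0123456" + '\0' */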
index 3d4b397..da12a4c 100644 (file)
@@ -75,7 +75,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
        if (!pte)
                return NULL;
        clear_page(page_address(pte));
-       if (!pgtable_page_ctor(pte)) {
+       if (!pgtable_pte_page_ctor(pte)) {
                __free_page(pte);
                return NULL;
        }
@@ -89,18 +89,16 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        __free_page(pte);
 }
 
 #define __pte_free_tlb(tlb, pte, addr) \
 do {                                   \
-       pgtable_page_dtor(pte);         \
+       pgtable_pte_page_dtor(pte);     \
        tlb_remove_page((tlb), (pte));  \
 } while (0)
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-#define check_pgt_cache()          do { } while (0)
-
 #endif
index 2fe9ff5..248d22d 100644 (file)
@@ -443,11 +443,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #include <asm-generic/pgtable.h>
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()           do { } while (0)
-
 typedef pte_t *pte_addr_t;
 
 #endif /* __ASSEMBLY__ */
index 73ca89a..e5de3f8 100644 (file)
@@ -22,7 +22,7 @@
 
 #define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
 
-#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#define __read_mostly __section(.data..read_mostly)
 
 void parisc_cache_init(void);  /* initializes cache-flushing */
 void disable_sr_hashing_asm(int); /* low level support for above */
index 3eb4bfc..e080143 100644 (file)
@@ -52,7 +52,7 @@
 })
 
 #ifdef CONFIG_SMP
-# define __lock_aligned __attribute__((__section__(".data..lock_aligned")))
+# define __lock_aligned __section(.data..lock_aligned)
 #endif
 
 #endif /* __PARISC_LDCW_H */
index 4f2059a..d98647c 100644 (file)
@@ -124,6 +124,4 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
        pmd_populate_kernel(mm, pmd, page_address(pte_page))
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif
index 6d58c17..4ac374b 100644 (file)
@@ -132,8 +132,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define PTRS_PER_PTE    (1UL << BITS_PER_PTE)
 
 /* Definitions for 2nd level */
-#define pgtable_cache_init()   do { } while (0)
-
 #define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
 #define PMD_SIZE       (1UL << PMD_SHIFT)
 #define PMD_MASK       (~(PMD_SIZE-1))
index c98162f..6fd8871 100644 (file)
@@ -48,6 +48,9 @@
 #define MADV_DONTFORK  10              /* don't inherit across fork */
 #define MADV_DOFORK    11              /* do inherit across fork */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 #define MADV_MERGEABLE   65            /* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 66            /* KSM may not merge identical pages */
 
index 1d1d748..b96d744 100644 (file)
@@ -2125,7 +2125,7 @@ ftrace_regs_caller:
        copy    %rp, %r26
        LDREG   -FTRACE_FRAME_SIZE-PT_SZ_ALGN(%sp), %r25
        ldo     -8(%r25), %r25
-       copy    %r3, %arg2
+       ldo     -FTRACE_FRAME_SIZE(%r1), %arg2
        b,l     ftrace_function_trampoline, %rp
        copy    %r1, %arg3 /* struct pt_regs */
 
index 92a9b5f..f29f682 100644 (file)
@@ -3,7 +3,7 @@
  * arch/parisc/mm/ioremap.c
  *
  * (C) Copyright 1995 1996 Linus Torvalds
- * (C) Copyright 2001-2006 Helge Deller <deller@gmx.de>
+ * (C) Copyright 2001-2019 Helge Deller <deller@gmx.de>
  * (C) Copyright 2005 Kyle McMartin <kyle@parisc-linux.org>
  */
 
@@ -84,7 +84,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
        addr = (void __iomem *) area->addr;
        if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
                               phys_addr, pgprot)) {
-               vfree(addr);
+               vunmap(addr);
                return NULL;
        }
 
@@ -92,9 +92,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 }
 EXPORT_SYMBOL(__ioremap);
 
-void iounmap(const volatile void __iomem *addr)
+void iounmap(const volatile void __iomem *io_addr)
 {
-       if (addr > high_memory)
-               return vfree((void *) (PAGE_MASK & (unsigned long __force) addr));
+       unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
+
+       if (is_vmalloc_addr((void *)addr))
+               vunmap((void *)addr);
 }
 EXPORT_SYMBOL(iounmap);
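
The two fixes above are related: __ioremap() builds its mapping with
get_vm_area()/ioremap_page_range() rather than vmalloc(), so there are no
allocated pages behind it, and the old `addr > high_memory` test compared an
__iomem cookie against a kernel pointer. Masking off the offset bits, testing
with is_vmalloc_addr() and unmapping with vunmap() matches the allocation
path. A comment-only sketch of the ownership difference:

    /*
     * vfree(p)  - unmap AND free the pages backing a vmalloc() allocation
     * vunmap(p) - unmap only; the caller owns whatever was mapped (for
     *             ioremap there is nothing to free, just MMIO space)
     */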
index 6841bd5..dfbd7f2 100644 (file)
@@ -50,7 +50,7 @@ endif
 
 BOOTAFLAGS     := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc
 
-BOOTARFLAGS    := -cr$(KBUILD_ARFLAGS)
+BOOTARFLAGS    := -crD
 
 ifdef CONFIG_CC_IS_CLANG
 BOOTCFLAGS += $(CLANG_FLAGS)
index 677e9ba..f9dc597 100644 (file)
@@ -91,6 +91,7 @@
 
 static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
 {
+       addr &= 0xf0000000;     /* align addr to start of segment */
        barrier();      /* make sure thread.kuap is updated before playing with SRs */
        while (addr < end) {
                mtsrin(sr, addr);
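
On 32-bit book3s, each segment register covers 256 MiB, so the update loop
must start from the base of the segment containing addr; masking with
0xf0000000 keeps only the top four bits, i.e. rounds down to a segment
boundary. A tiny sketch of the arithmetic (the address is illustrative):

    #include <stdio.h>

    int main(void)
    {
            unsigned int addr = 0xc1234567;

            /* 256 MiB segments: keep the top nibble only */
            printf("%#x -> %#x\n", addr, addr & 0xf0000000u); /* 0xc0000000 */
            return 0;
    }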
index 574eca3..d97db3a 100644 (file)
@@ -254,7 +254,13 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
 extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
                                      unsigned long addr, pmd_t *pmdp);
-extern int radix__has_transparent_hugepage(void);
+static inline int radix__has_transparent_hugepage(void)
+{
+       /* For radix 2M at PMD level means thp */
+       if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
+               return 1;
+       return 0;
+}
 #endif
 
 extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
index 4ce795d..ca8db19 100644 (file)
@@ -35,6 +35,10 @@ static inline void radix__flush_all_lpid(unsigned int lpid)
 {
        WARN_ON(1);
 }
+static inline void radix__flush_all_lpid_guest(unsigned int lpid)
+{
+       WARN_ON(1);
+}
 #endif
 
 extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
index a1ebcbc..cf00ff0 100644 (file)
@@ -209,8 +209,9 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_POWER9_DD2_1           LONG_ASM_CONST(0x0000080000000000)
 #define CPU_FTR_P9_TM_HV_ASSIST                LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_P9_TM_XER_SO_BUG       LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_P9_TLBIE_BUG           LONG_ASM_CONST(0x0000400000000000)
+#define CPU_FTR_P9_TLBIE_STQ_BUG       LONG_ASM_CONST(0x0000400000000000)
 #define CPU_FTR_P9_TIDR                        LONG_ASM_CONST(0x0000800000000000)
+#define CPU_FTR_P9_TLBIE_ERAT_BUG      LONG_ASM_CONST(0x0001000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -457,7 +458,7 @@ static inline void cpu_feature_keys_init(void) { }
            CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
            CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
            CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
-           CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
+           CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
 #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
index 409c9bf..57c229a 100644 (file)
@@ -175,4 +175,7 @@ do {                                                                        \
        ARCH_DLINFO_CACHE_GEOMETRY;                                     \
 } while (0)
 
+/* Relocate the kernel image to @final_address */
+void relocate(unsigned long final_address);
+
 #endif /* _ASM_POWERPC_ELF_H */
index 8e8514e..ee62776 100644 (file)
@@ -452,9 +452,100 @@ static inline u32 kvmppc_get_xics_latch(void)
        return xirr;
 }
 
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+/*
+ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
+ * a CPU thread that's running/napping inside of a guest is by default regarded
+ * as a request to wake the CPU (if needed) and continue execution within the
+ * guest, potentially to process new state like externally-generated
+ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
+ *
+ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
+ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
+ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
+ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
+ * the receiving side prior to processing the IPI work.
+ *
+ * NOTE:
+ *
+ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
+ * This is to guard against sequences such as the following:
+ *
+ *      CPU
+ *        X: smp_muxed_ipi_set_message():
+ *        X:   smp_mb()
+ *        X:   message[RESCHEDULE] = 1
+ *        X: doorbell_global_ipi(42):
+ *        X:   kvmppc_set_host_ipi(42)
+ *        X:   ppc_msgsnd_sync()/smp_mb()
+ *        X:   ppc_msgsnd() -> 42
+ *       42: doorbell_exception(): // from CPU X
+ *       42:   ppc_msgsync()
+ *      105: smp_muxed_ipi_set_message():
+ *      105:   smp_mb()
+ *           // STORE DEFERRED DUE TO RE-ORDERING
+ *    --105:   message[CALL_FUNCTION] = 1
+ *    | 105: doorbell_global_ipi(42):
+ *    | 105:   kvmppc_set_host_ipi(42)
+ *    |  42:   kvmppc_clear_host_ipi(42)
+ *    |  42: smp_ipi_demux_relaxed()
+ *    |  42: // returns to executing guest
+ *    |      // RE-ORDERED STORE COMPLETES
+ *    ->105:   message[CALL_FUNCTION] = 1
+ *      105:   ppc_msgsnd_sync()/smp_mb()
+ *      105:   ppc_msgsnd() -> 42
+ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ *      105: // hangs waiting on 42 to process messages/call_single_queue
+ *
+ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
+ * to guard against sequences such as the following (as well as to create
+ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
+ *
+ *      CPU
+ *        X: smp_muxed_ipi_set_message():
+ *        X:   smp_mb()
+ *        X:   message[RESCHEDULE] = 1
+ *        X: doorbell_global_ipi(42):
+ *        X:   kvmppc_set_host_ipi(42)
+ *        X:   ppc_msgsnd_sync()/smp_mb()
+ *        X:   ppc_msgsnd() -> 42
+ *       42: doorbell_exception(): // from CPU X
+ *       42:   ppc_msgsync()
+ *           // STORE DEFERRED DUE TO RE-ORDERING
+ *    -- 42:   kvmppc_clear_host_ipi(42)
+ *    |  42: smp_ipi_demux_relaxed()
+ *    | 105: smp_muxed_ipi_set_message():
+ *    | 105:   smp_mb()
+ *    | 105:   message[CALL_FUNCTION] = 1
+ *    | 105: doorbell_global_ipi(42):
+ *    | 105:   kvmppc_set_host_ipi(42)
+ *    |      // RE-ORDERED STORE COMPLETES
+ *    -> 42:   kvmppc_clear_host_ipi(42)
+ *       42: // returns to executing guest
+ *      105:   ppc_msgsnd_sync()/smp_mb()
+ *      105:   ppc_msgsnd() -> 42
+ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ *      105: // hangs waiting on 42 to process messages/call_single_queue
+ */
+static inline void kvmppc_set_host_ipi(int cpu)
 {
-       paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
+       /*
+        * order stores of IPI messages vs. setting of host_ipi flag
+        *
+        * pairs with the barrier in kvmppc_clear_host_ipi()
+        */
+       smp_mb();
+       paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
+}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{
+       paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
+       /*
+        * order clearing of host_ipi flag vs. processing of IPI messages
+        *
+        * pairs with the barrier in kvmppc_set_host_ipi()
+        */
+       smp_mb();
 }
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -486,7 +577,10 @@ static inline u32 kvmppc_get_xics_latch(void)
        return 0;
 }
 
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+static inline void kvmppc_set_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
 {}
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
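
The long comment above reduces to a classic store-ordering pairing: the
sender must publish its IPI message before setting host_ipi, and the receiver
must clear host_ipi before re-reading messages, each side fenced with a full
barrier. With both fences in place, either the receiver sees the message or
the sender's host_ipi store lands after the clear and forces another exit. A
hedged, generic C11 sketch of the same pattern, with plain atomics standing
in for the message word and the PACA flag:

    #include <stdatomic.h>

    static atomic_int message;  /* stands in for message[CALL_FUNCTION] */
    static atomic_int host_ipi; /* stands in for kvm_hstate.host_ipi   */

    static void sender(void)
    {
            atomic_store_explicit(&message, 1, memory_order_relaxed);
            /* smp_mb() in kvmppc_set_host_ipi() */
            atomic_thread_fence(memory_order_seq_cst);
            atomic_store_explicit(&host_ipi, 1, memory_order_relaxed);
    }

    static int receiver(void)
    {
            atomic_store_explicit(&host_ipi, 0, memory_order_relaxed);
            /* smp_mb() in kvmppc_clear_host_ipi() */
            atomic_thread_fence(memory_order_seq_cst);
            return atomic_load_explicit(&message, memory_order_relaxed);
    }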
index 2372d35..327567b 100644 (file)
@@ -112,8 +112,6 @@ extern pgprot_t     pci_phys_mem_access_prot(struct file *file,
                                         unsigned long size,
                                         pgprot_t prot);
 
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
 extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
 extern void pcibios_setup_bus_devices(struct pci_bus *bus);
 extern void pcibios_setup_bus_self(struct pci_bus *bus);
index 2b2c60a..6dd78a2 100644 (file)
@@ -64,8 +64,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 extern struct kmem_cache *pgtable_cache[];
 #define PGT_CACHE(shift) pgtable_cache[shift]
 
-static inline void check_pgt_cache(void) { }
-
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/book3s/pgalloc.h>
 #else
index 8b7865a..4053b2a 100644 (file)
@@ -87,7 +87,6 @@ extern unsigned long ioremap_bot;
 unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
 void pgtable_cache_add(unsigned int shift);
-void pgtable_cache_init(void);
 
 #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
 void mark_initmem_nx(void);
index ec3714c..b3cbb11 100644 (file)
 #define   HMER_DEBUG_TRIG      (1ul << (63 - 17)) /* Debug trigger */
 #define        SPRN_HMEER      0x151   /* Hyp maintenance exception enable reg */
 #define SPRN_PCR       0x152   /* Processor compatibility register */
-#define   PCR_VEC_DIS  (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
-#define   PCR_VSX_DIS  (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
-#define   PCR_TM_DIS   (1ul << (63-2)) /* Trans. memory disable (POWER8) */
+#define   PCR_VEC_DIS  (__MASK(63-0))  /* Vec. disable (bit NA since POWER8) */
+#define   PCR_VSX_DIS  (__MASK(63-1))  /* VSX disable (bit NA since POWER8) */
+#define   PCR_TM_DIS   (__MASK(63-2))  /* Trans. memory disable (POWER8) */
+#define   PCR_HIGH_BITS        (PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
 /*
  * These bits are used in the function kvmppc_set_arch_compat() to specify and
  * determine both the compatibility level which we want to emulate and the
 #define   PCR_ARCH_207 0x8             /* Architecture 2.07 */
 #define   PCR_ARCH_206 0x4             /* Architecture 2.06 */
 #define   PCR_ARCH_205 0x2             /* Architecture 2.05 */
+#define   PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205)
+#define   PCR_MASK     ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */
 #define        SPRN_HEIR       0x153   /* Hypervisor Emulated Instruction Register */
 #define SPRN_TLBINDEXR 0x154   /* P7 TLB control register */
 #define SPRN_TLBVPNR   0x155   /* P7 TLB control register */
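
PCR_MASK collects every bit that is neither an architected disable bit nor a
compat-level bit, i.e. the reserved bits, which this series now writes as
ones on every mtspr to SPRN_PCR instead of zero. A sketch of the arithmetic,
assuming __MASK(i) is (1UL << (i)) as in these headers:

    #define __MASK(X)       (1UL << (X))

    #define PCR_HIGH_BITS   (__MASK(63 - 0) | __MASK(63 - 1) | __MASK(63 - 2))
    #define PCR_LOW_BITS    (0x8 | 0x4 | 0x2)       /* ARCH_207/206/205 */
    #define PCR_MASK        ~(PCR_HIGH_BITS | PCR_LOW_BITS)

    /* PCR_MASK == 0x1ffffffffffffff1: reserved bits set, defined bits clear */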
index 3239a9f..a460298 100644 (file)
@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
        beqlr
        li      r0,0
        mtspr   SPRN_LPID,r0
+       LOAD_REG_IMMEDIATE(r0, PCR_MASK)
        mtspr   SPRN_PCR,r0
        mfspr   r3,SPRN_LPCR
        li      r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
        beqlr
        li      r0,0
        mtspr   SPRN_LPID,r0
+       LOAD_REG_IMMEDIATE(r0, PCR_MASK)
        mtspr   SPRN_PCR,r0
        mfspr   r3,SPRN_LPCR
        li      r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
        beqlr
        li      r0,0
        mtspr   SPRN_LPID,r0
+       LOAD_REG_IMMEDIATE(r0, PCR_MASK)
        mtspr   SPRN_PCR,r0
        mfspr   r3,SPRN_LPCR
        ori     r3, r3, LPCR_PECEDH
@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
        beqlr
        li      r0,0
        mtspr   SPRN_LPID,r0
+       LOAD_REG_IMMEDIATE(r0, PCR_MASK)
        mtspr   SPRN_PCR,r0
        mfspr   r3,SPRN_LPCR
        ori     r3, r3, LPCR_PECEDH
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
        mtspr   SPRN_PSSCR,r0
        mtspr   SPRN_LPID,r0
        mtspr   SPRN_PID,r0
+       LOAD_REG_IMMEDIATE(r0, PCR_MASK)
        mtspr   SPRN_PCR,r0
        mfspr   r3,SPRN_LPCR
        LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE  | LPCR_HEIC)
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
        mtspr   SPRN_PSSCR,r0
        mtspr   SPRN_LPID,r0
        mtspr   SPRN_PID,r0
+       LOAD_REG_IMMEDIATE(r0, PCR_MASK)
        mtspr   SPRN_PCR,r0
        mfspr   r3,SPRN_LPCR
        LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
index 804b1a6..f17ff12 100644 (file)
@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
 {
        u32 tag = get_hard_smp_processor_id(cpu);
 
-       kvmppc_set_host_ipi(cpu, 1);
+       kvmppc_set_host_ipi(cpu);
        /* Order previous accesses vs. msgsnd, which is treated as a store */
        ppc_msgsnd_sync();
        ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
 {
        u32 tag = cpu_thread_in_core(cpu);
 
-       kvmppc_set_host_ipi(cpu, 1);
+       kvmppc_set_host_ipi(cpu);
        /* Order previous accesses vs. msgsnd, which is treated as a store */
        ppc_msgsnd_sync();
        ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
 
        may_hard_irq_enable();
 
-       kvmppc_set_host_ipi(smp_processor_id(), 0);
+       kvmppc_clear_host_ipi(smp_processor_id());
        __this_cpu_inc(irq_stat.doorbell_irqs);
 
        smp_ipi_demux_relaxed(); /* already performed the barrier */
index bd95318..180b3a5 100644 (file)
@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
        if (hv_mode) {
                mtspr(SPRN_LPID, 0);
                mtspr(SPRN_HFSCR, system_registers.hfscr);
-               mtspr(SPRN_PCR, 0);
+               mtspr(SPRN_PCR, PCR_MASK);
        }
        mtspr(SPRN_FSCR, system_registers.fscr);
 
@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
                mtspr(SPRN_HFSCR, 0);
        }
        mtspr(SPRN_FSCR, 0);
+       mtspr(SPRN_PCR, PCR_MASK);
 
        /*
         * LPCR does not get cleared, to match behaviour with secondaries
@@ -691,9 +692,37 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
        return true;
 }
 
+/*
+ * Handle POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flag.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+       if (PVR_VER(pvr) == PVR_POWER9) {
+               /*
+                * Set the tlbie feature flag for anything below
+                * Nimbus DD 2.3 and Cumulus DD 1.3
+                */
+               if ((pvr & 0xe000) == 0) {
+                       /* Nimbus */
+                       if ((pvr & 0xfff) < 0x203)
+                               cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+               } else if ((pvr & 0xc000) == 0) {
+                       /* Cumulus */
+                       if ((pvr & 0xfff) < 0x103)
+                               cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+               } else {
+                       WARN_ONCE(1, "Unknown PVR");
+                       cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+               }
+
+               cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+       }
+}
+
 static __init void cpufeatures_cpu_quirks(void)
 {
-       int version = mfspr(SPRN_PVR);
+       unsigned long version = mfspr(SPRN_PVR);
 
        /*
         * Not all quirks can be derived from the cpufeatures device tree.
@@ -712,10 +741,10 @@ static __init void cpufeatures_cpu_quirks(void)
 
        if ((version & 0xffff0000) == 0x004e0000) {
                cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
-               cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
                cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
        }
 
+       update_tlbie_feature_flag(version);
        /*
         * PKEY was not in the initial base or feature node
         * specification, but it should become optional in the next
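
update_tlbie_feature_flag() keys off the POWER9 PVR layout: the 0xe000/0xc000
masks distinguish the Nimbus and Cumulus chip variants, and the low 12 bits
encode the DD revision (0x203 == DD2.3, 0x103 == DD1.3, per the comment in
the hunk). A hedged userspace sketch of the same decode (the PVR value is
illustrative):

    #include <stdio.h>

    int main(void)
    {
            unsigned long pvr = 0x004e1202; /* e.g. a Nimbus DD2.2 part */
            unsigned int rev = pvr & 0xfff;

            if ((pvr & 0xe000) == 0)
                    printf("Nimbus DD%x.%x, STQ bug: %s\n", rev >> 8,
                           rev & 0xff, rev < 0x203 ? "yes" : "no");
            else if ((pvr & 0xc000) == 0)
                    printf("Cumulus DD%x.%x, STQ bug: %s\n", rev >> 8,
                           rev & 0xff, rev < 0x103 ? "yes" : "no");
            return 0;
    }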
index 0a91dee..bc8a551 100644 (file)
@@ -1960,7 +1960,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
        pci_err(pdev, "Going to break: %pR\n", bar);
 
        if (pdev->is_virtfn) {
-#ifndef CONFIG_IOV
+#ifndef CONFIG_PCI_IOV
                return -ENXIO;
 #else
                /*
@@ -1980,7 +1980,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
                pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
                pos += PCI_SRIOV_CTRL;
                bit  = PCI_SRIOV_CTRL_MSE;
-#endif /* !CONFIG_IOV */
+#endif /* !CONFIG_PCI_IOV */
        } else {
                bit = PCI_COMMAND_MEMORY;
                pos = PCI_COMMAND;
index a4e7762..100f1b5 100644 (file)
@@ -3249,7 +3249,20 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
        /* Switch to secure mode. */
        prom_printf("Switching to secure mode.\n");
 
+       /*
+        * The ultravisor will do an integrity check of the kernel image but we
+        * relocated it so the check will fail. Restore the original image by
+        * relocating it back to the kernel virtual base address.
+        */
+       if (IS_ENABLED(CONFIG_RELOCATABLE))
+               relocate(KERNELBASE);
+
        ret = enter_secure_mode(kbase, fdt);
+
+       /* Relocate the kernel again. */
+       if (IS_ENABLED(CONFIG_RELOCATABLE))
+               relocate(kbase);
+
        if (ret != U_SUCCESS) {
                prom_printf("Returned %d from switching to secure mode.\n", ret);
                prom_rtas_os_term("Switch to secure mode failed.\n");
index 78bab17..b183ab9 100644 (file)
@@ -26,7 +26,8 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
 logo_linux_clut224 btext_prepare_BAT
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
+__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC.
+relocate"
 
 NM="$1"
 OBJ="$2"
index d7fcdfa..ec2547c 100644 (file)
@@ -36,8 +36,8 @@
 #include "book3s.h"
 #include "trace.h"
 
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
+#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
 
 /* #define EXIT_DEBUG */
 
@@ -69,8 +69,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "pthru_all",       VCPU_STAT(pthru_all) },
        { "pthru_host",      VCPU_STAT(pthru_host) },
        { "pthru_bad_aff",   VCPU_STAT(pthru_bad_aff) },
-       { "largepages_2M",    VM_STAT(num_2M_pages) },
-       { "largepages_1G",    VM_STAT(num_1G_pages) },
+       { "largepages_2M",    VM_STAT(num_2M_pages, .mode = 0444) },
+       { "largepages_1G",    VM_STAT(num_1G_pages, .mode = 0444) },
        { NULL }
 };
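
The VM_STAT()/VCPU_STAT() rework uses `## __VA_ARGS__` so an optional
designated initializer (here `.mode = 0444`, making the large-page counters
world-readable) can ride along inside the same brace initializer; with no
extra argument the `##` swallows the trailing comma. A tiny sketch of the
trick with hypothetical names (it relies on the GNU comma-swallowing
extension, as the kernel does):

    #include <stdio.h>

    struct item { unsigned long offset; int kind; int mode; };

    #define KIND_VM 1
    #define VM_ITEM(off, ...)  { (off), KIND_VM, ## __VA_ARGS__ }

    static struct item items[] = {
            VM_ITEM(8),                     /* .mode defaults to 0 */
            VM_ITEM(16, .mode = 0444),      /* optional extra field */
    };

    int main(void)
    {
            printf("%o %o\n", items[0].mode, items[1].mode);
            return 0;
    }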
 
index efd8f93..709cf1f 100644 (file)
@@ -401,8 +401,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 
        spin_lock(&vc->lock);
        vc->arch_compat = arch_compat;
-       /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
-       vc->pcr = host_pcr_bit - guest_pcr_bit;
+       /*
+        * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
+        * Also set all reserved PCR bits
+        */
+       vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
        spin_unlock(&vc->lock);
 
        return 0;
@@ -3410,7 +3413,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
        }
 
        if (vc->pcr)
-               mtspr(SPRN_PCR, vc->pcr);
+               mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
        mtspr(SPRN_DPDES, vc->dpdes);
        mtspr(SPRN_VTB, vc->vtb);
 
@@ -3490,7 +3493,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
        vc->vtb = mfspr(SPRN_VTB);
        mtspr(SPRN_DPDES, 0);
        if (vc->pcr)
-               mtspr(SPRN_PCR, 0);
+               mtspr(SPRN_PCR, PCR_MASK);
 
        if (vc->tb_offset_applied) {
                u64 new_tb = mftb() - vc->tb_offset_applied;
index fff90f2..cdf30c6 100644 (file)
@@ -29,7 +29,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-       hr->pcr = vc->pcr;
+       hr->pcr = vc->pcr | PCR_MASK;
        hr->dpdes = vc->dpdes;
        hr->hfscr = vcpu->arch.hfscr;
        hr->tb_offset = vc->tb_offset;
@@ -65,7 +65,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
        hr->lpid = swab32(hr->lpid);
        hr->vcpu_token = swab32(hr->vcpu_token);
        hr->lpcr = swab64(hr->lpcr);
-       hr->pcr = swab64(hr->pcr);
+       hr->pcr = swab64(hr->pcr) | PCR_MASK;
        hr->amor = swab64(hr->amor);
        hr->dpdes = swab64(hr->dpdes);
        hr->hfscr = swab64(hr->hfscr);
@@ -148,7 +148,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-       vc->pcr = hr->pcr;
+       vc->pcr = hr->pcr | PCR_MASK;
        vc->dpdes = hr->dpdes;
        vcpu->arch.hfscr = hr->hfscr;
        vcpu->arch.dawr = hr->dawr0;
index 7186c65..2203054 100644 (file)
@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
                (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
 }
 
+static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
+{
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               /* Radix flush for a hash guest */
+
+               unsigned long rb, rs, prs, r, ric;
+
+               rb = PPC_BIT(52); /* IS = 2 */
+               rs = 0;  /* lpid = 0 */
+               prs = 0; /* partition scoped */
+               r = 1;   /* radix format */
+               ric = 0; /* RIC_FLUSH_TLB */
+
+               /*
+                * Need the extra ptesync to make sure we don't
+                * re-order the tlbie
+                */
+               asm volatile("ptesync": : :"memory");
+               asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+                            : : "r"(rb), "i"(r), "i"(prs),
+                              "i"(ric), "r"(rs) : "memory");
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+                            "r" (rb_value), "r" (lpid));
+       }
+}
+
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
                      long npages, int global, bool need_sync)
 {
@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
                                     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
                }
 
-               if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
-                       /*
-                        * Need the extra ptesync to make sure we don't
-                        * re-order the tlbie
-                        */
-                       asm volatile("ptesync": : :"memory");
-                       asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
-                                    "r" (rbvalues[0]), "r" (kvm->arch.lpid));
-               }
-
+               fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
                asm volatile("eieio; tlbsync; ptesync" : : : "memory");
        } else {
                if (need_sync)
index 4d2ec77..287d591 100644 (file)
@@ -58,7 +58,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
        hcpu = hcore << threads_shift;
        kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
        smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
-       kvmppc_set_host_ipi(hcpu, 1);
+       kvmppc_set_host_ipi(hcpu);
        smp_mb();
        kvmhv_rm_send_ipi(hcpu);
 }
index 9a05b0d..faebcbb 100644 (file)
@@ -644,8 +644,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
        /* Load guest PCR value to select appropriate compat mode */
 37:    ld      r7, VCORE_PCR(r5)
-       cmpdi   r7, 0
+       LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+       cmpld   r7, r6
        beq     38f
+       or      r7, r7, r6
        mtspr   SPRN_PCR, r7
 38:
 
@@ -1913,12 +1915,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* Reset PCR */
        ld      r0, VCORE_PCR(r5)
-       cmpdi   r0, 0
+       LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+       cmpld   r0, r6
        beq     18f
-       li      r0, 0
-       mtspr   SPRN_PCR, r0
+       mtspr   SPRN_PCR, r6
 18:
        /* Signal secondary CPUs to continue */
+       li      r0, 0
        stb     r0,VCORE_IN_GUEST(r5)
 19:    lis     r8,0x7fff               /* MAX_INT@h */
        mtspr   SPRN_HDEC,r8
index 591bfb4..a3f9c66 100644 (file)
@@ -1217,6 +1217,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_vcpu *xc;
        int i, r = -EBUSY;
+       u32 vp_id;
 
        pr_devel("connect_vcpu(cpu=%d)\n", cpu);
 
@@ -1228,25 +1229,32 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
                return -EPERM;
        if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
                return -EBUSY;
-       if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
-               pr_devel("Duplicate !\n");
-               return -EEXIST;
-       }
        if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
                pr_devel("Out of bounds !\n");
                return -EINVAL;
        }
-       xc = kzalloc(sizeof(*xc), GFP_KERNEL);
-       if (!xc)
-               return -ENOMEM;
 
        /* We need to synchronize with queue provisioning */
        mutex_lock(&xive->lock);
+
+       vp_id = kvmppc_xive_vp(xive, cpu);
+       if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+               pr_devel("Duplicate !\n");
+               r = -EEXIST;
+               goto bail;
+       }
+
+       xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+       if (!xc) {
+               r = -ENOMEM;
+               goto bail;
+       }
+
        vcpu->arch.xive_vcpu = xc;
        xc->xive = xive;
        xc->vcpu = vcpu;
        xc->server_num = cpu;
-       xc->vp_id = kvmppc_xive_vp(xive, cpu);
+       xc->vp_id = vp_id;
        xc->mfrr = 0xff;
        xc->valid = true;
 
index 955b820..fe3ed50 100644 (file)
@@ -220,6 +220,18 @@ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
        return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
 }
 
+static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
+{
+       struct kvm_vcpu *vcpu = NULL;
+       int i;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (vcpu->arch.xive_vcpu && vp_id == vcpu->arch.xive_vcpu->vp_id)
+                       return true;
+       }
+       return false;
+}
+
 /*
  * Mapping between guest priorities and host priorities
  * is as follow.
index 248c1ea..78b906f 100644 (file)
@@ -106,6 +106,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_vcpu *xc = NULL;
        int rc;
+       u32 vp_id;
 
        pr_devel("native_connect_vcpu(server=%d)\n", server_num);
 
@@ -124,7 +125,8 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 
        mutex_lock(&xive->lock);
 
-       if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
+       vp_id = kvmppc_xive_vp(xive, server_num);
+       if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
                pr_devel("Duplicate !\n");
                rc = -EEXIST;
                goto bail;
@@ -141,7 +143,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
        xc->vcpu = vcpu;
        xc->server_num = server_num;
 
-       xc->vp_id = kvmppc_xive_vp(xive, server_num);
+       xc->vp_id = vp_id;
        xc->valid = true;
        vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
 
index 90ab4f3..523e42e 100644 (file)
@@ -197,9 +197,32 @@ static inline unsigned long  ___tlbie(unsigned long vpn, int psize,
        return va;
 }
 
-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
+                                  int apsize, int ssize)
 {
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               /* Radix flush for a hash guest */
+
+               unsigned long rb, rs, prs, r, ric;
+
+               rb = PPC_BIT(52); /* IS = 2 */
+               rs = 0;  /* lpid = 0 */
+               prs = 0; /* partition scoped */
+               r = 1;   /* radix format */
+               ric = 0; /* RIC_FLUSH_TLB */
+
+               /*
+                * Need the extra ptesync to make sure we don't
+                * re-order the tlbie
+                */
+               asm volatile("ptesync": : :"memory");
+               asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+                            : : "r"(rb), "i"(r), "i"(prs),
+                              "i"(ric), "r"(rs) : "memory");
+       }
+
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                /* Need the extra ptesync to ensure we don't reorder tlbie*/
                asm volatile("ptesync": : :"memory");
                ___tlbie(vpn, psize, apsize, ssize);
@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
                asm volatile("ptesync": : :"memory");
        } else {
                __tlbie(vpn, psize, apsize, ssize);
-               fixup_tlbie(vpn, psize, apsize, ssize);
+               fixup_tlbie_vpn(vpn, psize, apsize, ssize);
                asm volatile("eieio; tlbsync; ptesync": : :"memory");
        }
        if (lock_tlbie && !use_local)
@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
                /*
                 * Just do one more with the last used values.
                 */
-               fixup_tlbie(vpn, psize, psize, ssize);
+               fixup_tlbie_vpn(vpn, psize, psize, ssize);
                asm volatile("eieio; tlbsync; ptesync":::"memory");
 
                if (lock_tlbie)
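
Both errata paths above share one shape: an extra ptesync so the workaround
cannot be reordered ahead of the real invalidation, then a dummy radix flush
against lpid/pid 0 (the ERAT bug) and/or a repeat of the last tlbie (the STQ
bug). A comment-level sketch of that shape; the helper names here are
hypothetical stand-ins, not kernel functions:

    static inline void fixup_after_tlbie(void)
    {
            if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                    ptesync();              /* keep the fixup ordered */
                    dummy_radix_flush_0();  /* flush ERAT via lpid/pid 0 */
            }
            if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                    ptesync();
                    repeat_last_tlbie();    /* re-issue the final tlbie */
            }
    }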
index d1f390a..64733b9 100644 (file)
@@ -406,6 +406,8 @@ int hash__has_transparent_hugepage(void)
 
        return 1;
 }
+EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
index 3410ea9..6c12376 100644 (file)
@@ -1748,7 +1748,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
        /*
         * IF we try to do a HUGE PTE update after a withdraw is done.
         * we will find the below NULL. This happens when we do
-        * split_huge_page_pmd
+        * split_huge_pmd
         */
        if (!hpte_slot_array)
                return;
index b056cae..56cc845 100644 (file)
@@ -129,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
                 * Allow to use larger than 64k IOMMU pages. Only do that
                 * if we are backed by hugetlb.
                 */
-               if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
-                       struct page *head = compound_head(page);
-
-                       pageshift = compound_order(head) + PAGE_SHIFT;
-               }
+               if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+                       pageshift = page_shift(compound_head(page));
                mem->pageshift = min(mem->pageshift, pageshift);
                /*
                 * We don't need struct page reference any more, switch
index 2d0cb5b..0ba30b8 100644 (file)
@@ -256,8 +256,21 @@ void destroy_context(struct mm_struct *mm)
 #ifdef CONFIG_SPAPR_TCE_IOMMU
        WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
 #endif
+       /*
+        * For tasks which were successfully initialized we end up calling
+        * arch_exit_mmap() which clears the process table entry. And
+        * arch_exit_mmap() is called before the required fullmm TLB flush
+        * which does a RIC=2 flush. Hence for an initialized task, we do clear
+        * any cached process table entries.
+        *
+        * The condition below handles the error case during task init. We have
+        * set the process table entry early and if we fail a task
+        * initialization, we need to ensure the process table entry is zeroed.
+        * We need not worry about process table entry caches because the task
+        * never ran with the PID value.
+        */
        if (radix_enabled())
-               WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+               process_tb[mm->context.id].prtb0 = 0;
        else
                subpage_prot_free(mm);
        destroy_contexts(&mm->context);
index 3a1fbf9..6ee17d0 100644 (file)
@@ -1027,13 +1027,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
        return old_pmd;
 }
 
-int radix__has_transparent_hugepage(void)
-{
-       /* For radix 2M at PMD level means thp */
-       if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
-               return 1;
-       return 0;
-}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
index 631be42..67af871 100644 (file)
@@ -196,22 +196,83 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
 }
 
-static inline void fixup_tlbie(void)
+
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+                                 unsigned long ap)
+{
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+       }
+}
+
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+                                       unsigned long ap)
+{
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_pid(0, RIC_FLUSH_TLB);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+       }
+}
+
+static inline void fixup_tlbie_pid(unsigned long pid)
 {
-       unsigned long pid = 0;
+       /*
+        * We can use any address for the invalidation, pick one which is
+        * probably unused as an optimisation.
+        */
        unsigned long va = ((1UL << 52) - 1);
 
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_pid(0, RIC_FLUSH_TLB);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
 }
 
+
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+                                      unsigned long ap)
+{
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+       }
+}
+
 static inline void fixup_tlbie_lpid(unsigned long lpid)
 {
+       /*
+        * We can use any address for the invalidation, pick one which is
+        * probably unused as an optimisation.
+        */
        unsigned long va = ((1UL << 52) - 1);
 
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_lpid(0, RIC_FLUSH_TLB);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
@@ -258,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_pid(pid, RIC_FLUSH_TLB);
+               fixup_tlbie_pid(pid);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_pid(pid, RIC_FLUSH_PWC);
@@ -265,8 +327,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
        case RIC_FLUSH_ALL:
        default:
                __tlbie_pid(pid, RIC_FLUSH_ALL);
+               fixup_tlbie_pid(pid);
        }
-       fixup_tlbie();
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -315,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_lpid(lpid, RIC_FLUSH_TLB);
+               fixup_tlbie_lpid(lpid);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_lpid(lpid, RIC_FLUSH_PWC);
@@ -322,8 +385,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
        case RIC_FLUSH_ALL:
        default:
                __tlbie_lpid(lpid, RIC_FLUSH_ALL);
+               fixup_tlbie_lpid(lpid);
        }
-       fixup_tlbie_lpid(lpid);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
 
        for (addr = start; addr < end; addr += page_size)
                __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+       fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 
        asm volatile("ptesync": : :"memory");
        __tlbie_va(va, pid, ap, ric);
-       fixup_tlbie();
+       fixup_tlbie_va(va, pid, ap);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
 
        asm volatile("ptesync": : :"memory");
        __tlbie_lpid_va(va, lpid, ap, ric);
-       fixup_tlbie_lpid(lpid);
+       fixup_tlbie_lpid_va(va, lpid, ap);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
        if (also_pwc)
                __tlbie_pid(pid, RIC_FLUSH_PWC);
        __tlbie_va_range(start, end, pid, page_size, psize);
-       fixup_tlbie();
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -856,7 +920,7 @@ is_local:
                        if (gflush)
                                __tlbie_va_range(gstart, gend, pid,
                                                PUD_SIZE, MMU_PAGE_1G);
-                       fixup_tlbie();
+
                        asm volatile("eieio; tlbsync; ptesync": : :"memory");
                } else {
                        _tlbiel_va_range_multicast(mm,
index a8953f1..73d4873 100644 (file)
@@ -667,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page)
 
        BUG_ON(!PageCompound(page));
 
-       for (i = 0; i < (1UL << compound_order(page)); i++) {
+       for (i = 0; i < compound_nr(page); i++) {
                if (!PageHighMem(page)) {
                        __flush_dcache_icache(page_address(page+i));
                } else {
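
Two conversions above swap open-coded compound-page math for the newer
helpers: compound_nr(page) is 1UL << compound_order(page), and
page_shift(page) is compound_order(page) + PAGE_SHIFT. A sketch of the
equivalences, assuming those definitions and 4 KiB base pages:

    #define PAGE_SHIFT 12

    /* pages in a compound page of the given order */
    static inline unsigned long compound_nr_of(unsigned int order)
    {
            return 1UL << order;
    }

    /* log2 of the compound page's size in bytes */
    static inline unsigned int page_shift_of(unsigned int order)
    {
            return order + PAGE_SHIFT;
    }

    /* e.g. a 16 MiB hugepage: order 12 -> 4096 pages, shift 24 */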
index a44f628..4e08246 100644 (file)
@@ -172,6 +172,21 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
        vmemmap_list = vmem_back;
 }
 
+static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+                               unsigned long page_size)
+{
+       unsigned long nr_pfn = page_size / sizeof(struct page);
+       unsigned long start_pfn = page_to_pfn((struct page *)start);
+
+       if ((start_pfn + nr_pfn) > altmap->end_pfn)
+               return true;
+
+       if (start_pfn < altmap->base_pfn)
+               return true;
+
+       return false;
+}
+
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                struct vmem_altmap *altmap)
 {
@@ -194,7 +209,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                 * fail due to alignment issues when using 16MB hugepages, so
                 * fall back to system memory if the altmap allocation fail.
                 */
-               if (altmap) {
+               if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
                        p = altmap_alloc_block_buf(page_size, altmap);
                        if (!p)
                                pr_debug("altmap block allocation failed, falling back to system memory");
index 802387b..0e6ed44 100644 (file)
 #include <asm/code-patching.h>
 #include <mm/mmu_decl.h>
 
+static pgprot_t kasan_prot_ro(void)
+{
+       if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+               return PAGE_READONLY;
+
+       return PAGE_KERNEL_RO;
+}
+
 static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
 {
        unsigned long va = (unsigned long)kasan_early_shadow_page;
@@ -26,6 +34,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
 {
        pmd_t *pmd;
        unsigned long k_cur, k_next;
+       pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL;
 
        pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
 
@@ -43,10 +52,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
 
                if (!new)
                        return -ENOMEM;
-               if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
-                       kasan_populate_pte(new, PAGE_READONLY);
-               else
-                       kasan_populate_pte(new, PAGE_KERNEL_RO);
+               kasan_populate_pte(new, prot);
 
                smp_wmb(); /* See comment in __pte_alloc */
 
@@ -103,11 +109,23 @@ static int __ref kasan_init_region(void *start, size_t size)
 
 static void __init kasan_remap_early_shadow_ro(void)
 {
-       if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
-               kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
-       else
-               kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+       pgprot_t prot = kasan_prot_ro();
+       unsigned long k_start = KASAN_SHADOW_START;
+       unsigned long k_end = KASAN_SHADOW_END;
+       unsigned long k_cur;
+       phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+       kasan_populate_pte(kasan_early_shadow_pte, prot);
+
+       for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+               pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+               pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+               if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
+                       continue;
 
+               __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+       }
        flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
 }
 
index a7b0521..ee4bd6d 100644 (file)
@@ -25,7 +25,7 @@ void pte_frag_destroy(void *pte_frag)
        count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
        /* We allow PTE_FRAG_NR fragments from a PTE page */
        if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
-               pgtable_page_dtor(page);
+               pgtable_pte_page_dtor(page);
                __free_page(page);
        }
 }
@@ -61,7 +61,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
                page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
                if (!page)
                        return NULL;
-               if (!pgtable_page_ctor(page)) {
+               if (!pgtable_pte_page_ctor(page)) {
                        __free_page(page);
                        return NULL;
                }
@@ -113,7 +113,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
        BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
        if (atomic_dec_and_test(&page->pt_frag_refcount)) {
                if (!kernel)
-                       pgtable_page_dtor(page);
+                       pgtable_pte_page_dtor(page);
                __free_page(page);
        }
 }
index 065ff14..2dd452a 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/fsnotify.h>
 #include <linux/backing-dev.h>
 #include <linux/init.h>
@@ -20,7 +22,6 @@
 #include <linux/pagemap.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
-#include <linux/parser.h>
 
 #include <asm/prom.h>
 #include <asm/spu.h>
@@ -30,7 +31,7 @@
 #include "spufs.h"
 
 struct spufs_sb_info {
-       int debug;
+       bool debug;
 };
 
 static struct kmem_cache *spufs_inode_cache;
@@ -574,16 +575,27 @@ long spufs_create(struct path *path, struct dentry *dentry,
 }
 
 /* File system initialization */
+struct spufs_fs_context {
+       kuid_t  uid;
+       kgid_t  gid;
+       umode_t mode;
+};
+
 enum {
-       Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+       Opt_uid, Opt_gid, Opt_mode, Opt_debug,
+};
+
+static const struct fs_parameter_spec spufs_param_specs[] = {
+       fsparam_u32     ("gid",                         Opt_gid),
+       fsparam_u32oct  ("mode",                        Opt_mode),
+       fsparam_u32     ("uid",                         Opt_uid),
+       fsparam_flag    ("debug",                       Opt_debug),
+       {}
 };
 
-static const match_table_t spufs_tokens = {
-       { Opt_uid,   "uid=%d" },
-       { Opt_gid,   "gid=%d" },
-       { Opt_mode,  "mode=%o" },
-       { Opt_debug, "debug" },
-       { Opt_err,    NULL  },
+static const struct fs_parameter_description spufs_fs_parameters = {
+       .name           = "spufs",
+       .specs          = spufs_param_specs,
 };
 
 static int spufs_show_options(struct seq_file *m, struct dentry *root)
@@ -604,47 +616,41 @@ static int spufs_show_options(struct seq_file *m, struct dentry *root)
        return 0;
 }
 
-static int
-spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
-{
-       char *p;
-       substring_t args[MAX_OPT_ARGS];
-
-       while ((p = strsep(&options, ",")) != NULL) {
-               int token, option;
-
-               if (!*p)
-                       continue;
-
-               token = match_token(p, spufs_tokens, args);
-               switch (token) {
-               case Opt_uid:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       root->i_uid = make_kuid(current_user_ns(), option);
-                       if (!uid_valid(root->i_uid))
-                               return 0;
-                       break;
-               case Opt_gid:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       root->i_gid = make_kgid(current_user_ns(), option);
-                       if (!gid_valid(root->i_gid))
-                               return 0;
-                       break;
-               case Opt_mode:
-                       if (match_octal(&args[0], &option))
-                               return 0;
-                       root->i_mode = option | S_IFDIR;
-                       break;
-               case Opt_debug:
-                       spufs_get_sb_info(sb)->debug = 1;
-                       break;
-               default:
-                       return 0;
-               }
+static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+       struct spufs_fs_context *ctx = fc->fs_private;
+       struct spufs_sb_info *sbi = fc->s_fs_info;
+       struct fs_parse_result result;
+       kuid_t uid;
+       kgid_t gid;
+       int opt;
+
+       opt = fs_parse(fc, &spufs_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+
+       switch (opt) {
+       case Opt_uid:
+               uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(uid))
+                       return invalf(fc, "Unknown uid");
+               ctx->uid = uid;
+               break;
+       case Opt_gid:
+               gid = make_kgid(current_user_ns(), result.uint_32);
+               if (!gid_valid(gid))
+                       return invalf(fc, "Unknown gid");
+               ctx->gid = gid;
+               break;
+       case Opt_mode:
+               ctx->mode = result.uint_32 & S_IALLUGO;
+               break;
+       case Opt_debug:
+               sbi->debug = true;
+               break;
        }
-       return 1;
+
+       return 0;
 }
 
 static void spufs_exit_isolated_loader(void)
@@ -678,79 +684,99 @@ spufs_init_isolated_loader(void)
        printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
 }
 
-static int
-spufs_create_root(struct super_block *sb, void *data)
+static int spufs_create_root(struct super_block *sb, struct fs_context *fc)
 {
+       struct spufs_fs_context *ctx = fc->fs_private;
        struct inode *inode;
-       int ret;
 
-       ret = -ENODEV;
        if (!spu_management_ops)
-               goto out;
+               return -ENODEV;
 
-       ret = -ENOMEM;
-       inode = spufs_new_inode(sb, S_IFDIR | 0775);
+       inode = spufs_new_inode(sb, S_IFDIR | ctx->mode);
        if (!inode)
-               goto out;
+               return -ENOMEM;
 
+       inode->i_uid = ctx->uid;
+       inode->i_gid = ctx->gid;
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        SPUFS_I(inode)->i_ctx = NULL;
        inc_nlink(inode);
 
-       ret = -EINVAL;
-       if (!spufs_parse_options(sb, data, inode))
-               goto out_iput;
-
-       ret = -ENOMEM;
        sb->s_root = d_make_root(inode);
        if (!sb->s_root)
-               goto out;
-
+               return -ENOMEM;
        return 0;
-out_iput:
-       iput(inode);
-out:
-       return ret;
 }
 
-static int
-spufs_fill_super(struct super_block *sb, void *data, int silent)
-{
-       struct spufs_sb_info *info;
-       static const struct super_operations s_ops = {
-               .alloc_inode = spufs_alloc_inode,
-               .free_inode = spufs_free_inode,
-               .statfs = simple_statfs,
-               .evict_inode = spufs_evict_inode,
-               .show_options = spufs_show_options,
-       };
-
-       info = kzalloc(sizeof(*info), GFP_KERNEL);
-       if (!info)
-               return -ENOMEM;
+static const struct super_operations spufs_ops = {
+       .alloc_inode    = spufs_alloc_inode,
+       .free_inode     = spufs_free_inode,
+       .statfs         = simple_statfs,
+       .evict_inode    = spufs_evict_inode,
+       .show_options   = spufs_show_options,
+};
 
+static int spufs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_blocksize = PAGE_SIZE;
        sb->s_blocksize_bits = PAGE_SHIFT;
        sb->s_magic = SPUFS_MAGIC;
-       sb->s_op = &s_ops;
-       sb->s_fs_info = info;
+       sb->s_op = &spufs_ops;
 
-       return spufs_create_root(sb, data);
+       return spufs_create_root(sb, fc);
+}
+
+static int spufs_get_tree(struct fs_context *fc)
+{
+       return get_tree_single(fc, spufs_fill_super);
 }
 
-static struct dentry *
-spufs_mount(struct file_system_type *fstype, int flags,
-               const char *name, void *data)
+static void spufs_free_fc(struct fs_context *fc)
 {
-       return mount_single(fstype, flags, data, spufs_fill_super);
+       kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations spufs_context_ops = {
+       .free           = spufs_free_fc,
+       .parse_param    = spufs_parse_param,
+       .get_tree       = spufs_get_tree,
+};
+
+static int spufs_init_fs_context(struct fs_context *fc)
+{
+       struct spufs_fs_context *ctx;
+       struct spufs_sb_info *sbi;
+
+       ctx = kzalloc(sizeof(struct spufs_fs_context), GFP_KERNEL);
+       if (!ctx)
+               goto nomem;
+
+       sbi = kzalloc(sizeof(struct spufs_sb_info), GFP_KERNEL);
+       if (!sbi)
+               goto nomem_ctx;
+
+       ctx->uid = current_uid();
+       ctx->gid = current_gid();
+       ctx->mode = 0755;
+
+       fc->fs_private = ctx;
+       fc->s_fs_info = sbi;
+       fc->ops = &spufs_context_ops;
+       return 0;
+
+nomem_ctx:
+       kfree(ctx);
+nomem:
+       return -ENOMEM;
 }
 
 static struct file_system_type spufs_type = {
        .owner = THIS_MODULE,
        .name = "spufs",
-       .mount = spufs_mount,
+       .init_fs_context = spufs_init_fs_context,
+       .parameters     = &spufs_fs_parameters,
        .kill_sb = kill_litter_super,
 };
 MODULE_ALIAS_FS("spufs");
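
The spufs conversion above follows the standard new-mount-API recipe:
describe the options as fs_parameter_spec entries, parse them one at a time
in ->parse_param() into a private context, and build the superblock from
->get_tree(). A condensed, hedged skeleton of the 5.4-era pattern (all my_*
names are hypothetical):

    static const struct fs_parameter_spec my_param_specs[] = {
            fsparam_u32     ("uid",   Opt_uid),
            fsparam_flag    ("debug", Opt_debug),
            {}
    };

    static const struct fs_parameter_description my_fs_parameters = {
            .name   = "myfs",
            .specs  = my_param_specs,
    };

    static int my_parse_param(struct fs_context *fc, struct fs_parameter *param)
    {
            struct fs_parse_result result;
            int opt = fs_parse(fc, &my_fs_parameters, param, &result);

            if (opt < 0)
                    return opt;
            /* switch (opt) { ... store into fc->fs_private ... } */
            return 0;
    }

    static const struct fs_context_operations my_context_ops = {
            .free           = my_free_fc,
            .parse_param    = my_parse_param,
            .get_tree       = my_get_tree,  /* e.g. via get_tree_single() */
    };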
index 6bc24a4..6f300ab 100644 (file)
@@ -42,7 +42,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
 {
        struct pci_dn *pdn = pci_get_pdn(pdev);
 
-       if (eeh_has_flag(EEH_FORCE_DISABLED))
+       if (!pdn || eeh_has_flag(EEH_FORCE_DISABLED))
                return;
 
        dev_dbg(&pdev->dev, "EEH: Setting up device\n");
index 94cd96b..13e2516 100644 (file)
@@ -146,20 +146,25 @@ static int pnv_smp_cpu_disable(void)
        return 0;
 }
 
+static void pnv_flush_interrupts(void)
+{
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               if (xive_enabled())
+                       xive_flush_interrupt();
+               else
+                       icp_opal_flush_interrupt();
+       } else {
+               icp_native_flush_interrupt();
+       }
+}
+
 static void pnv_smp_cpu_kill_self(void)
 {
+       unsigned long srr1, unexpected_mask, wmask;
        unsigned int cpu;
-       unsigned long srr1, wmask;
        u64 lpcr_val;
 
        /* Standard hot unplug procedure */
-       /*
-        * This hard disables local interurpts, ensuring we have no lazy
-        * irqs pending.
-        */
-       WARN_ON(irqs_disabled());
-       hard_irq_disable();
-       WARN_ON(lazy_irq_pending());
 
        idle_task_exit();
        current->active_mm = NULL; /* for sanity */
@@ -172,6 +177,27 @@ static void pnv_smp_cpu_kill_self(void)
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                wmask = SRR1_WAKEMASK_P8;
 
+       /*
+        * This turns the irq soft-disabled state we're called with, into a
+        * hard-disabled state with pending irq_happened interrupts cleared.
+        *
+        * PACA_IRQ_DEC   - Decrementer should be ignored.
+        * PACA_IRQ_HMI   - Can be ignored, processing is done in real mode.
+        * PACA_IRQ_DBELL, EE, PMI - Unexpected.
+        */
+       hard_irq_disable();
+       if (generic_check_cpu_restart(cpu))
+               goto out;
+
+       unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
+       if (local_paca->irq_happened & unexpected_mask) {
+               if (local_paca->irq_happened & PACA_IRQ_EE)
+                       pnv_flush_interrupts();
+               DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
+                               cpu, local_paca->irq_happened);
+       }
+       local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+
        /*
         * We don't want to take decrementer interrupts while we are
         * offline, so clear LPCR:PECE1. We keep PECE2 (and
@@ -193,10 +219,11 @@ static void pnv_smp_cpu_kill_self(void)
                 * for coming online, which are handled via
                 * generic_check_cpu_restart() calls.
                 */
-               kvmppc_set_host_ipi(cpu, 0);
+               kvmppc_clear_host_ipi(cpu);
 
                srr1 = pnv_cpu_offline(cpu);
 
+               WARN_ON_ONCE(!irqs_disabled());
                WARN_ON(lazy_irq_pending());
 
                /*
@@ -212,13 +239,7 @@ static void pnv_smp_cpu_kill_self(void)
                 */
                if (((srr1 & wmask) == SRR1_WAKEEE) ||
                    ((srr1 & wmask) == SRR1_WAKEHVI)) {
-                       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-                               if (xive_enabled())
-                                       xive_flush_interrupt();
-                               else
-                                       icp_opal_flush_interrupt();
-                       } else
-                               icp_native_flush_interrupt();
+                       pnv_flush_interrupts();
                } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
                        unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
                        asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
@@ -266,7 +287,7 @@ static void pnv_smp_cpu_kill_self(void)
         */
        lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
        pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
-
+out:
        DBG("CPU%d coming online...\n", cpu);
 }
 
index 36b846f..f87a5c6 100644 (file)
@@ -56,6 +56,22 @@ EXPORT_SYMBOL(plpar_hcall);
 EXPORT_SYMBOL(plpar_hcall9);
 EXPORT_SYMBOL(plpar_hcall_norets);
 
+/*
+ * H_BLOCK_REMOVE supported block size for a given actual page size in a
+ * segment whose base page size is the given base page size.
+ *
+ * The first index is the segment base page size, the second one is the actual
+ * page size.
+ */
+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+
+/*
+ * Due to the complexity involved, and because the current hypervisor only
+ * returns this value or 0, we limit H_BLOCK_REMOVE support to a block size
+ * of 8.
+ */
+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static u8 dtl_mask = DTL_LOG_PREEMPT;
 #else
@@ -984,6 +1000,17 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 #define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
 #define HBLKR_CTRL_ERRBUSY     0xa000000000000000UL
 
+/*
+ * Return true if this block size is supported for the specified segment
+ * base page size and actual page size.
+ *
+ * Currently, only a block size of 8 is supported.
+ */
+static inline bool is_supported_hlbkrm(int bpsize, int psize)
+{
+       return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
+}
+
 /**
  * H_BLOCK_REMOVE caller.
  * @idx should point to the latest @param entry set with a PTEX.
@@ -1143,7 +1170,8 @@ static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
        if (lock_tlbie)
                spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
 
-       if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+       /* Assuming THP size is 16M */
+       if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
                hugepage_block_invalidate(slot, vpn, count, psize, ssize);
        else
                hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
@@ -1311,6 +1339,140 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
                (void)call_block_remove(pix, param, true);
 }
 
+/*
+ * TLB Block Invalidate Characteristics
+ *
+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
+ * is able to process for each pair of segment base page size and actual
+ * page size.
+ *
+ * The ibm,get-system-parameter RTAS call returns a buffer with the
+ * following layout:
+ *
+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
+ * -----------------
+ * TLB Block Invalidate Specifiers:
+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
+ * [ 1 byte Number of page sizes (N) that are supported for the specified
+ *          TLB invalidate block size ]
+ * [ 1 byte Encoded segment base page size and actual page size
+ *          MSB=0 means 4k segment base page size and actual page size
+ *          MSB=1 the penc value in mmu_psize_def ]
+ * ...
+ * -----------------
+ * Next TLB Block Invalidate Specifiers...
+ * -----------------
+ * [ 0 ]
+ */
+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
+                                       unsigned int block_size)
+{
+       if (block_size > hblkrm_size[bpsize][psize])
+               hblkrm_size[bpsize][psize] = block_size;
+}
+
+/*
+ * Decode the Encoded segment base page size and actual page size.
+ * PAPR specifies:
+ *   - bit 7 is the L bit
+ *   - bits 0-5 are the penc value
+ * If the L bit is 0, this means 4K segment base page size and actual page size
+ * otherwise the penc value should be read.
+ */
+#define HBLKRM_L_MASK          0x80
+#define HBLKRM_PENC_MASK       0x3f
+static inline void __init check_lp_set_hblkrm(unsigned int lp,
+                                             unsigned int block_size)
+{
+       unsigned int bpsize, psize;
+
+       /* First check the L bit; if not set, this means 4K */
+       if ((lp & HBLKRM_L_MASK) == 0) {
+               set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+               return;
+       }
+
+       lp &= HBLKRM_PENC_MASK;
+       for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
+               struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
+
+               for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+                       if (def->penc[psize] == lp) {
+                               set_hblkrm_bloc_size(bpsize, psize, block_size);
+                               return;
+                       }
+               }
+       }
+}
+
+#define SPLPAR_TLB_BIC_TOKEN           50
+
+/*
+ * The size of the TLB Block Invalidate Characteristics is variable, but at
+ * most it will be the number of possible page sizes * 2 + 10 bytes.
+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
+ * (128 bytes) for the buffer to get plenty of space.
+ */
+#define SPLPAR_TLB_BIC_MAXLENGTH       128
+
+void __init pseries_lpar_read_hblkrm_characteristics(void)
+{
+       unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
+       int call_status, len, idx, bpsize;
+
+       if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+               return;
+
+       spin_lock(&rtas_data_buf_lock);
+       memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+       call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+                               NULL,
+                               SPLPAR_TLB_BIC_TOKEN,
+                               __pa(rtas_data_buf),
+                               RTAS_DATA_BUF_SIZE);
+       memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
+       local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
+       spin_unlock(&rtas_data_buf_lock);
+
+       if (call_status != 0) {
+               pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
+                       __FILE__, __func__, call_status);
+               return;
+       }
+
+       /*
+        * The first two (2) bytes of the data in the buffer are the length of
+        * the returned data, not counting these first two (2) bytes.
+        */
+       len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
+       if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
+               pr_warn("%s too large returned buffer %d", __func__, len);
+               return;
+       }
+
+       idx = 2;
+       while (idx < len) {
+               u8 block_shift = local_buffer[idx++];
+               u32 block_size;
+               unsigned int npsize;
+
+               if (!block_shift)
+                       break;
+
+               block_size = 1 << block_shift;
+
+               for (npsize = local_buffer[idx++];
+                    npsize > 0 && idx < len; npsize--)
+                       check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
+                                           block_size);
+       }
+
+       for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+               for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+                       if (hblkrm_size[bpsize][idx])
+                               pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
+                                       bpsize, idx, hblkrm_size[bpsize][idx]);
+}
+
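To make the buffer layout above concrete, here is a self-contained userspace sketch of the same walk. The sample bytes are invented for illustration; a real buffer comes from the hypervisor via the RTAS call above, and the 0x81 lp byte assumes a penc of 0x01.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* [len (2 bytes)] then { [shift][npsize][lp bytes...] }* [0] */
	uint8_t buf[] = { 0x00, 0x05,	/* 5 bytes follow the length field */
			  0x03,		/* block size = 1 << 3 = 8 */
			  0x02,		/* two page-size encodings follow */
			  0x00,		/* MSB=0: 4K base / 4K actual */
			  0x81,		/* MSB=1: penc = 0x01 (assumed) */
			  0x00 };	/* terminator */
	unsigned int len = ((buf[0] << 8) | buf[1]) + 2;	/* big endian */
	unsigned int idx = 2;

	while (idx < len) {
		unsigned int shift = buf[idx++];
		unsigned int npsize;

		if (!shift)
			break;
		for (npsize = buf[idx++]; npsize > 0 && idx < len; npsize--)
			printf("block size %u, lp byte 0x%02x\n",
			       1u << shift, buf[idx++]);
	}
	return 0;
}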
 /*
  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
  * lock.
@@ -1330,7 +1492,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
        if (lock_tlbie)
                spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
 
-       if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+       if (is_supported_hlbkrm(batch->psize, batch->psize)) {
                do_block_remove(number, batch, param);
                goto out;
        }
index a5ac371..6188329 100644 (file)
@@ -65,29 +65,21 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
                cond_resched();
        } while (rc == H_BUSY);
 
-       if (rc) {
-               /* H_OVERLAP needs a separate error path */
-               if (rc == H_OVERLAP)
-                       return -EBUSY;
-
-               dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
-               return -ENXIO;
-       }
+       if (rc)
+               return rc;
 
        p->bound_addr = saved;
-
-       dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
-
-       return 0;
+       dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
+       return rc;
 }
 
-static int drc_pmem_unbind(struct papr_scm_priv *p)
+static void drc_pmem_unbind(struct papr_scm_priv *p)
 {
        unsigned long ret[PLPAR_HCALL_BUFSIZE];
        uint64_t token = 0;
        int64_t rc;
 
-       dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
+       dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
 
        /* NB: unbind has the same retry requirements as drc_pmem_bind() */
        do {
@@ -110,12 +102,48 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
        if (rc)
                dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
        else
-               dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+               dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
                        p->drc_index);
 
-       return rc == H_SUCCESS ? 0 : -ENXIO;
+       return;
 }
 
+static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
+{
+       unsigned long start_addr;
+       unsigned long end_addr;
+       unsigned long ret[PLPAR_HCALL_BUFSIZE];
+       int64_t rc;
+
+       rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+                        p->drc_index, 0);
+       if (rc)
+               goto err_out;
+       start_addr = ret[0];
+
+       /* Make sure the full region is bound. */
+       rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+                        p->drc_index, p->blocks - 1);
+       if (rc)
+               goto err_out;
+       end_addr = ret[0];
+
+       if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
+               goto err_out;
+
+       p->bound_addr = start_addr;
+       dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
+       return rc;
+
+err_out:
+       dev_info(&p->pdev->dev,
+                "Failed to query, trying an unbind followed by bind");
+       drc_pmem_unbind(p);
+       return drc_pmem_bind(p);
+}
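For illustration, with assumed values not taken from this patch, say a 256 MiB block size and p->blocks = 4: the slot 0 and slot (blocks - 1) queries must return addresses exactly (4 - 1) * 256 MiB = 768 MiB apart. Any other spacing means the region is only partially or discontiguously bound, so the code falls back to a full unbind followed by a fresh bind.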
+
 static int papr_scm_meta_get(struct papr_scm_priv *p,
                             struct nd_cmd_get_config_data_hdr *hdr)
 {
@@ -436,14 +464,14 @@ static int papr_scm_probe(struct platform_device *pdev)
        rc = drc_pmem_bind(p);
 
        /* If phyp says drc memory still bound then force unbound and retry */
-       if (rc == -EBUSY) {
-               dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
-               drc_pmem_unbind(p);
-               rc = drc_pmem_bind(p);
-       }
+       if (rc == H_OVERLAP)
+               rc = drc_pmem_query_n_bind(p);
 
-       if (rc)
+       if (rc != H_SUCCESS) {
+               dev_err(&p->pdev->dev, "bind err: %d\n", rc);
+               rc = -ENXIO;
                goto err;
+       }
 
        /* setup the resource for the newly bound range */
        p->res.start = p->bound_addr;
index a6624d4..13fa370 100644 (file)
@@ -112,5 +112,6 @@ static inline unsigned long cmo_get_page_size(void)
 int dlpar_workqueue_init(void);
 
 void pseries_setup_rfi_flush(void);
+void pseries_lpar_read_hblkrm_characteristics(void);
 
 #endif /* _PSERIES_PSERIES_H */
index f8adcd0..0a40201 100644 (file)
@@ -744,6 +744,7 @@ static void __init pSeries_setup_arch(void)
 
        pseries_setup_rfi_flush();
        setup_stf_barrier();
+       pseries_lpar_read_hblkrm_characteristics();
 
        /* By default, only probe PCI (can be overridden by rtas_pci) */
        pci_add_flags(PCI_PROBE_ONLY);
index 485569f..7d13d2e 100644 (file)
@@ -140,7 +140,7 @@ static unsigned int icp_native_get_irq(void)
 
 static void icp_native_cause_ipi(int cpu)
 {
-       kvmppc_set_host_ipi(cpu, 1);
+       kvmppc_set_host_ipi(cpu);
        icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -179,7 +179,7 @@ void icp_native_flush_interrupt(void)
        if (vec == XICS_IPI) {
                /* Clear pending IPI */
                int cpu = smp_processor_id();
-               kvmppc_set_host_ipi(cpu, 0);
+               kvmppc_clear_host_ipi(cpu);
                icp_native_set_qirr(cpu, 0xff);
        } else {
                pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
@@ -200,7 +200,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
 {
        int cpu = smp_processor_id();
 
-       kvmppc_set_host_ipi(cpu, 0);
+       kvmppc_clear_host_ipi(cpu);
        icp_native_set_qirr(cpu, 0xff);
 
        return smp_ipi_demux();
index 8bb8dd7..68fd254 100644 (file)
@@ -126,7 +126,7 @@ static void icp_opal_cause_ipi(int cpu)
 {
        int hw_cpu = get_hard_smp_processor_id(cpu);
 
-       kvmppc_set_host_ipi(cpu, 1);
+       kvmppc_set_host_ipi(cpu);
        opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
 }
 
@@ -134,7 +134,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
 {
        int cpu = smp_processor_id();
 
-       kvmppc_set_host_ipi(cpu, 0);
+       kvmppc_clear_host_ipi(cpu);
        opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
 
        return smp_ipi_demux();
@@ -157,7 +157,7 @@ void icp_opal_flush_interrupt(void)
                if (vec == XICS_IPI) {
                        /* Clear pending IPI */
                        int cpu = smp_processor_id();
-                       kvmppc_set_host_ipi(cpu, 0);
+                       kvmppc_clear_host_ipi(cpu);
                        opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
                } else {
                        pr_err("XICS: hw interrupt 0x%x to offline cpu, "
index 71d29fb..8eebbc8 100644 (file)
@@ -59,6 +59,18 @@ config RISCV
        select ARCH_HAS_GIGANTIC_PAGE
        select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
        select SPARSEMEM_STATIC if 32BIT
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
+       select HAVE_ARCH_MMAP_RND_BITS
+
+config ARCH_MMAP_RND_BITS_MIN
+       default 18 if 64BIT
+       default 8
+
+# max bits determined by the following formula:
+#  VA_BITS - PAGE_SHIFT - 3
+config ARCH_MMAP_RND_BITS_MAX
+       default 24 if 64BIT # SV39 based
+       default 17
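Plugging numbers into the formula above, assuming 4 KiB pages (PAGE_SHIFT = 12):

    64BIT, SV39 (VA_BITS = 39): 39 - 12 - 3 = 24
    32BIT  (VA_BITS = 32):      32 - 12 - 3 = 17

which matches the two defaults.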
 
 config MMU
        def_bool y
index 42b5ec2..afa43c7 100644 (file)
@@ -13,6 +13,7 @@
        aliases {
                serial0 = &uart0;
                serial1 = &uart1;
+               ethernet0 = &eth0;
        };
 
        chosen {
@@ -60,7 +61,6 @@
                        };
                };
                cpu2: cpu@2 {
-                       clock-frequency = <0>;
                        compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
                        d-cache-block-size = <64>;
                        d-cache-sets = <64>;
@@ -84,7 +84,6 @@
                        };
                };
                cpu3: cpu@3 {
-                       clock-frequency = <0>;
                        compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
                        d-cache-block-size = <64>;
                        d-cache-sets = <64>;
                        };
                };
                cpu4: cpu@4 {
-                       clock-frequency = <0>;
                        compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
                        d-cache-block-size = <64>;
                        d-cache-sets = <64>;
                        #size-cells = <0>;
                        status = "disabled";
                };
+               pwm0: pwm@10020000 {
+                       compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
+                       reg = <0x0 0x10020000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <42 43 44 45>;
+                       clocks = <&prci PRCI_CLK_TLCLK>;
+                       #pwm-cells = <3>;
+                       status = "disabled";
+               };
+               pwm1: pwm@10021000 {
+                       compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
+                       reg = <0x0 0x10021000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <46 47 48 49>;
+                       clocks = <&prci PRCI_CLK_TLCLK>;
+                       #pwm-cells = <3>;
+                       status = "disabled";
+               };
 
        };
 };
index 93d68cb..88cfcb9 100644 (file)
@@ -13,6 +13,7 @@
        compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000";
 
        chosen {
+               stdout-path = "serial0";
        };
 
        cpus {
                reg = <0>;
        };
 };
+
+&pwm0 {
+       status = "okay";
+};
+
+&pwm1 {
+       status = "okay";
+};
index 3efff55..420a0db 100644 (file)
@@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NETLINK_DIAG=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
 CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
@@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
@@ -54,6 +57,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_HVC_RISCV_SBI=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
 CONFIG_SPI=y
@@ -61,6 +65,7 @@ CONFIG_SPI_SIFIVE=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_RADEON=y
+CONFIG_DRM_VIRTIO_GPU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
@@ -73,7 +78,12 @@ CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
 CONFIG_MMC=y
 CONFIG_MMC_SPI=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_VIRTIO=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_AUTOFS4_FS=y
@@ -86,6 +96,7 @@ CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
+CONFIG_9P_FS=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
index 7da93e4..87ee6e6 100644 (file)
@@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NETLINK_DIAG=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
 CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
@@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
@@ -54,11 +57,13 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_HVC_RISCV_SBI=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_RADEON=y
+CONFIG_DRM_VIRTIO_GPU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
@@ -69,7 +74,12 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_VIRTIO=y
 CONFIG_SIFIVE_PLIC=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -83,6 +93,7 @@ CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
+CONFIG_9P_FS=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
index 5a02b7d..9c992a8 100644 (file)
@@ -22,6 +22,7 @@
 
 #define REG_L          __REG_SEL(ld, lw)
 #define REG_S          __REG_SEL(sd, sw)
+#define REG_SC         __REG_SEL(sc.d, sc.w)
 #define SZREG          __REG_SEL(8, 4)
 #define LGREG          __REG_SEL(3, 2)
 
index 07ceee8..75604fe 100644 (file)
@@ -12,7 +12,6 @@
 
 #include <asm/asm.h>
 
-#ifdef CONFIG_GENERIC_BUG
 #define __INSN_LENGTH_MASK  _UL(0x3)
 #define __INSN_LENGTH_32    _UL(0x3)
 #define __COMPRESSED_INSN_MASK _UL(0xffff)
@@ -20,7 +19,6 @@
 #define __BUG_INSN_32  _UL(0x00100073) /* ebreak */
 #define __BUG_INSN_16  _UL(0x9002) /* c.ebreak */
 
-#ifndef __ASSEMBLY__
 typedef u32 bug_insn_t;
 
 #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
@@ -43,6 +41,7 @@ typedef u32 bug_insn_t;
        RISCV_SHORT " %2"
 #endif
 
+#ifdef CONFIG_GENERIC_BUG
 #define __BUG_FLAGS(flags)                                     \
 do {                                                           \
        __asm__ __volatile__ (                                  \
@@ -58,14 +57,10 @@ do {                                                                \
                  "i" (flags),                                  \
                  "i" (sizeof(struct bug_entry)));              \
 } while (0)
-
-#endif /* !__ASSEMBLY__ */
 #else /* CONFIG_GENERIC_BUG */
-#ifndef __ASSEMBLY__
 #define __BUG_FLAGS(flags) do {                                        \
        __asm__ __volatile__ ("ebreak\n");                      \
 } while (0)
-#endif /* !__ASSEMBLY__ */
 #endif /* CONFIG_GENERIC_BUG */
 
 #define BUG() do {                                             \
@@ -79,15 +74,10 @@ do {                                                                \
 
 #include <asm-generic/bug.h>
 
-#ifndef __ASSEMBLY__
-
 struct pt_regs;
 struct task_struct;
 
-extern void die(struct pt_regs *regs, const char *str);
-extern void do_trap(struct pt_regs *regs, int signo, int code,
-       unsigned long addr);
-
-#endif /* !__ASSEMBLY__ */
+void die(struct pt_regs *regs, const char *str);
+void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
 
 #endif /* _ASM_RISCV_BUG_H */
index fc1189a..3ba4d93 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/types.h>
 #include <asm/mmiowb.h>
+#include <asm/pgtable.h>
 
 extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
@@ -161,6 +162,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 #define writeq(v,c)    ({ __io_bw(); writeq_cpu((v),(c)); __io_aw(); })
 #endif
 
+/*
+ *  I/O port access constants.
+ */
+#define IO_SPACE_LIMIT         (PCI_IO_SIZE - 1)
+#define PCI_IOBASE             ((void __iomem *)PCI_IO_START)
+
 /*
  * Emulation routines for the port-mapped IO space used by some PCI drivers.
  * These are defined as being "fully synchronous", but also "not guaranteed to
index 7557642..6e1b0e0 100644 (file)
@@ -7,6 +7,9 @@
 #ifndef _ASM_RISCV_IRQ_H
 #define _ASM_RISCV_IRQ_H
 
+#include <linux/interrupt.h>
+#include <linux/linkage.h>
+
 #define NR_IRQS         0
 
 void riscv_timer_interrupt(void);
index 56a67d6..d59ea92 100644 (file)
@@ -78,12 +78,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #define __pte_free_tlb(tlb, pte, buf)   \
 do {                                    \
-       pgtable_page_dtor(pte);         \
+       pgtable_pte_page_dtor(pte);     \
        tlb_remove_page((tlb), pte);    \
 } while (0)
 
-static inline void check_pgt_cache(void)
-{
-}
-
 #endif /* _ASM_RISCV_PGALLOC_H */
index 80905b2..d322101 100644 (file)
@@ -7,6 +7,7 @@
 #define _ASM_RISCV_PGTABLE_H
 
 #include <linux/mmzone.h>
+#include <linux/sizes.h>
 
 #include <asm/pgtable-bits.h>
 
@@ -83,6 +84,11 @@ extern pgd_t swapper_pg_dir[];
 #define __S110 PAGE_SHARED_EXEC
 #define __S111 PAGE_SHARED_EXEC
 
+#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END      (PAGE_OFFSET - 1)
+#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
+#define PCI_IO_SIZE      SZ_16M
+
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
  * struct pages to map half the virtual address space. Then
@@ -96,6 +102,17 @@ extern pgd_t swapper_pg_dir[];
 
 #define vmemmap                ((struct page *)VMEMMAP_START)
 
+#define PCI_IO_END       VMEMMAP_START
+#define PCI_IO_START     (PCI_IO_END - PCI_IO_SIZE)
+#define FIXADDR_TOP      PCI_IO_START
+
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE     PMD_SIZE
+#else
+#define FIXADDR_SIZE     PGDIR_SIZE
+#endif
+#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
+
 /*
  * ZERO_PAGE is a global shared page that is always zero,
  * used for zero-mapped memory areas, etc.
@@ -172,10 +189,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
        return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
 }
 
-static inline pte_t mk_pte(struct page *page, pgprot_t prot)
-{
-       return pfn_pte(page_to_pfn(page), prot);
-}
+#define mk_pte(page, prot)       pfn_pte(page_to_pfn(page), prot)
 
 #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
@@ -416,31 +430,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte)        ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)  ((pte_t) { (x).val })
 
-#ifdef CONFIG_FLATMEM
 #define kern_addr_valid(addr)   (1) /* FIXME */
-#endif
 
 extern void *dtb_early_va;
 extern void setup_bootmem(void);
 extern void paging_init(void);
 
-static inline void pgtable_cache_init(void)
-{
-       /* No page table caches to initialize */
-}
-
-#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END      (PAGE_OFFSET - 1)
-#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
-
-#define FIXADDR_TOP      VMALLOC_START
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE     PMD_SIZE
-#else
-#define FIXADDR_SIZE     PGDIR_SIZE
-#endif
-#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
-
 /*
  * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
index f0227bd..ee4f0ac 100644 (file)
@@ -6,6 +6,7 @@
 #ifndef _ASM_RISCV_SWITCH_TO_H
 #define _ASM_RISCV_SWITCH_TO_H
 
+#include <linux/sched/task_stack.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/csr.h>
index 37ae4e3..f02188a 100644 (file)
 #include <linux/mm_types.h>
 #include <asm/smp.h>
 
-/*
- * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
- * cache as well, so a 'fence.i' is not necessary.
- */
 static inline void local_flush_tlb_all(void)
 {
        __asm__ __volatile__ ("sfence.vma" : : : "memory");
index b1ade9a..a5ad000 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/processor.h>
 #include <asm/hwcap.h>
 #include <asm/smp.h>
+#include <asm/switch_to.h>
 
 unsigned long elf_hwcap __read_mostly;
 #ifdef CONFIG_FPU
index 74ccfd4..8ca4798 100644 (file)
@@ -98,7 +98,26 @@ _save_context:
  */
        .macro RESTORE_ALL
        REG_L a0, PT_SSTATUS(sp)
-       REG_L a2, PT_SEPC(sp)
+       /*
+        * The current load reservation is effectively part of the processor's
+        * state, in the sense that load reservations cannot be shared between
+        * different hart contexts.  We can't actually save and restore a load
+        * reservation, so instead here we clear any existing reservation --
+        * it's always legal for implementations to clear load reservations at
+        * any point (as long as the forward progress guarantee is kept, but
+        * we'll ignore that here).
+        *
+        * Dangling load reservations can be the result of taking a trap in the
+        * middle of an LR/SC sequence, but can also be the result of a taken
+        * forward branch around an SC -- which is how we implement CAS.  As a
+        * result we need to clear reservations between the last CAS and the
+        * jump back to the new context.  While it is unlikely the store
+        * completes, implementations are allowed to expand reservations to be
+        * arbitrarily large.
+        */
+       REG_L  a2, PT_SEPC(sp)
+       REG_SC x0, a2, PT_SEPC(sp)
+
        csrw CSR_SSTATUS, a0
        csrw CSR_SEPC, a2
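The dangling-reservation case called out in the comment above is easiest to see in a compare-and-swap loop. A hypothetical RV64 sketch, not taken from this file; the register roles are assumptions for the example:

1:	lr.d	t0, (a0)	/* acquire a reservation on [a0] */
	bne	t0, a1, 2f	/* mismatch: the forward branch is taken,
				   the sc.d never executes, and the
				   reservation is left dangling */
	sc.d	t1, a2, (a0)	/* success path consumes the reservation */
	bnez	t1, 1b		/* retry if the store-conditional failed */
2:	/* a trap taken here returns through RESTORE_ALL, whose REG_SC
	   store to PT_SEPC(sp) clears the stale reservation */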
 
@@ -166,9 +185,13 @@ ENTRY(handle_exception)
        move a0, sp /* pt_regs */
        tail do_IRQ
 1:
-       /* Exceptions run with interrupts enabled */
+       /*
+        * Exceptions run with interrupts enabled or disabled, depending on
+        * the state of sstatus.SR_SPIE.
+        */
+       andi t0, s1, SR_SPIE
+       beqz t0, 1f
        csrs CSR_SSTATUS, SR_SIE
 
+1:
        /* Handle syscalls */
        li t0, EXC_SYSCALL
        beq s4, t0, handle_syscall
@@ -250,12 +273,11 @@ restore_all:
 resume_kernel:
        REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
        bnez s0, restore_all
-need_resched:
        REG_L s0, TASK_TI_FLAGS(tp)
        andi s0, s0, _TIF_NEED_RESCHED
        beqz s0, restore_all
        call preempt_schedule_irq
-       j need_resched
+       j restore_all
 #endif
 
 work_pending:
index 15a9189..72f89b7 100644 (file)
@@ -63,6 +63,11 @@ _start_kernel:
        li t0, SR_FS
        csrc CSR_SSTATUS, t0
 
+#ifdef CONFIG_SMP
+       li t0, CONFIG_NR_CPUS
+       bgeu a0, t0, .Lsecondary_park
+#endif
+
        /* Pick one hart to run the main boot sequence */
        la a3, hart_lottery
        li a2, 1
@@ -154,9 +159,6 @@ relocate:
 
 .Lsecondary_start:
 #ifdef CONFIG_SMP
-       li a1, CONFIG_NR_CPUS
-       bgeu a0, a1, .Lsecondary_park
-
        /* Set trap vector to spin forever to help debug */
        la a3, .Lsecondary_park
        csrw CSR_STVEC, a3
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
new file mode 100644 (file)
index 0000000..105fb04
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 SiFive, Inc.
+ */
+#ifndef __ASM_HEAD_H
+#define __ASM_HEAD_H
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+extern atomic_t hart_lottery;
+
+asmlinkage void do_page_fault(struct pt_regs *regs);
+asmlinkage void __init setup_vm(uintptr_t dtb_pa);
+
+extern void *__cpu_up_stack_pointer[];
+extern void *__cpu_up_task_pointer[];
+
+void __init parse_dtb(void);
+
+#endif /* __ASM_HEAD_H */
index 6d86593..fffac6d 100644 (file)
@@ -24,7 +24,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
        return 0;
 }
 
-asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
 
index c9ae483..e264e59 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/moduleloader.h>
 
 unsigned long module_emit_got_entry(struct module *mod, unsigned long val)
 {
index fb3a082..85e3c39 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) 2017 SiFive
  */
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
@@ -19,6 +20,7 @@
 #include <asm/csr.h>
 #include <asm/string.h>
 #include <asm/switch_to.h>
+#include <asm/thread_info.h>
 
 extern asmlinkage void ret_from_fork(void);
 extern asmlinkage void ret_from_kernel_thread(void);
index 3687514..1252113 100644 (file)
@@ -148,7 +148,7 @@ long arch_ptrace(struct task_struct *child, long request,
  * Allows PTRACE_SYSCALL to work.  These are called from entry.S in
  * {handle,ret_from}_syscall.
  */
-void do_syscall_trace_enter(struct pt_regs *regs)
+__visible void do_syscall_trace_enter(struct pt_regs *regs)
 {
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                if (tracehook_report_syscall_entry(regs))
@@ -162,7 +162,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
        audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
 }
 
-void do_syscall_trace_exit(struct pt_regs *regs)
+__visible void do_syscall_trace_exit(struct pt_regs *regs)
 {
        audit_syscall_exit(regs);
 
index d0fe623..aa56bb1 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/reboot.h>
+#include <linux/pm.h>
 #include <asm/sbi.h>
 
 static void default_power_off(void)
index a990a6c..845ae0e 100644 (file)
@@ -24,6 +24,8 @@
 #include <asm/tlbflush.h>
 #include <asm/thread_info.h>
 
+#include "head.h"
+
 #ifdef CONFIG_DUMMY_CONSOLE
 struct screen_info screen_info = {
        .orig_video_lines       = 30,
index b14d764..d0f6f21 100644 (file)
@@ -26,7 +26,7 @@ struct rt_sigframe {
 
 #ifdef CONFIG_FPU
 static long restore_fp_state(struct pt_regs *regs,
-                            union __riscv_fp_state *sc_fpregs)
+                            union __riscv_fp_state __user *sc_fpregs)
 {
        long err;
        struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
@@ -53,7 +53,7 @@ static long restore_fp_state(struct pt_regs *regs,
 }
 
 static long save_fp_state(struct pt_regs *regs,
-                         union __riscv_fp_state *sc_fpregs)
+                         union __riscv_fp_state __user *sc_fpregs)
 {
        long err;
        struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
@@ -292,8 +292,8 @@ static void do_signal(struct pt_regs *regs)
  * notification of userspace execution resumption
  * - triggered by the _TIF_WORK_MASK flags
  */
-asmlinkage void do_notify_resume(struct pt_regs *regs,
-       unsigned long thread_info_flags)
+asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
+                                          unsigned long thread_info_flags)
 {
        /* Handle pending signal delivery */
        if (thread_info_flags & _TIF_SIGPENDING)
index 3836760..5c9ec78 100644 (file)
@@ -8,7 +8,9 @@
  * Copyright (C) 2017 SiFive
  */
 
+#include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/profile.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
@@ -206,3 +208,4 @@ void smp_send_reschedule(int cpu)
 {
        send_ipi_single(cpu, IPI_RESCHEDULE);
 }
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
index 18ae6da..261f408 100644 (file)
@@ -29,6 +29,9 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/sbi.h>
+#include <asm/smp.h>
+
+#include "head.h"
 
 void *__cpu_up_stack_pointer[NR_CPUS];
 void *__cpu_up_task_pointer[NR_CPUS];
@@ -130,7 +133,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 /*
  * C entry point for a secondary processor.
  */
-asmlinkage void __init smp_callin(void)
+asmlinkage __visible void __init smp_callin(void)
 {
        struct mm_struct *mm = &init_mm;
 
index e5dd52d..f1ead9d 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/syscalls.h>
 #include <asm-generic/syscalls.h>
 #include <asm/vdso.h>
+#include <asm/syscall.h>
 
 #undef __SYSCALL
 #define __SYSCALL(nr, call)    [nr] = (call),
index 541a2b8..6a53c02 100644 (file)
@@ -7,8 +7,10 @@
 #include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <asm/sbi.h>
+#include <asm/processor.h>
 
 unsigned long riscv_timebase;
+EXPORT_SYMBOL_GPL(riscv_timebase);
 
 void __init time_init(void)
 {
index 424eb72..473de3a 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright (C) 2012 Regents of the University of California
  */
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -83,7 +84,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
 }
 
 #define DO_ERROR_INFO(name, signo, code, str)                          \
-asmlinkage void name(struct pt_regs *regs)                             \
+asmlinkage __visible void name(struct pt_regs *regs)                   \
 {                                                                      \
        do_trap_error(regs, signo, code, regs->sepc, "Oops - " str);    \
 }
@@ -111,7 +112,6 @@ DO_ERROR_INFO(do_trap_ecall_s,
 DO_ERROR_INFO(do_trap_ecall_m,
        SIGILL, ILL_ILLTRP, "environment call from M-mode");
 
-#ifdef CONFIG_GENERIC_BUG
 static inline unsigned long get_break_insn_length(unsigned long pc)
 {
        bug_insn_t insn;
@@ -120,28 +120,15 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
                return 0;
        return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 2UL);
 }
-#endif /* CONFIG_GENERIC_BUG */
 
-asmlinkage void do_trap_break(struct pt_regs *regs)
+asmlinkage __visible void do_trap_break(struct pt_regs *regs)
 {
-#ifdef CONFIG_GENERIC_BUG
-       if (!user_mode(regs)) {
-               enum bug_trap_type type;
-
-               type = report_bug(regs->sepc, regs);
-               switch (type) {
-               case BUG_TRAP_TYPE_NONE:
-                       break;
-               case BUG_TRAP_TYPE_WARN:
-                       regs->sepc += get_break_insn_length(regs->sepc);
-                       break;
-               case BUG_TRAP_TYPE_BUG:
-                       die(regs, "Kernel BUG");
-               }
-       }
-#endif /* CONFIG_GENERIC_BUG */
-
-       force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc));
+       if (user_mode(regs))
+               force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc);
+       else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN)
+               regs->sepc += get_break_insn_length(regs->sepc);
+       else
+               die(regs, "Kernel BUG");
 }
 
 #ifdef CONFIG_GENERIC_BUG
index c9c21e0..484d95a 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright (C) 2015 Regents of the University of California
  */
 
+#include <linux/elf.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/binfmts.h>
@@ -25,7 +26,7 @@ static union {
        struct vdso_data        data;
        u8                      page[PAGE_SIZE];
 } vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+static struct vdso_data *vdso_data = &vdso_data_store.data;
 
 static int __init vdso_init(void)
 {
index beeb5d7..ca66d44 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/mm.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
 
 /*
  * When necessary, performs a deferred icache flush for the given MM context,
index 96add14..247b8c8 100644 (file)
@@ -18,6 +18,8 @@
 #include <asm/ptrace.h>
 #include <asm/tlbflush.h>
 
+#include "../kernel/head.h"
+
 /*
  * This routine handles page faults.  It determines the address and the
  * problem, and then passes it off to one of the appropriate routines.
index f0ba713..573463d 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/swap.h>
 #include <linux/sizes.h>
 #include <linux/of_fdt.h>
+#include <linux/libfdt.h>
 
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
@@ -18,6 +19,8 @@
 #include <asm/pgtable.h>
 #include <asm/io.h>
 
+#include "../kernel/head.h"
+
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
                                                        __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
@@ -82,6 +85,8 @@ disable:
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
+static phys_addr_t dtb_early_pa __initdata;
+
 void __init setup_bootmem(void)
 {
        struct memblock_region *reg;
@@ -117,7 +122,12 @@ void __init setup_bootmem(void)
        setup_initrd();
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-       early_init_fdt_reserve_self();
+       /*
+        * Avoid using early_init_fdt_reserve_self() since __pa() does
+        * not work for DTB pointers that are fixmap addresses
+        */
+       memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+
        early_init_fdt_scan_reserved_mem();
        memblock_allow_resize();
        memblock_dump_all();
@@ -329,8 +339,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
  */
 
 #ifndef __riscv_cmodel_medany
-#error "setup_vm() is called from head.S before relocate so it should "
-       "not use absolute addressing."
+#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
 #endif
 
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
@@ -393,6 +402,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 
        /* Save pointer to DTB for early FDT parsing */
        dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
+       /* Save physical address for memblock reservation */
+       dtb_early_pa = dtb_pa;
 }
 
 static void __init setup_vm_final(void)
@@ -448,7 +459,7 @@ void __init paging_init(void)
        zone_sizes_init();
 }
 
-#ifdef CONFIG_SPARSEMEM
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                               struct vmem_altmap *altmap)
 {
index 2e637ad..a9ffff3 100644 (file)
@@ -142,7 +142,7 @@ static irqreturn_t l2_int_handler(int irq, void *device)
        return IRQ_HANDLED;
 }
 
-int __init sifive_l2_init(void)
+static int __init sifive_l2_init(void)
 {
        struct device_node *np;
        struct resource res;
index f933a47..43a81d0 100644 (file)
@@ -554,9 +554,9 @@ config ARCH_HAS_KEXEC_PURGATORY
        def_bool y
        depends on KEXEC_FILE
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
        bool "Verify kernel signature during kexec_file_load() syscall"
-       depends on KEXEC_FILE && SYSTEM_DATA_VERIFICATION
+       depends on KEXEC_FILE && MODULE_SIG_FORMAT
        help
          This option makes kernel signature verification mandatory for
          the kexec_file_load() syscall.
index 596ca7c..5367950 100644 (file)
@@ -101,10 +101,18 @@ static void handle_relocs(unsigned long offset)
        dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
        for (rela = rela_start; rela < rela_end; rela++) {
                loc = rela->r_offset + offset;
-               val = rela->r_addend + offset;
+               val = rela->r_addend;
                r_sym = ELF64_R_SYM(rela->r_info);
-               if (r_sym)
-                       val += dynsym[r_sym].st_value;
+               if (r_sym) {
+                       if (dynsym[r_sym].st_shndx != SHN_UNDEF)
+                               val += dynsym[r_sym].st_value + offset;
+               } else {
+                       /*
+                        * 0 == undefined symbol table index (STN_UNDEF),
+                        * used for R_390_RELATIVE, only add KASLR offset
+                        */
+                       val += offset;
+               }
                r_type = ELF64_R_TYPE(rela->r_info);
                rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
                if (rc)
index 347f487..38d6403 100644 (file)
@@ -44,6 +44,7 @@ CONFIG_NR_CPUS=512
 CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_SIG=y
 CONFIG_EXPOLINE=y
 CONFIG_EXPOLINE_AUTO=y
 CONFIG_CHSC_SCH=y
@@ -69,12 +70,13 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG=y
 CONFIG_MODULE_SIG_SHA256=y
+CONFIG_UNUSED_SYMBOLS=y
 CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_BLK_WBT=y
 CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_IOCOST=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -370,6 +372,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_PCI=y
 CONFIG_PCI_DEBUG=y
 CONFIG_HOTPLUG_PCI=y
@@ -424,6 +427,7 @@ CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -435,6 +439,7 @@ CONFIG_DM_DELAY=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
 CONFIG_DM_VERITY=m
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
 CONFIG_DM_SWITCH=m
 CONFIG_NETDEVICES=y
 CONFIG_BONDING=m
@@ -489,6 +494,7 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NVIDIA is not set
 # CONFIG_NET_VENDOR_OKI is not set
 # CONFIG_NET_VENDOR_PACKET_ENGINES is not set
+# CONFIG_NET_VENDOR_PENSANDO is not set
 # CONFIG_NET_VENDOR_QLOGIC is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RDC is not set
@@ -538,15 +544,16 @@ CONFIG_WATCHDOG=y
 CONFIG_WATCHDOG_NOWAYOUT=y
 CONFIG_SOFT_WATCHDOG=m
 CONFIG_DIAG288_WATCHDOG=m
-CONFIG_DRM=y
-CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
+CONFIG_SYNC_FILE=y
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
 CONFIG_VFIO_MDEV=m
@@ -580,6 +587,8 @@ CONFIG_NILFS2_FS=m
 CONFIG_FS_DAX=y
 CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FS_ENCRYPTION=y
+CONFIG_FS_VERITY=y
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
@@ -589,6 +598,7 @@ CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
@@ -648,12 +658,15 @@ CONFIG_FORTIFY_SOURCE=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
 CONFIG_INTEGRITY_SIGNATURE=y
 CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
 CONFIG_IMA=y
 CONFIG_IMA_DEFAULT_HASH_SHA256=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA_APPRAISE=y
+CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
 CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
@@ -664,10 +677,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -739,7 +748,6 @@ CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_FRAME_WARN=1024
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_HEADERS_INSTALL=y
 CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
index 8514b8b..25f7998 100644 (file)
@@ -44,6 +44,7 @@ CONFIG_NUMA=y
 # CONFIG_NUMA_EMU is not set
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_SIG=y
 CONFIG_EXPOLINE=y
 CONFIG_EXPOLINE_AUTO=y
 CONFIG_CHSC_SCH=y
@@ -66,11 +67,12 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG=y
 CONFIG_MODULE_SIG_SHA256=y
+CONFIG_UNUSED_SYMBOLS=y
 CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_BLK_WBT=y
 CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_IOCOST=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -363,6 +365,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_PCI=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
@@ -418,6 +421,7 @@ CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -429,6 +433,7 @@ CONFIG_DM_DELAY=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
 CONFIG_DM_VERITY=m
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
 CONFIG_DM_SWITCH=m
 CONFIG_DM_INTEGRITY=m
 CONFIG_NETDEVICES=y
@@ -484,6 +489,7 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NVIDIA is not set
 # CONFIG_NET_VENDOR_OKI is not set
 # CONFIG_NET_VENDOR_PACKET_ENGINES is not set
+# CONFIG_NET_VENDOR_PENSANDO is not set
 # CONFIG_NET_VENDOR_QLOGIC is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RDC is not set
@@ -533,16 +539,16 @@ CONFIG_WATCHDOG_CORE=y
 CONFIG_WATCHDOG_NOWAYOUT=y
 CONFIG_SOFT_WATCHDOG=m
 CONFIG_DIAG288_WATCHDOG=m
-CONFIG_DRM=y
-CONFIG_DRM_VIRTIO_GPU=y
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
+CONFIG_SYNC_FILE=y
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
 CONFIG_VFIO_MDEV=m
@@ -573,6 +579,8 @@ CONFIG_NILFS2_FS=m
 CONFIG_FS_DAX=y
 CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FS_ENCRYPTION=y
+CONFIG_FS_VERITY=y
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
@@ -581,6 +589,7 @@ CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
@@ -639,12 +648,15 @@ CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
 CONFIG_INTEGRITY_SIGNATURE=y
 CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
 CONFIG_IMA=y
 CONFIG_IMA_DEFAULT_HASH_SHA256=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA_APPRAISE=y
+CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
 CONFIG_CRYPTO_FIPS=y
 CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
@@ -656,10 +668,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_OFB=m
@@ -727,7 +735,6 @@ CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_FRAME_WARN=1024
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
index be09a20..20c51e5 100644 (file)
@@ -61,7 +61,7 @@ CONFIG_RAW_DRIVER=y
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_DIMLIB is not set
+CONFIG_LSM="yama,loadpin,safesetid,integrity"
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_FS=y
index a4418fc..70139d0 100644 (file)
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/namei.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/time.h>
-#include <linux/parser.h>
 #include <linux/sysfs.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
 #include <linux/seq_file.h>
-#include <linux/mount.h>
 #include <linux/uio.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
@@ -207,52 +207,44 @@ static int hypfs_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
-enum { opt_uid, opt_gid, opt_err };
+enum { Opt_uid, Opt_gid, };
 
-static const match_table_t hypfs_tokens = {
-       {opt_uid, "uid=%u"},
-       {opt_gid, "gid=%u"},
-       {opt_err, NULL}
+static const struct fs_parameter_spec hypfs_param_specs[] = {
+       fsparam_u32("gid", Opt_gid),
+       fsparam_u32("uid", Opt_uid),
+       {}
 };
 
-static int hypfs_parse_options(char *options, struct super_block *sb)
+static const struct fs_parameter_description hypfs_fs_parameters = {
+       .name           = "hypfs",
+       .specs          = hypfs_param_specs,
+};
+
+static int hypfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-       char *str;
-       substring_t args[MAX_OPT_ARGS];
+       struct hypfs_sb_info *hypfs_info = fc->s_fs_info;
+       struct fs_parse_result result;
        kuid_t uid;
        kgid_t gid;
-
-       if (!options)
-               return 0;
-       while ((str = strsep(&options, ",")) != NULL) {
-               int token, option;
-               struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
-
-               if (!*str)
-                       continue;
-               token = match_token(str, hypfs_tokens, args);
-               switch (token) {
-               case opt_uid:
-                       if (match_int(&args[0], &option))
-                               return -EINVAL;
-                       uid = make_kuid(current_user_ns(), option);
-                       if (!uid_valid(uid))
-                               return -EINVAL;
-                       hypfs_info->uid = uid;
-                       break;
-               case opt_gid:
-                       if (match_int(&args[0], &option))
-                               return -EINVAL;
-                       gid = make_kgid(current_user_ns(), option);
-                       if (!gid_valid(gid))
-                               return -EINVAL;
-                       hypfs_info->gid = gid;
-                       break;
-               case opt_err:
-               default:
-                       pr_err("%s is not a valid mount option\n", str);
-                       return -EINVAL;
-               }
+       int opt;
+
+       opt = fs_parse(fc, &hypfs_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+
+       switch (opt) {
+       case Opt_uid:
+               uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(uid))
+                       return invalf(fc, "Unknown uid");
+               hypfs_info->uid = uid;
+               break;
+       case Opt_gid:
+               gid = make_kgid(current_user_ns(), result.uint_32);
+               if (!gid_valid(gid))
+                       return invalf(fc, "Unknown gid");
+               hypfs_info->gid = gid;
+               break;
        }
        return 0;
 }
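A sketch, not part of this patch, of how the invalf() messages above surface to userspace: the new mount API queues them on the fs_context, where they can be read back from the fsopen() file descriptor. The uid value is chosen so that fs_parse() accepts it as a u32 but make_kuid() rejects it.

#include <stdio.h>
#include <sys/syscall.h>	/* needs headers recent enough for SYS_fsopen */
#include <unistd.h>

#define FSCONFIG_SET_STRING	1	/* from <linux/mount.h> */

int main(void)
{
	long fsfd = syscall(SYS_fsopen, "s390_hypfs", 0);
	char logmsg[128];
	ssize_t n;

	if (fsfd < 0)
		return 1;

	/* (uid_t)-1 parses as a u32 but is rejected by make_kuid() */
	syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "uid", "4294967295", 0);

	n = read(fsfd, logmsg, sizeof(logmsg) - 1);	/* e.g. "e Unknown uid" */
	if (n > 0) {
		logmsg[n] = '\0';
		fprintf(stderr, "%s\n", logmsg);
	}
	return 0;
}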
@@ -266,26 +258,18 @@ static int hypfs_show_options(struct seq_file *s, struct dentry *root)
        return 0;
 }
 
-static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
+static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc)
 {
+       struct hypfs_sb_info *sbi = sb->s_fs_info;
        struct inode *root_inode;
-       struct dentry *root_dentry;
-       int rc = 0;
-       struct hypfs_sb_info *sbi;
+       struct dentry *root_dentry, *update_file;
+       int rc;
 
-       sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
-       if (!sbi)
-               return -ENOMEM;
-       mutex_init(&sbi->lock);
-       sbi->uid = current_uid();
-       sbi->gid = current_gid();
-       sb->s_fs_info = sbi;
        sb->s_blocksize = PAGE_SIZE;
        sb->s_blocksize_bits = PAGE_SHIFT;
        sb->s_magic = HYPFS_MAGIC;
        sb->s_op = &hypfs_s_ops;
-       if (hypfs_parse_options(data, sb))
-               return -EINVAL;
+
        root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
        if (!root_inode)
                return -ENOMEM;
@@ -300,18 +284,46 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
                rc = hypfs_diag_create_files(root_dentry);
        if (rc)
                return rc;
-       sbi->update_file = hypfs_create_update_file(root_dentry);
-       if (IS_ERR(sbi->update_file))
-               return PTR_ERR(sbi->update_file);
+       update_file = hypfs_create_update_file(root_dentry);
+       if (IS_ERR(update_file))
+               return PTR_ERR(update_file);
+       sbi->update_file = update_file;
        hypfs_update_update(sb);
        pr_info("Hypervisor filesystem mounted\n");
        return 0;
 }
 
-static struct dentry *hypfs_mount(struct file_system_type *fst, int flags,
-                       const char *devname, void *data)
+static int hypfs_get_tree(struct fs_context *fc)
+{
+       return get_tree_single(fc, hypfs_fill_super);
+}
+
+static void hypfs_free_fc(struct fs_context *fc)
 {
-       return mount_single(fst, flags, data, hypfs_fill_super);
+       kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations hypfs_context_ops = {
+       .free           = hypfs_free_fc,
+       .parse_param    = hypfs_parse_param,
+       .get_tree       = hypfs_get_tree,
+};
+
+static int hypfs_init_fs_context(struct fs_context *fc)
+{
+       struct hypfs_sb_info *sbi;
+
+       sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
+       if (!sbi)
+               return -ENOMEM;
+
+       mutex_init(&sbi->lock);
+       sbi->uid = current_uid();
+       sbi->gid = current_gid();
+
+       fc->s_fs_info = sbi;
+       fc->ops = &hypfs_context_ops;
+       return 0;
 }
 
 static void hypfs_kill_super(struct super_block *sb)
@@ -442,7 +454,8 @@ static const struct file_operations hypfs_file_ops = {
 static struct file_system_type hypfs_type = {
        .owner          = THIS_MODULE,
        .name           = "s390_hypfs",
-       .mount          = hypfs_mount,
+       .init_fs_context = hypfs_init_fs_context,
+       .parameters     = &hypfs_fs_parameters,
        .kill_sb        = hypfs_kill_super
 };
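
The hypfs_fs_parameters table consumed by fs_parse() and wired up through
.parameters above is defined earlier in the file and is not visible in this
hunk. As a hedged sketch of what such a table looks like under the 5.4-era
mount API (the exact hypfs definition is assumed, not quoted):

    /* Sketch only: parameter table for fs_parse(); the real one lives
     * earlier in arch/s390/hypfs/inode.c.
     */
    enum { Opt_uid, Opt_gid };

    static const struct fs_parameter_spec hypfs_param_specs[] = {
            fsparam_u32("gid", Opt_gid),    /* numeric gid in result.uint_32 */
            fsparam_u32("uid", Opt_uid),    /* numeric uid in result.uint_32 */
            {}
    };

    static const struct fs_parameter_description hypfs_fs_parameters = {
            .name   = "hypfs",
            .specs  = hypfs_param_specs,
    };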
 
index d3f0952..61467b9 100644 (file)
@@ -41,7 +41,7 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg")
 #undef __ATOMIC_OP
 
 #define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier)     \
-static inline void op_name(op_type val, op_type *ptr)                  \
+static __always_inline void op_name(op_type val, op_type *ptr)         \
 {                                                                      \
        asm volatile(                                                   \
                op_string "     %[ptr],%[val]\n"                        \
index b8833ac..eb7eed4 100644 (file)
@@ -56,7 +56,7 @@ __bitops_byte(unsigned long nr, volatile unsigned long *ptr)
        return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 }
 
-static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
+static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
 {
        unsigned long *addr = __bitops_word(nr, ptr);
        unsigned long mask;
@@ -77,7 +77,7 @@ static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
        __atomic64_or(mask, (long *)addr);
 }
 
-static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
 {
        unsigned long *addr = __bitops_word(nr, ptr);
        unsigned long mask;
@@ -98,8 +98,8 @@ static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
        __atomic64_and(mask, (long *)addr);
 }
 
-static inline void arch_change_bit(unsigned long nr,
-                                  volatile unsigned long *ptr)
+static __always_inline void arch_change_bit(unsigned long nr,
+                                           volatile unsigned long *ptr)
 {
        unsigned long *addr = __bitops_word(nr, ptr);
        unsigned long mask;
index a092f63..c0f3bfe 100644 (file)
@@ -171,7 +171,7 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
  *
  * Returns 1 if @func is available for @opcode, 0 otherwise
  */
-static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 {
        register unsigned long r0 asm("0") = 0; /* query function */
        register unsigned long r1 asm("1") = (unsigned long) mask;
index ae3e322..819803a 100644 (file)
@@ -28,6 +28,8 @@ asm(".include \"asm/cpu_mf-insn.h\"\n");
                                 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA|  \
                                 CPU_MF_INT_SF_LSDA)
 
+#define CPU_MF_SF_RIBM_NOTAV   0x1             /* Sampling unavailable */
+
 /* CPU measurement facility support */
 static inline int cpum_cf_avail(void)
 {
@@ -69,8 +71,9 @@ struct hws_qsi_info_block {       /* Bit(s) */
        unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
        unsigned long tear;         /* 24-31: TEAR contents              */
        unsigned long dear;         /* 32-39: DEAR contents              */
-       unsigned int rsvrd0;        /* 40-43: reserved                   */
-       unsigned int cpu_speed;     /* 44-47: CPU speed                  */
+       unsigned int rsvrd0:24;     /* 40-42: reserved                   */
+       unsigned int ribm:8;        /* 43: Reserved by IBM               */
+       unsigned int cpu_speed;     /* 44-47: CPU speed                  */
        unsigned long long rsvrd1;  /* 48-55: reserved                   */
        unsigned long long rsvrd2;  /* 56-63: reserved                   */
 } __packed;
@@ -89,10 +92,10 @@ struct hws_lsctl_request_block {
        unsigned long tear;         /* 16-23: TEAR contents              */
        unsigned long dear;         /* 24-31: DEAR contents              */
        /* 32-63:                                                        */
-       unsigned long rsvrd1;       /* reserved                          */
-       unsigned long rsvrd2;       /* reserved                          */
-       unsigned long rsvrd3;       /* reserved                          */
-       unsigned long rsvrd4;       /* reserved                          */
+       unsigned long rsvrd1;       /* reserved                          */
+       unsigned long rsvrd2;       /* reserved                          */
+       unsigned long rsvrd3;       /* reserved                          */
+       unsigned long rsvrd4;       /* reserved                          */
 } __packed;
 
 struct hws_basic_entry {
@@ -220,7 +223,8 @@ enum stcctm_ctr_set {
        MT_DIAG = 5,
        MT_DIAG_CLEARING = 9,   /* clears loss-of-MT-ctr-data alert */
 };
-static inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest)
+
+static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest)
 {
        int cc;
 
index bb59dd9..de8f0bf 100644 (file)
@@ -12,8 +12,6 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
-
-#define is_hugepage_only_range(mm, addr, len)  0
 #define hugetlb_free_pgd_range                 free_pgd_range
 #define hugepages_supported()                  (MACHINE_HAS_EDAT1)
 
@@ -23,6 +21,13 @@ pte_t huge_ptep_get(pte_t *ptep);
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                              unsigned long addr, pte_t *ptep);
 
+static inline bool is_hugepage_only_range(struct mm_struct *mm,
+                                         unsigned long addr,
+                                         unsigned long len)
+{
+       return false;
+}
+
 /*
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
index e548ec1..39f747d 100644 (file)
@@ -20,7 +20,7 @@
  * We use a brcl 0,2 instruction for jump labels at compile time so it
  * can be easily distinguished from a hotpatch generated instruction.
  */
-static inline bool arch_static_branch(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
        asm_volatile_goto("0:   brcl    0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
                          ".pushsection __jump_table,\"aw\"\n"
@@ -34,7 +34,7 @@ label:
        return true;
 }
 
-static inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
        asm_volatile_goto("0:   brcl 15,%l[label]\n"
                          ".pushsection __jump_table,\"aw\"\n"
index 560d8f7..4652fff 100644 (file)
@@ -60,6 +60,7 @@ struct perf_sf_sde_regs {
 #define PERF_CPUM_SF_MODE_MASK         (PERF_CPUM_SF_BASIC_MODE| \
                                         PERF_CPUM_SF_DIAG_MODE)
 #define PERF_CPUM_SF_FULL_BLOCKS       0x0004    /* Process full SDBs only */
+#define PERF_CPUM_SF_FREQ_MODE         0x0008    /* Sampling with frequency */
 
 #define REG_NONE               0
 #define REG_OVERFLOW           1
@@ -70,5 +71,6 @@ struct perf_sf_sde_regs {
 #define SAMPL_FLAGS(hwc)       ((hwc)->config_base)
 #define SAMPL_DIAG_MODE(hwc)   (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
 #define SDB_FULL_BLOCKS(hwc)   (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+#define SAMPLE_FREQ_MODE(hwc)  (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
 
 #endif /* _ASM_S390_PERF_EVENT_H */
index 0c46007..5ff98d7 100644 (file)
@@ -997,9 +997,9 @@ static inline pte_t pte_mkhuge(pte_t pte)
 #define IPTE_NODAT     0x400
 #define IPTE_GUEST_ASCE        0x800
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
-                              unsigned long opt, unsigned long asce,
-                              int local)
+static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+                                       unsigned long opt, unsigned long asce,
+                                       int local)
 {
        unsigned long pto = (unsigned long) ptep;
 
@@ -1020,8 +1020,8 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
                : [r1] "a" (pto), [m4] "i" (local) : "memory");
 }
 
-static inline void __ptep_ipte_range(unsigned long address, int nr,
-                                    pte_t *ptep, int local)
+static __always_inline void __ptep_ipte_range(unsigned long address, int nr,
+                                             pte_t *ptep, int local)
 {
        unsigned long pto = (unsigned long) ptep;
 
@@ -1269,7 +1269,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
 
 #define pte_offset_kernel(pmd, address) pte_offset(pmd, address)
 #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
-#define pte_unmap(pte) do { } while (0)
+
+static inline void pte_unmap(pte_t *pte) { }
 
 static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
 {
@@ -1435,9 +1436,9 @@ static inline void __pmdp_csp(pmd_t *pmdp)
 #define IDTE_NODAT     0x1000
 #define IDTE_GUEST_ASCE        0x2000
 
-static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
-                              unsigned long opt, unsigned long asce,
-                              int local)
+static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+                                       unsigned long opt, unsigned long asce,
+                                       int local)
 {
        unsigned long sto;
 
@@ -1461,9 +1462,9 @@ static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
        }
 }
 
-static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
-                              unsigned long opt, unsigned long asce,
-                              int local)
+static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+                                       unsigned long opt, unsigned long asce,
+                                       int local)
 {
        unsigned long r3o;
 
@@ -1682,12 +1683,6 @@ extern void s390_reset_cmma(struct mm_struct *mm);
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-/*
- * No page table caches to initialise
- */
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
-
 #include <asm-generic/pgtable.h>
 
 #endif /* _S390_PAGE_H */
index 78e8a88..e3f238e 100644 (file)
@@ -111,7 +111,7 @@ struct qib {
        /* private: */
        u8 res[88];
        /* public: */
-       u8 parm[QDIO_MAX_BUFFERS_PER_Q];
+       u8 parm[128];
 } __attribute__ ((packed, aligned(256)));
 
 /**
index bd2fd9a..a470f1f 100644 (file)
@@ -83,7 +83,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
        __rc;                                                   \
 })
 
-static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
 {
        unsigned long spec = 0x010000UL;
        int rc;
@@ -113,7 +113,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
        return rc;
 }
 
-static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
 {
        unsigned long spec = 0x01UL;
        int rc;
index d827b5b..eaaefec 100644 (file)
@@ -35,6 +35,7 @@ struct unwind_state {
        struct task_struct *task;
        struct pt_regs *regs;
        unsigned long sp, ip;
+       bool reuse_sp;
        int graph_idx;
        bool reliable;
        bool error;
index 8c5755f..f9e5e1f 100644 (file)
@@ -4,7 +4,7 @@
  *
  *  zcrypt 2.2.1 (user-visible header)
  *
- *  Copyright IBM Corp. 2001, 2018
+ *  Copyright IBM Corp. 2001, 2019
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *
@@ -286,7 +286,7 @@ struct zcrypt_device_matrix_ext {
  *      0x08: CEX3A
  *      0x0a: CEX4
  *      0x0b: CEX5
- *      0x0c: CEX6
+ *      0x0c: CEX6 and CEX7
  *      0x0d: device is disabled
  *
  *   ZCRYPT_QDEPTH_MASK
index b9d8fe4..8f84568 100644 (file)
@@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
 static ssize_t show_idle_time(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
+       unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
        struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
-       unsigned long long now, idle_time, idle_enter, idle_exit;
        unsigned int seq;
 
        do {
-               now = get_tod_clock();
                seq = read_seqcount_begin(&idle->seqcount);
                idle_time = READ_ONCE(idle->idle_time);
                idle_enter = READ_ONCE(idle->clock_idle_enter);
                idle_exit = READ_ONCE(idle->clock_idle_exit);
        } while (read_seqcount_retry(&idle->seqcount, seq));
-       idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
+       in_idle = 0;
+       now = get_tod_clock();
+       if (idle_enter) {
+               if (idle_exit) {
+                       in_idle = idle_exit - idle_enter;
+               } else if (now > idle_enter) {
+                       in_idle = now - idle_enter;
+               }
+       }
+       idle_time += in_idle;
        return sprintf(buf, "%llu\n", idle_time >> 12);
 }
 DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
@@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 u64 arch_cpu_idle_time(int cpu)
 {
        struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
-       unsigned long long now, idle_enter, idle_exit;
+       unsigned long long now, idle_enter, idle_exit, in_idle;
        unsigned int seq;
 
        do {
-               now = get_tod_clock();
                seq = read_seqcount_begin(&idle->seqcount);
                idle_enter = READ_ONCE(idle->clock_idle_enter);
                idle_exit = READ_ONCE(idle->clock_idle_exit);
        } while (read_seqcount_retry(&idle->seqcount, seq));
-
-       return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
+       in_idle = 0;
+       now = get_tod_clock();
+       if (idle_enter) {
+               if (idle_exit) {
+                       in_idle = idle_exit - idle_enter;
+               } else if (now > idle_enter) {
+                       in_idle = now - idle_enter;
+               }
+       }
+       return cputime_to_nsecs(in_idle);
 }
 
 void arch_cpu_idle_enter(void)
index 6d0635c..9da6fa3 100644 (file)
@@ -130,7 +130,7 @@ static int s390_elf_probe(const char *buf, unsigned long len)
 const struct kexec_file_ops s390_kexec_elf_ops = {
        .probe = s390_elf_probe,
        .load = s390_elf_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
        .verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 };
index 58318bf..af23eff 100644 (file)
@@ -59,7 +59,7 @@ static int s390_image_probe(const char *buf, unsigned long len)
 const struct kexec_file_ops s390_kexec_image_ops = {
        .probe = s390_image_probe,
        .load = s390_image_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
        .verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 };
index fbdd3ea..8415ae7 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/elf.h>
 #include <linux/errno.h>
 #include <linux/kexec.h>
-#include <linux/module.h>
+#include <linux/module_signature.h>
 #include <linux/verification.h>
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
@@ -22,29 +22,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
        NULL,
 };
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- *     - Signer's name
- *     - Key identifier
- *     - Signature data
- *     - Information block
- */
-struct module_signature {
-       u8      algo;           /* Public-key crypto algorithm [0] */
-       u8      hash;           /* Digest algorithm [0] */
-       u8      id_type;        /* Key identifier type [PKEY_ID_PKCS7] */
-       u8      signer_len;     /* Length of signer's name [0] */
-       u8      key_id_len;     /* Length of key identifier [0] */
-       u8      __pad[3];
-       __be32  sig_len;        /* Length of signature data */
-};
-
-#define PKEY_ID_PKCS7 2
-
+#ifdef CONFIG_KEXEC_SIG
 int s390_verify_sig(const char *kernel, unsigned long kernel_len)
 {
        const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
@@ -90,7 +68,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
                                      VERIFYING_MODULE_SIGNATURE,
                                      NULL, NULL);
 }
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 
 static int kexec_file_update_purgatory(struct kimage *image,
                                       struct s390_load_data *data)
index 3b664cb..d5035de 100644 (file)
@@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
                *(u32 *)loc = val;
                break;
        case R_390_64:          /* Direct 64 bit.  */
+       case R_390_GLOB_DAT:
                *(u64 *)loc = val;
                break;
        case R_390_PC16:        /* PC relative 16 bit.  */
index 5f1fd15..2654e34 100644 (file)
@@ -390,7 +390,7 @@ static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
 
        debug_sprintf_event(cf_diag_dbg, 6,
                            "%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
-                           " need %zd rc:%d\n",
+                           " need %zd rc %d\n",
                            __func__, ctrset, ctrset_size, cpuhw->info.cfvn,
                            cpuhw->info.csvn, need, rc);
        return need;
@@ -567,7 +567,7 @@ static int cf_diag_add(struct perf_event *event, int flags)
        int err = 0;
 
        debug_sprintf_event(cf_diag_dbg, 5,
-                           "%s event %p cpu %d flags %#x cpuhw:%p\n",
+                           "%s event %p cpu %d flags %#x cpuhw %p\n",
                            __func__, event, event->cpu, flags, cpuhw);
 
        if (cpuhw->flags & PMU_F_IN_USE) {
index 292a452..3d8b12a 100644 (file)
@@ -673,13 +673,89 @@ out:
        rcu_read_unlock();
 }
 
+static unsigned long getrate(bool freq, unsigned long sample,
+                            struct hws_qsi_info_block *si)
+{
+       unsigned long rate;
+
+       if (freq) {
+               rate = freq_to_sample_rate(si, sample);
+               rate = hw_limit_rate(si, rate);
+       } else {
+               /* The min/max sampling rates specify the valid range
+                * of sample periods.  If the specified sample period is
+                * out of range, limit the period to the range boundary.
+                */
+               rate = hw_limit_rate(si, sample);
+
+               /* The perf core maintains a maximum sample rate that is
+                * configurable through the sysctl interface.  Ensure the
+                * sampling rate does not exceed this value.  This also helps
+                * to avoid throttling when pushing samples with
+                * perf_event_overflow().
+                */
+               if (sample_rate_to_freq(si, rate) >
+                   sysctl_perf_event_sample_rate) {
+                       debug_sprintf_event(sfdbg, 1,
+                                           "Sampling rate exceeds maximum "
+                                           "perf sample rate\n");
+                       rate = 0;
+               }
+       }
+       return rate;
+}
+
+/* The sampling information (si) contains the min/max sampling
+ * intervals and the CPU speed, so the correct sampling interval can
+ * be calculated up front, avoiding the whole period-adjust feedback
+ * loop.
+ *
+ * Since the CPU Measurement sampling facility cannot handle frequency
+ * directly, calculate the sampling interval from a specified frequency
+ * using this formula:
+ *     interval := cpu_speed * 1000000 / sample_freq
+ *
+ * Returns errno on bad input and zero on success with parameter interval
+ * set to the correct sampling rate.
+ *
+ * Note: This function turns off the freq bit to avoid calling
+ * perf_adjust_period(); that function performs frequency adjustment in
+ * the common code, which causes tremendous variations in the counter
+ * values.
+ */
+static int __hw_perf_event_init_rate(struct perf_event *event,
+                                    struct hws_qsi_info_block *si)
+{
+       struct perf_event_attr *attr = &event->attr;
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned long rate;
+
+       if (attr->freq) {
+               if (!attr->sample_freq)
+                       return -EINVAL;
+               rate = getrate(attr->freq, attr->sample_freq, si);
+               attr->freq = 0;         /* Don't call  perf_adjust_period() */
+               SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
+       } else {
+               rate = getrate(attr->freq, attr->sample_period, si);
+               if (!rate)
+                       return -EINVAL;
+       }
+       attr->sample_period = rate;
+       SAMPL_RATE(hwc) = rate;
+       hw_init_period(hwc, SAMPL_RATE(hwc));
+       debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate: "
+                           "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu,
+                           event->attr.sample_period, event->attr.freq,
+                           SAMPLE_FREQ_MODE(hwc));
+       return 0;
+}
+
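
To make the interval formula above concrete, a small worked sketch (not the
driver's freq_to_sample_rate() helper; treating cpu_speed from the QSI block
as cycles per microsecond is an assumption of this example):

    /* interval := cpu_speed * 1000000 / sample_freq
     * e.g. cpu_speed = 5200, sample_freq = 4000 Hz:
     *   interval = 5200 * 1000000 / 4000 = 1300000
     */
    static unsigned long freq_to_interval(unsigned long cpu_speed,
                                          unsigned long sample_freq)
    {
            return cpu_speed * 1000000UL / sample_freq;
    }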
 static int __hw_perf_event_init(struct perf_event *event)
 {
        struct cpu_hw_sf *cpuhw;
        struct hws_qsi_info_block si;
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
-       unsigned long rate;
        int cpu, err;
 
        /* Reserve CPU-measurement sampling facility */
@@ -727,6 +803,12 @@ static int __hw_perf_event_init(struct perf_event *event)
                goto out;
        }
 
+       if (si.ribm & CPU_MF_SF_RIBM_NOTAV) {
+               pr_warn("CPU Measurement Facility sampling is temporarily not available\n");
+               err = -EBUSY;
+               goto out;
+       }
+
        /* Always enable basic sampling */
        SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
 
@@ -745,43 +827,9 @@ static int __hw_perf_event_init(struct perf_event *event)
        if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
                SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
 
-       /* The sampling information (si) contains information about the
-        * min/max sampling intervals and the CPU speed.  So calculate the
-        * correct sampling interval and avoid the whole period adjust
-        * feedback loop.
-        */
-       rate = 0;
-       if (attr->freq) {
-               if (!attr->sample_freq) {
-                       err = -EINVAL;
-                       goto out;
-               }
-               rate = freq_to_sample_rate(&si, attr->sample_freq);
-               rate = hw_limit_rate(&si, rate);
-               attr->freq = 0;
-               attr->sample_period = rate;
-       } else {
-               /* The min/max sampling rates specifies the valid range
-                * of sample periods.  If the specified sample period is
-                * out of range, limit the period to the range boundary.
-                */
-               rate = hw_limit_rate(&si, hwc->sample_period);
-
-               /* The perf core maintains a maximum sample rate that is
-                * configurable through the sysctl interface.  Ensure the
-                * sampling rate does not exceed this value.  This also helps
-                * to avoid throttling when pushing samples with
-                * perf_event_overflow().
-                */
-               if (sample_rate_to_freq(&si, rate) >
-                     sysctl_perf_event_sample_rate) {
-                       err = -EINVAL;
-                       debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
-                       goto out;
-               }
-       }
-       SAMPL_RATE(hwc) = rate;
-       hw_init_period(hwc, SAMPL_RATE(hwc));
+       err = __hw_perf_event_init_rate(event, &si);
+       if (err)
+               goto out;
 
        /* Initialize sample data overflow accounting */
        hwc->extra_reg.reg = REG_OVERFLOW;
@@ -853,7 +901,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 
        /* Check online status of the CPU to which the event is pinned */
        if (event->cpu >= 0 && !cpu_online(event->cpu))
-                       return -ENODEV;
+               return -ENODEV;
 
        /* Force reset of idle/hv excludes regardless of what the
         * user requested.
@@ -904,6 +952,8 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
                        if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
                                extend_sampling_buffer(&cpuhw->sfb, hwc);
                }
+               /* Rate may be adjusted with ioctl() */
+               cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
        }
 
        /* (Re)enable the PMU and sampling facility */
@@ -922,8 +972,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
        lpp(&S390_lowcore.lpp);
 
        debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
-                           "tear=%p dear=%p\n", cpuhw->lsctl.es,
-                           cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+                           "interval:%lx tear=%p dear=%p\n",
+                           cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
+                           cpuhw->lsctl.cd, cpuhw->lsctl.interval,
                            (void *) cpuhw->lsctl.tear,
                            (void *) cpuhw->lsctl.dear);
 }
@@ -1717,6 +1768,44 @@ static void cpumsf_pmu_read(struct perf_event *event)
        /* Nothing to do ... updates are interrupt-driven */
 }
 
+/* Check if the new sampling period/frequency is appropriate.
+ *
+ * Return non-zero on error and zero if all checks passed.
+ */
+static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
+{
+       struct hws_qsi_info_block si;
+       unsigned long rate;
+       bool do_freq;
+
+       memset(&si, 0, sizeof(si));
+       if (event->cpu == -1) {
+               if (qsi(&si))
+                       return -ENODEV;
+       } else {
+               /* Event is pinned to a particular CPU, retrieve the per-CPU
+                * sampling structure for accessing the CPU-specific QSI.
+                */
+               struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+
+               si = cpuhw->qsi;
+       }
+
+       do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+       rate = getrate(do_freq, value, &si);
+       if (!rate)
+               return -EINVAL;
+
+       event->attr.sample_period = rate;
+       SAMPL_RATE(&event->hw) = rate;
+       hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
+       debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period: "
+                           "cpu:%d value:%llx period:%llx freq:%d\n",
+                           event->cpu, value,
+                           event->attr.sample_period, do_freq);
+       return 0;
+}
+
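
As the pmu_enable() comment above notes, the rate may be adjusted with
ioctl(); check_period is the hook that validates such a request. A minimal
userspace sketch of triggering it (perf_fd is assumed to be an already-open
perf event descriptor):

    /* Request a new sampling period on a live event.  The perf core
     * routes PERF_EVENT_IOC_PERIOD into the PMU's check_period hook,
     * i.e. cpumsf_pmu_check_period() above on s390.
     */
    #include <sys/ioctl.h>
    #include <linux/perf_event.h>

    static int set_sample_period(int perf_fd, unsigned long long period)
    {
            return ioctl(perf_fd, PERF_EVENT_IOC_PERIOD, &period);
    }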
 /* Activate sampling control.
  * Next call of pmu_enable() starts sampling.
  */
@@ -1908,6 +1997,8 @@ static struct pmu cpumf_sampling = {
 
        .setup_aux    = aux_buffer_setup,
        .free_aux     = aux_buffer_free,
+
+       .check_period = cpumsf_pmu_check_period,
 };
 
 static void cpumf_measurement_alert(struct ext_code ext_code,
index 2db6fb4..3627953 100644 (file)
@@ -311,7 +311,8 @@ int arch_update_cpu_topology(void)
        on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
        for_each_online_cpu(cpu) {
                dev = get_cpu_device(cpu);
-               kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+               if (dev)
+                       kobject_uevent(&dev->kobj, KOBJ_CHANGE);
        }
        return rc;
 }
index 8fc9daa..a8204f9 100644 (file)
@@ -46,10 +46,15 @@ bool unwind_next_frame(struct unwind_state *state)
 
        regs = state->regs;
        if (unlikely(regs)) {
-               sp = READ_ONCE_NOCHECK(regs->gprs[15]);
-               if (unlikely(outside_of_stack(state, sp))) {
-                       if (!update_stack_info(state, sp))
-                               goto out_err;
+               if (state->reuse_sp) {
+                       sp = state->sp;
+                       state->reuse_sp = false;
+               } else {
+                       sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+                       if (unlikely(outside_of_stack(state, sp))) {
+                               if (!update_stack_info(state, sp))
+                                       goto out_err;
+                       }
                }
                sf = (struct stack_frame *) sp;
                ip = READ_ONCE_NOCHECK(sf->gprs[8]);
@@ -107,9 +112,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 {
        struct stack_info *info = &state->stack_info;
        unsigned long *mask = &state->stack_mask;
+       bool reliable, reuse_sp;
        struct stack_frame *sf;
        unsigned long ip;
-       bool reliable;
 
        memset(state, 0, sizeof(*state));
        state->task = task;
@@ -134,10 +139,12 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        if (regs) {
                ip = READ_ONCE_NOCHECK(regs->psw.addr);
                reliable = true;
+               reuse_sp = true;
        } else {
                sf = (struct stack_frame *) sp;
                ip = READ_ONCE_NOCHECK(sf->gprs[8]);
                reliable = false;
+               reuse_sp = false;
        }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -151,5 +158,6 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        state->sp = sp;
        state->ip = ip;
        state->reliable = reliable;
+       state->reuse_sp = reuse_sp;
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
index f6db0f1..d047e84 100644 (file)
@@ -332,7 +332,7 @@ static inline int plo_test_bit(unsigned char nr)
        return cc == 0;
 }
 
-static inline void __insn32_query(unsigned int opcode, u8 query[32])
+static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 {
        register unsigned long r0 asm("0") = 0; /* query function */
        register unsigned long r1 asm("1") = (unsigned long) query;
@@ -340,9 +340,9 @@ static inline void __insn32_query(unsigned int opcode, u8 query[32])
        asm volatile(
                /* Parameter regs are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
-               : "=m" (*query)
+               :
                : "d" (r0), "a" (r1), [opc] "i" (opcode)
-               : "cc");
+               : "cc", "memory");
 }
 
 #define INSN_SORTL 0xb938
index 510a182..a51c892 100644 (file)
@@ -298,16 +298,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
        }
 
        if (write) {
-               len = *lenp;
-               if (copy_from_user(buf, buffer,
-                                  len > sizeof(buf) ? sizeof(buf) : len))
+               len = min(*lenp, sizeof(buf));
+               if (copy_from_user(buf, buffer, len))
                        return -EFAULT;
-               buf[sizeof(buf) - 1] = '\0';
+               buf[len - 1] = '\0';
                cmm_skip_blanks(buf, &p);
                nr = simple_strtoul(p, &p, 0);
                cmm_skip_blanks(p, &p);
                seconds = simple_strtoul(p, &p, 0);
                cmm_set_timeout(nr, seconds);
+               *ppos += *lenp;
        } else {
                len = sprintf(buf, "%ld %ld\n",
                              cmm_timeout_pages, cmm_timeout_seconds);
@@ -315,9 +315,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
                        len = *lenp;
                if (copy_to_user(buffer, buf, len))
                        return -EFAULT;
+               *lenp = len;
+               *ppos += len;
        }
-       *lenp = len;
-       *ppos += len;
        return 0;
 }
 
index 54fcdf6..3dd253f 100644 (file)
@@ -210,7 +210,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
        page = alloc_page(GFP_KERNEL);
        if (!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -256,7 +256,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
                atomic_xor_bits(&page->_refcount, 3U << 24);
        }
 
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
@@ -308,7 +308,7 @@ void __tlb_remove_table(void *_table)
        case 3:         /* 4K page table with pgstes */
                if (mask & 3)
                        atomic_xor_bits(&page->_refcount, 3 << 24);
-               pgtable_page_dtor(page);
+               pgtable_pte_page_dtor(page);
                __free_page(page);
                break;
        }
index 9bdff4d..e585a62 100644 (file)
@@ -66,7 +66,7 @@ static inline int clp_get_ilp(unsigned long *ilp)
 /*
  * Call Logical Processor with c=0, the give constant lps and an lpcb request.
  */
-static inline int clp_req(void *data, unsigned int lps)
+static __always_inline int clp_req(void *data, unsigned int lps)
 {
        struct { u8 _[CLP_BLK_SIZE]; } *req = data;
        u64 ignored;
index b56f908..22d968b 100644 (file)
@@ -2,10 +2,8 @@
 #ifndef __ASM_SH_PGALLOC_H
 #define __ASM_SH_PGALLOC_H
 
-#include <linux/quicklist.h>
 #include <asm/page.h>
-
-#define QUICK_PT 0     /* Other page table pages that are zero on free */
+#include <asm-generic/pgalloc.h>
 
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
@@ -29,44 +27,9 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-/*
- * Allocate and free page tables.
- */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-       return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-       struct page *page;
-       void *pg;
-
-       pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
-       if (!pg)
-               return NULL;
-       page = virt_to_page(pg);
-       if (!pgtable_page_ctor(page)) {
-               quicklist_free(QUICK_PT, NULL, pg);
-               return NULL;
-       }
-       return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-       quicklist_free(QUICK_PT, NULL, pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-       pgtable_page_dtor(pte);
-       quicklist_free_page(QUICK_PT, NULL, pte);
-}
-
 #define __pte_free_tlb(tlb,pte,addr)                   \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb), (pte));                  \
 } while (0)
 
@@ -79,9 +42,4 @@ do {                                                  \
 } while (0);
 #endif
 
-static inline void check_pgt_cache(void)
-{
-       quicklist_trim(QUICK_PT, NULL, 25, 16);
-}
-
 #endif /* __ASM_SH_PGALLOC_H */
index 9085d11..cbd0f3c 100644 (file)
@@ -123,11 +123,6 @@ typedef pte_t *pte_addr_t;
 
 #define pte_pfn(x)             ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
 
-/*
- * Initialise the page table caches
- */
-extern void pgtable_cache_init(void);
-
 struct vm_area_struct;
 struct mm_struct;
 
index 02ed2df..5c8a2eb 100644 (file)
@@ -1,9 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 menu "Memory management options"
 
-config QUICKLIST
-       def_bool y
-
 config MMU
         bool "Support for memory management hardware"
        depends on !CPU_SH2
index cc779a9..dca946f 100644 (file)
@@ -97,7 +97,3 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 }
-
-void pgtable_cache_init(void)
-{
-}
index fbc1aec..eb24cb1 100644 (file)
@@ -29,7 +29,6 @@ config SPARC
        select RTC_DRV_M48T59
        select RTC_SYSTOHC
        select HAVE_ARCH_JUMP_LABEL if SPARC64
-       select HAVE_FAST_GUP if SPARC64
        select GENERIC_IRQ_SHOW
        select ARCH_WANT_IPC_PARSE_VERSION
        select GENERIC_PCI_IOMAP
index cfec79b..4deddf4 100644 (file)
@@ -38,8 +38,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 #define arch_can_pci_mmap_io() 1
 #define HAVE_ARCH_PCI_GET_UNMAPPED_AREA
 #define get_pci_unmapped_area get_fb_unmapped_area
-
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
 #endif /* CONFIG_SPARC64 */
 
 #if defined(CONFIG_SPARC64) || defined(CONFIG_LEON_PCI)
index 282be50..10538a4 100644 (file)
@@ -17,8 +17,6 @@ void srmmu_free_nocache(void *addr, int size);
 
 extern struct resource sparc_iomap;
 
-#define check_pgt_cache()      do { } while (0)
-
 pgd_t *get_pgd_fast(void);
 static inline void free_pgd_fast(pgd_t *pgd)
 {
index 48abccb..9d3e5cc 100644 (file)
@@ -69,8 +69,6 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 #define pmd_populate(MM, PMD, PTE)             pmd_set(MM, PMD, PTE)
 #define pmd_pgtable(PMD)                       ((pte_t *)__pmd_page(PMD))
 
-#define check_pgt_cache()      do { } while (0)
-
 void pgtable_free(void *table, bool is_page);
 
 #ifdef CONFIG_SMP
index 4eebed6..31da448 100644 (file)
@@ -445,9 +445,4 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 #endif /* !(_SPARC_PGTABLE_H) */
index 1599de7..6ae8016 100644 (file)
@@ -1078,7 +1078,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
 }
 #define io_remap_pfn_range io_remap_pfn_range 
 
-static inline unsigned long untagged_addr(unsigned long start)
+static inline unsigned long __untagged_addr(unsigned long start)
 {
        if (adi_capable()) {
                long addr = start;
@@ -1098,7 +1098,8 @@ static inline unsigned long untagged_addr(unsigned long start)
 
        return start;
 }
-#define untagged_addr untagged_addr
+#define untagged_addr(addr) \
+       ((__typeof__(addr))(__untagged_addr((unsigned long)(addr))))
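
The macro wrapper hands the caller's own type back after the unsigned long
round-trip. A standalone sketch of the same __typeof__ pattern (names here
are illustrative, not from the sparc code):

    /* Strip low bits of any pointer or integer while preserving the
     * argument's type, mirroring the untagged_addr() wrapper above.
     */
    static inline unsigned long __strip_tag(unsigned long addr)
    {
            return addr & ~0xfUL;   /* stand-in for the real ADI untagging */
    }

    #define strip_tag(addr) \
            ((__typeof__(addr))(__strip_tag((unsigned long)(addr))))

    /* Usage: no cast needed at the call site, p stays a char *:
     *      char *p = strip_tag(tagged);
     */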
 
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
@@ -1135,7 +1136,6 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
                                   unsigned long);
 #define HAVE_ARCH_FB_UNMAPPED_AREA
 
-void pgtable_cache_init(void);
 void sun4v_register_fault_status(void);
 void sun4v_ktsb_register(void);
 void __init cheetah_ecache_flush_init(void);
index 046ab11..906eda1 100644 (file)
@@ -31,7 +31,6 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/vaddrs.h>
-#include <asm/pgalloc.h>       /* bug in asm-generic/tlb.h: check_pgt_cache */
 #include <asm/setup.h>
 #include <asm/tlb.h>
 #include <asm/prom.h>
index 4b099dd..e6d9181 100644 (file)
@@ -2903,7 +2903,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
        struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                free_unref_page(page);
                return NULL;
        }
@@ -2919,7 +2919,7 @@ static void __pte_free(pgtable_t pte)
 {
        struct page *page = virt_to_page(pte);
 
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
index aaebbc0..cc3ad64 100644 (file)
@@ -378,7 +378,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
        if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
                return NULL;
        page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -389,7 +389,7 @@ void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
        unsigned long p;
 
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        p = (unsigned long)page_address(pte);   /* Cached address (for test) */
        if (p == 0)
                BUG();
index 612535c..6627d7c 100644 (file)
@@ -1403,8 +1403,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        spin_unlock_irq(&ubd_dev->lock);
 
-       if (ret < 0)
-               blk_mq_requeue_request(req, true);
+       if (ret < 0) {
+               if (ret == -ENOMEM)
+                       res = BLK_STS_RESOURCE;
+               else
+                       res = BLK_STS_DEV_RESOURCE;
+       }
 
        return res;
 }
index 023599c..881e76d 100644 (file)
@@ -29,7 +29,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 #define __pte_free_tlb(tlb,pte, address)               \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb),(pte));                   \
 } while (0)
 
@@ -43,7 +43,5 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 #define __pmd_free_tlb(tlb,x, address)   tlb_remove_page((tlb),virt_to_page(x))
 #endif
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif
 
index e4d3ed9..36a44d5 100644 (file)
@@ -32,8 +32,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 /* zero page used for uninitialized stuff */
 extern unsigned long *empty_zero_page;
 
-#define pgtable_cache_init() do ; while (0)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
index 3f0903b..ba1c9a7 100644 (file)
@@ -18,8 +18,6 @@
 #define __HAVE_ARCH_PTE_ALLOC_ONE
 #include <asm-generic/pgalloc.h>
 
-#define check_pgt_cache()              do { } while (0)
-
 #define _PAGE_USER_TABLE       (PMD_TYPE_TABLE | PMD_PRESENT)
 #define _PAGE_KERNEL_TABLE     (PMD_TYPE_TABLE | PMD_PRESENT)
 
index 126e961..c8f7ba1 100644 (file)
@@ -285,8 +285,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init() do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __UNICORE_PGTABLE_H__ */
index 10d2356..4663d8c 100644 (file)
@@ -15,7 +15,7 @@
 
 #define __pte_free_tlb(tlb, pte, addr)                         \
        do {                                                    \
-               pgtable_page_dtor(pte);                         \
+               pgtable_pte_page_dtor(pte);                     \
                tlb_remove_page((tlb), (pte));                  \
        } while (0)
 
index 37ed5f5..d6e1faa 100644 (file)
@@ -2031,20 +2031,30 @@ config KEXEC_FILE
 config ARCH_HAS_KEXEC_PURGATORY
        def_bool KEXEC_FILE
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
        bool "Verify kernel signature during kexec_file_load() syscall"
        depends on KEXEC_FILE
        ---help---
-         This option makes kernel signature verification mandatory for
-         the kexec_file_load() syscall.
 
-         In addition to that option, you need to enable signature
+         This option makes the kexec_file_load() syscall check for a valid
+         signature of the kernel image.  The image can still be loaded without
+         a valid signature unless you also enable KEXEC_SIG_FORCE, though if
+         there's a signature that we can check, then it must be valid.
+
+         In addition to this option, you need to enable signature
          verification for the corresponding kernel image type being
          loaded in order for this to work.
 
+config KEXEC_SIG_FORCE
+       bool "Require a valid signature in kexec_file_load() syscall"
+       depends on KEXEC_SIG
+       ---help---
+         This option makes kernel signature verification mandatory for
+         the kexec_file_load() syscall.
+
 config KEXEC_BZIMAGE_VERIFY_SIG
        bool "Enable bzImage signature verification support"
-       depends on KEXEC_VERIFY_SIG
+       depends on KEXEC_SIG
        depends on SIGNED_PE_FILE_VERIFICATION
        select SYSTEM_TRUSTED_KEYRING
        ---help---
index 15255f3..25019d4 100644 (file)
  */
 struct mem_vector immovable_mem[MAX_NUMNODES*2];
 
-/*
- * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
- * digits, and '\0' for termination.
- */
-#define MAX_ADDR_LEN 19
-
-static acpi_physical_address get_acpi_rsdp(void)
-{
-       acpi_physical_address addr = 0;
-
-#ifdef CONFIG_KEXEC
-       char val[MAX_ADDR_LEN] = { };
-       int ret;
-
-       ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
-       if (ret < 0)
-               return 0;
-
-       if (kstrtoull(val, 16, &addr))
-               return 0;
-#endif
-       return addr;
-}
-
 /*
  * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
  * ACPI_TABLE_GUID are found, take the former, which has more features.
@@ -278,10 +254,7 @@ acpi_physical_address get_rsdp_addr(void)
 {
        acpi_physical_address pa;
 
-       pa = get_acpi_rsdp();
-
-       if (!pa)
-               pa = boot_params->acpi_rsdp_addr;
+       pa = boot_params->acpi_rsdp_addr;
 
        /*
         * Try to get EFI data from setup_data. This can happen when we're a
@@ -301,6 +274,30 @@ acpi_physical_address get_rsdp_addr(void)
 }
 
 #if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE)
+/*
+ * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
+ * digits, and '\0' for termination.
+ */
+#define MAX_ADDR_LEN 19
+
+static acpi_physical_address get_cmdline_acpi_rsdp(void)
+{
+       acpi_physical_address addr = 0;
+
+#ifdef CONFIG_KEXEC
+       char val[MAX_ADDR_LEN] = { };
+       int ret;
+
+       ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
+       if (ret < 0)
+               return 0;
+
+       if (kstrtoull(val, 16, &addr))
+               return 0;
+#endif
+       return addr;
+}
+
 /* Compute SRAT address from RSDP. */
 static unsigned long get_acpi_srat_table(void)
 {
@@ -311,7 +308,17 @@ static unsigned long get_acpi_srat_table(void)
        char arg[10];
        u8 *entry;
 
-       rsdp = (struct acpi_table_rsdp *)(long)boot_params->acpi_rsdp_addr;
+       /*
+        * Check whether we were given an RSDP on the command line. We don't
+        * stash this in boot params because the kernel itself may have
+        * different ideas about whether to trust a command-line parameter.
+        */
+       rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
+
+       if (!rsdp)
+               rsdp = (struct acpi_table_rsdp *)(long)
+                       boot_params->acpi_rsdp_addr;
+
        if (!rsdp)
                return 0;
 
index d6662fd..82bc60c 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/e820/types.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
+#include <asm/boot.h>
 
 #include "../string.h"
 #include "eboot.h"
@@ -813,7 +814,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
                status = efi_relocate_kernel(sys_table, &bzimage_addr,
                                             hdr->init_size, hdr->init_size,
                                             hdr->pref_address,
-                                            hdr->kernel_alignment);
+                                            hdr->kernel_alignment,
+                                            LOAD_PHYSICAL_ADDR);
                if (status != EFI_SUCCESS) {
                        efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
                        goto fail;
index 53ac0cb..9652d5c 100644 (file)
@@ -345,6 +345,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 {
        const unsigned long kernel_total_size = VO__end - VO__text;
        unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+       unsigned long needed_size;
 
        /* Retain x86 boot parameters pointer passed from startup_32/64. */
        boot_params = rmode;
@@ -379,26 +380,38 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
        free_mem_ptr     = heap;        /* Heap */
        free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
+       /*
+        * The memory hole needed for the kernel is the larger of either
+        * the entire decompressed kernel plus relocation table, or the
+        * entire decompressed kernel plus .bss and .brk sections.
+        *
+        * On X86_64, the memory is mapped with PMD pages. Round the
+        * size up so that the full extent of PMD pages mapped is
+        * included in the check against the valid memory table
+        * entries. This ensures the full mapped area is usable RAM
+        * and doesn't include any reserved areas.
+        */
+       needed_size = max(output_len, kernel_total_size);
+#ifdef CONFIG_X86_64
+       needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
+#endif
+
        /* Report initial kernel position details. */
        debug_putaddr(input_data);
        debug_putaddr(input_len);
        debug_putaddr(output);
        debug_putaddr(output_len);
        debug_putaddr(kernel_total_size);
+       debug_putaddr(needed_size);
 
 #ifdef CONFIG_X86_64
        /* Report address of 32-bit trampoline */
        debug_putaddr(trampoline_32bit);
 #endif
 
-       /*
-        * The memory hole needed for the kernel is the larger of either
-        * the entire decompressed kernel plus relocation table, or the
-        * entire decompressed kernel plus .bss and .brk sections.
-        */
        choose_random_location((unsigned long)input_data, input_len,
                                (unsigned long *)&output,
-                               max(output_len, kernel_total_size),
+                               needed_size,
                                &virt_addr);
 
        /* Validate memory location choices. */
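
A quick arithmetic check of that rounding (assuming MIN_KERNEL_ALIGN is the
2 MiB PMD size on x86_64; ALIGN() is the kernel's usual power-of-two
round-up):

    /* ALIGN(x, a) == ((x) + (a) - 1) & ~((a) - 1), with a a power of two.
     * Example: needed_size = 0x1f40000 (31.25 MiB), PMD = 0x200000:
     *   ALIGN(0x1f40000, 0x200000) == 0x2000000 (32 MiB)
     * so the hole is checked against whole PMD-mapped extents.
     */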
index e7d35f6..64c3e70 100644 (file)
@@ -5,12 +5,14 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/jiffies.h>
 #include <asm/apicdef.h>
 #include <asm/nmi.h>
 
 #include "../perf_event.h"
 
-static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
+static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
+static unsigned long perf_nmi_window;
 
 static __initconst const u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
@@ -641,11 +643,12 @@ static void amd_pmu_disable_event(struct perf_event *event)
  * handler when multiple PMCs are active or PMC overflow while handling some
  * other source of an NMI.
  *
- * Attempt to mitigate this by using the number of active PMCs to determine
- * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
- * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
- * number of active PMCs or 2. The value of 2 is used in case an NMI does not
- * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ * Attempt to mitigate this by creating an NMI window during which any
+ * unhandled NMI is claimed, without extending the window past the point
+ * where latent NMIs could still plausibly arrive. The per-CPU
+ * perf_nmi_tstamp is set to the window end time whenever perf handles a
+ * counter; an unhandled NMI is then claimed only if it arrives within
+ * that window.
  */
 static int amd_pmu_handle_irq(struct pt_regs *regs)
 {
@@ -663,21 +666,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
        handled = x86_pmu_handle_irq(regs);
 
        /*
-        * If a counter was handled, record the number of possible remaining
-        * NMIs that can occur.
+        * If a counter was handled, record a timestamp such that unhandled
+        * NMIs will be claimed if arriving within that window.
         */
        if (handled) {
-               this_cpu_write(perf_nmi_counter,
-                              min_t(unsigned int, 2, active));
+               this_cpu_write(perf_nmi_tstamp,
+                              jiffies + perf_nmi_window);
 
                return handled;
        }
 
-       if (!this_cpu_read(perf_nmi_counter))
+       if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
                return NMI_DONE;
 
-       this_cpu_dec(perf_nmi_counter);
-
        return NMI_HANDLED;
 }
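
A hedged userspace model of that window logic (a simulated clock stands in
for jiffies; all names here are illustrative):

    /* Claim otherwise-unhandled events only while a window, opened
     * whenever real work was done, is still running.
     */
    #include <stdbool.h>

    static unsigned long window_end;                /* models perf_nmi_tstamp */
    static const unsigned long window_len = 100;    /* models msecs_to_jiffies(100) */

    static bool handle_nmi(unsigned long now, bool did_work)
    {
            if (did_work) {
                    window_end = now + window_len;  /* (re)arm the window */
                    return true;                    /* NMI_HANDLED */
            }
            /* Unhandled: claim it only if it arrived inside the window. */
            return now <= window_end;       /* models !time_after(jiffies, tstamp) */
    }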
 
@@ -909,6 +910,9 @@ static int __init amd_core_pmu_init(void)
        if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
                return 0;
 
+       /* Avoid calculating the value each time in the NMI handler */
+       perf_nmi_window = msecs_to_jiffies(100);
+
        switch (boot_cpu_data.x86) {
        case 0x15:
                pr_cont("Fam15h ");
index 5b35b7e..26c3635 100644 (file)
@@ -377,7 +377,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
                                          struct hw_perf_event *hwc, u64 config)
 {
        config &= ~perf_ibs->cnt_mask;
-       wrmsrl(hwc->config_base, config);
+       if (boot_cpu_data.x86 == 0x10)
+               wrmsrl(hwc->config_base, config);
        config &= ~perf_ibs->enable_mask;
        wrmsrl(hwc->config_base, config);
 }
@@ -553,7 +554,8 @@ static struct perf_ibs perf_ibs_op = {
        },
        .msr                    = MSR_AMD64_IBSOPCTL,
        .config_mask            = IBS_OP_CONFIG_MASK,
-       .cnt_mask               = IBS_OP_MAX_CNT,
+       .cnt_mask               = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
+                                 IBS_OP_CUR_CNT_RAND,
        .enable_mask            = IBS_OP_ENABLE,
        .valid_mask             = IBS_OP_VAL,
        .max_period             = IBS_OP_MAX_CNT << 4,
@@ -614,7 +616,7 @@ fail:
        if (event->attr.sample_type & PERF_SAMPLE_RAW)
                offset_max = perf_ibs->offset_max;
        else if (check_rip)
-               offset_max = 2;
+               offset_max = 3;
        else
                offset_max = 1;
        do {
index 27ee47a..fcef678 100644 (file)
@@ -4983,6 +4983,8 @@ __init int intel_pmu_init(void)
        case INTEL_FAM6_SKYLAKE:
        case INTEL_FAM6_KABYLAKE_L:
        case INTEL_FAM6_KABYLAKE:
+       case INTEL_FAM6_COMETLAKE_L:
+       case INTEL_FAM6_COMETLAKE:
                x86_add_quirk(intel_pebs_isolation_quirk);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -5031,6 +5033,8 @@ __init int intel_pmu_init(void)
                /* fall through */
        case INTEL_FAM6_ICELAKE_L:
        case INTEL_FAM6_ICELAKE:
+       case INTEL_FAM6_TIGERLAKE_L:
+       case INTEL_FAM6_TIGERLAKE:
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
index 9f2f390..e1daf41 100644 (file)
  *     MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *                            perf code: 0x01
  *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
- *                                             CNL
+ *                                             CNL,KBL,CML
  *                            Scope: Core
  *     MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *                            perf code: 0x02
  *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
- *                                             SKL,KNL,GLM,CNL
+ *                                             SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
  *                            Scope: Core
  *     MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *                            perf code: 0x03
- *                            Available model: SNB,IVB,HSW,BDW,SKL,CNL
+ *                            Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
+ *                                             ICL,TGL
  *                            Scope: Core
  *     MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *                            perf code: 0x00
- *                            Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL
+ *                            Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
+ *                                             KBL,CML,ICL,TGL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *                            perf code: 0x01
  *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
- *                                             GLM,CNL
+ *                                             GLM,CNL,KBL,CML,ICL,TGL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *                            perf code: 0x02
  *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *                                             SKL,KNL,GLM,CNL
+ *                                             SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *                            perf code: 0x03
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
+ *                                             KBL,CML,ICL,TGL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
  *                            perf code: 0x04
- *                            Available model: HSW ULT,KBL,CNL
+ *                            Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
  *                            perf code: 0x05
- *                            Available model: HSW ULT,KBL,CNL
+ *                            Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *                            perf code: 0x06
- *                            Available model: HSW ULT,KBL,GLM,CNL
+ *                            Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
  *                            Scope: Package (physical package)
  *
  */
@@ -544,6 +547,19 @@ static const struct cstate_model cnl_cstates __initconst = {
                                  BIT(PERF_CSTATE_PKG_C10_RES),
 };
 
+static const struct cstate_model icl_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C6_RES) |
+                                 BIT(PERF_CSTATE_CORE_C7_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
+                                 BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES) |
+                                 BIT(PERF_CSTATE_PKG_C7_RES) |
+                                 BIT(PERF_CSTATE_PKG_C8_RES) |
+                                 BIT(PERF_CSTATE_PKG_C9_RES) |
+                                 BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
 static const struct cstate_model slm_cstates __initconst = {
        .core_events            = BIT(PERF_CSTATE_CORE_C1_RES) |
                                  BIT(PERF_CSTATE_CORE_C6_RES),
@@ -614,6 +630,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
        X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates),
        X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE,   hswult_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates),
 
        X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates),
 
@@ -625,8 +643,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
        X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
 
-       X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, snb_cstates),
-       X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE,   icl_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates),
        { },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
index 74e80ed..05e43d0 100644 (file)
@@ -627,7 +627,7 @@ static struct topa *topa_alloc(int cpu, gfp_t gfp)
         * link as the 2nd entry in the table
         */
        if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
-               TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p);
+               TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT;
                TOPA_ENTRY(&tp->topa, 1)->end = 1;
        }
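A hedged reading of the one-line fix above: the ToPA entry's base field stores a page-frame-style value, not a raw physical address, so the result of page_to_phys() has to be scaled down by TOPA_SHIFT (assumed here to be the 4K page shift) before it is stored:

/* Illustrative encode/decode of the ToPA base field (assumption:
 * TOPA_SHIFT == 12, i.e. the 4K page shift). */
entry->base = page_to_phys(page) >> TOPA_SHIFT; /* store PFN-like value */
phys       = (u64)entry->base << TOPA_SHIFT;    /* recover the address  */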
 
index 6fc2e06..86467f8 100644 (file)
@@ -502,10 +502,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags)
        local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
        uncore_enable_event(box, event);
 
-       if (box->n_active == 1) {
-               uncore_enable_box(box);
+       if (box->n_active == 1)
                uncore_pmu_start_hrtimer(box);
-       }
 }
 
 void uncore_pmu_event_stop(struct perf_event *event, int flags)
@@ -529,10 +527,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags)
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;
 
-               if (box->n_active == 0) {
-                       uncore_disable_box(box);
+               if (box->n_active == 0)
                        uncore_pmu_cancel_hrtimer(box);
-               }
        }
 
        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
@@ -778,6 +774,40 @@ static int uncore_pmu_event_init(struct perf_event *event)
        return ret;
 }
 
+static void uncore_pmu_enable(struct pmu *pmu)
+{
+       struct intel_uncore_pmu *uncore_pmu;
+       struct intel_uncore_box *box;
+
+       uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
+       if (!uncore_pmu)
+               return;
+
+       box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
+       if (!box)
+               return;
+
+       if (uncore_pmu->type->ops->enable_box)
+               uncore_pmu->type->ops->enable_box(box);
+}
+
+static void uncore_pmu_disable(struct pmu *pmu)
+{
+       struct intel_uncore_pmu *uncore_pmu;
+       struct intel_uncore_box *box;
+
+       uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
+       if (!uncore_pmu)
+               return;
+
+       box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
+       if (!box)
+               return;
+
+       if (uncore_pmu->type->ops->disable_box)
+               uncore_pmu->type->ops->disable_box(box);
+}
+
 static ssize_t uncore_get_attr_cpumask(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
@@ -803,6 +833,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
                pmu->pmu = (struct pmu) {
                        .attr_groups    = pmu->type->attr_groups,
                        .task_ctx_nr    = perf_invalid_context,
+                       .pmu_enable     = uncore_pmu_enable,
+                       .pmu_disable    = uncore_pmu_disable,
                        .event_init     = uncore_pmu_event_init,
                        .add            = uncore_pmu_event_add,
                        .del            = uncore_pmu_event_del,
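The two new callbacks recover the driver-private PMU from the embedded generic struct pmu. A minimal, self-contained sketch of that container_of pattern (hypothetical types, not the kernel's definitions):

#include <stddef.h>

struct pmu { int caps; };

struct uncore_pmu_like {
        int id;
        struct pmu pmu;                 /* embedded generic object */
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static int uncore_id(struct pmu *p)
{
        /* Walk back from the embedded member to its enclosing struct. */
        return container_of(p, struct uncore_pmu_like, pmu)->id;
}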
index f36f7be..bbfdaa7 100644 (file)
@@ -441,18 +441,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box,
        return -EINVAL;
 }
 
-static inline void uncore_disable_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->type->ops->disable_box)
-               box->pmu->type->ops->disable_box(box);
-}
-
-static inline void uncore_enable_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->type->ops->enable_box)
-               box->pmu->type->ops->enable_box(box);
-}
-
 static inline void uncore_disable_event(struct intel_uncore_box *box,
                                struct perf_event *event)
 {
index b1afc77..6f86650 100644 (file)
@@ -89,7 +89,14 @@ static bool test_intel(int idx, void *data)
        case INTEL_FAM6_SKYLAKE_X:
        case INTEL_FAM6_KABYLAKE_L:
        case INTEL_FAM6_KABYLAKE:
+       case INTEL_FAM6_COMETLAKE_L:
+       case INTEL_FAM6_COMETLAKE:
        case INTEL_FAM6_ICELAKE_L:
+       case INTEL_FAM6_ICELAKE:
+       case INTEL_FAM6_ICELAKE_X:
+       case INTEL_FAM6_ICELAKE_D:
+       case INTEL_FAM6_TIGERLAKE_L:
+       case INTEL_FAM6_TIGERLAKE:
                if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
                        return true;
                break;
index 5c056b8..e01078e 100644 (file)
@@ -260,11 +260,21 @@ void __init hv_apic_init(void)
        }
 
        if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
-               pr_info("Hyper-V: Using MSR based APIC access\n");
+               pr_info("Hyper-V: Using enlightened APIC (%s mode)",
+                       x2apic_enabled() ? "x2apic" : "xapic");
+               /*
+                * With x2apic, architectural x2apic MSRs are equivalent to the
+                * respective synthetic MSRs, so there's no need to override
+                * the apic accessors.  The only exception is
+                * hv_apic_eoi_write, because it benefits from lazy EOI when
+                * available, but it works for both xapic and x2apic modes.
+                */
                apic_set_eoi_write(hv_apic_eoi_write);
-               apic->read      = hv_apic_read;
-               apic->write     = hv_apic_write;
-               apic->icr_write = hv_apic_icr_write;
-               apic->icr_read  = hv_apic_icr_read;
+               if (!x2apic_enabled()) {
+                       apic->read      = hv_apic_read;
+                       apic->write     = hv_apic_write;
+                       apic->icr_write = hv_apic_icr_write;
+                       apic->icr_read  = hv_apic_icr_read;
+               }
        }
 }
index aac686e..bc9693c 100644 (file)
@@ -117,6 +117,12 @@ static inline bool acpi_has_cpu_in_madt(void)
        return !!acpi_lapic;
 }
 
+#define ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{
+       x86_init.acpi.set_root_pointer(addr);
+}
+
 #define ACPI_HAVE_ARCH_GET_ROOT_POINTER
 static inline u64 acpi_arch_get_root_pointer(void)
 {
@@ -125,6 +131,7 @@ static inline u64 acpi_arch_get_root_pointer(void)
 
 void acpi_generic_reduced_hw_init(void);
 
+void x86_default_set_root_pointer(u64 addr);
 u64 x86_default_get_root_pointer(void);
 
 #else /* !CONFIG_ACPI */
@@ -138,6 +145,8 @@ static inline void disable_acpi(void) { }
 
 static inline void acpi_generic_reduced_hw_init(void) { }
 
+static inline void x86_default_set_root_pointer(u64 addr) { }
+
 static inline u64 x86_default_get_root_pointer(void)
 {
        return 0;
index cff3f3f..8348f7d 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 
 #ifndef _ASM_X86_CPU_ENTRY_AREA_H
 #define _ASM_X86_CPU_ENTRY_AREA_H
index 7a27056..7741e21 100644 (file)
 /* Recommend using enlightened VMCS */
 #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED            BIT(14)
 
+/*
+ * Virtual processor will never share a physical core with another virtual
+ * processor, except for virtual processors that are reported as sibling SMT
+ * threads.
+ */
+#define HV_X64_NO_NONARCH_CORESHARING                  BIT(18)
+
 /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
+#define HV_X64_NESTED_DIRECT_FLUSH                     BIT(17)
 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH              BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP                       BIT(19)
 
@@ -524,14 +532,24 @@ struct hv_timer_message_payload {
        __u64 delivery_time;    /* When the message was delivered */
 } __packed;
 
+struct hv_nested_enlightenments_control {
+       struct {
+               __u32 directhypercall:1;
+               __u32 reserved:31;
+       } features;
+       struct {
+               __u32 reserved;
+       } hypercallControls;
+} __packed;
+
 /* Define virtual processor assist page structure. */
 struct hv_vp_assist_page {
        __u32 apic_assist;
-       __u32 reserved;
-       __u64 vtl_control[2];
-       __u64 nested_enlightenments_control[2];
-       __u32 enlighten_vmentry;
-       __u32 padding;
+       __u32 reserved1;
+       __u64 vtl_control[3];
+       struct hv_nested_enlightenments_control nested_control;
+       __u8 enlighten_vmentry;
+       __u8 reserved2[7];
        __u64 current_nested_vmcs;
 } __packed;
 
@@ -882,4 +900,7 @@ struct hv_tlb_flush_ex {
        u64 gva_list[];
 } __packed;
 
+struct hv_partition_assist_pg {
+       u32 tlb_lock_count;
+};
 #endif
index f046225..c606c0b 100644 (file)
@@ -83,6 +83,9 @@
 #define INTEL_FAM6_TIGERLAKE_L         0x8C
 #define INTEL_FAM6_TIGERLAKE           0x8D
 
+#define INTEL_FAM6_COMETLAKE           0xA5
+#define INTEL_FAM6_COMETLAKE_L         0xA6
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL                0x1C /* Diamondville, Pineview */
index a3a3ec7..24d6598 100644 (file)
@@ -219,13 +219,6 @@ enum {
                                 PFERR_WRITE_MASK |             \
                                 PFERR_PRESENT_MASK)
 
-/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
- */
-#define SPTE_SPECIAL_MASK (1ULL << 62)
-
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC   0
 /*
@@ -320,6 +313,7 @@ struct kvm_mmu_page {
        struct list_head link;
        struct hlist_node hash_link;
        bool unsync;
+       u8 mmu_valid_gen;
        bool mmio_cached;
 
        /*
@@ -335,7 +329,6 @@ struct kvm_mmu_page {
        int root_count;          /* Currently serving as active root */
        unsigned int unsync_children;
        struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-       unsigned long mmu_valid_gen;
        DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
@@ -844,6 +837,8 @@ struct kvm_hv {
 
        /* How many vCPUs have VP index != vCPU index */
        atomic_t num_mismatched_vp_indexes;
+
+       struct hv_partition_assist_pg *hv_pa_pg;
 };
 
 enum kvm_irqchip_mode {
@@ -857,12 +852,13 @@ struct kvm_arch {
        unsigned long n_requested_mmu_pages;
        unsigned long n_max_mmu_pages;
        unsigned int indirect_shadow_pages;
-       unsigned long mmu_valid_gen;
+       u8 mmu_valid_gen;
        struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
        /*
         * Hash table of struct kvm_mmu_page.
         */
        struct list_head active_mmu_pages;
+       struct list_head zapped_obsolete_pages;
        struct kvm_page_track_notifier_node mmu_sp_tracker;
        struct kvm_page_track_notifier_head track_notifier_head;
 
@@ -1193,7 +1189,7 @@ struct kvm_x86_ops {
        int (*set_nested_state)(struct kvm_vcpu *vcpu,
                                struct kvm_nested_state __user *user_kvm_nested_state,
                                struct kvm_nested_state *kvm_state);
-       void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+       bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
 
        int (*smi_allowed)(struct kvm_vcpu *vcpu);
        int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
@@ -1213,6 +1209,7 @@ struct kvm_x86_ops {
        bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
 
        bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+       int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1312,18 +1309,42 @@ extern u64  kvm_default_tsc_scaling_ratio;
 
 extern u64 kvm_mce_cap_supported;
 
-enum emulation_result {
-       EMULATE_DONE,         /* no further processing */
-       EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
-       EMULATE_FAIL,         /* can't emulate this instruction */
-};
-
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ *                     userspace I/O) to indicate that the emulation context
+ *                     should be reused as is, i.e. skip initialization of
+ *                     emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ *                   Indicates that only select instructions (tagged with
+ *                   EmulateOnUD) should be emulated (to minimize the emulator
+ *                   attack surface).  See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ *                decode the instruction length.  For use *only* by
+ *                kvm_x86_ops->skip_emulated_instruction() implementations.
+ *
+ * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
+ *                       retry native execution under certain conditions.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ *                          triggered by KVM's magic "force emulation" prefix,
+ *                          which is opt in via module param (off by default).
+ *                          Bypasses EmulateOnUD restriction despite emulating
+ *                          due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ *                          Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ *                     backdoor emulation, which is opt in via module param.
+ *                     VMware backdoor emulation handles select instructions
+ *                     and reinjects the #GP for all other cases.
+ */
 #define EMULTYPE_NO_DECODE         (1 << 0)
 #define EMULTYPE_TRAP_UD           (1 << 1)
 #define EMULTYPE_SKIP              (1 << 2)
 #define EMULTYPE_ALLOW_RETRY       (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL     (1 << 4)
-#define EMULTYPE_VMWARE                    (1 << 5)
+#define EMULTYPE_TRAP_UD_FORCED            (1 << 4)
+#define EMULTYPE_VMWARE_GP         (1 << 5)
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
                                        void *insn, int insn_len);
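As a usage note for the flag documentation above: callers hand kvm_emulate_instruction() a bitwise OR of EMULTYPE_* values. A hedged sketch of one such caller (illustrative, not a call site touched by this patch):

/* Intercepted #GP routed to VMware backdoor emulation. */
static int handle_vmware_backdoor_gp(struct kvm_vcpu *vcpu)
{
        return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
}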
@@ -1506,7 +1527,7 @@ enum {
 #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
 #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
 
-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);
 
 /*
  * Hardware virtualization extension instructions may fault if a
@@ -1514,24 +1535,14 @@ asmlinkage void __noreturn kvm_spurious_fault(void);
  * Usually after catching the fault we just panic; during reboot
  * instead the instruction is ignored.
  */
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)             \
+#define __kvm_handle_fault_on_reboot(insn)                             \
        "666: \n\t"                                                     \
        insn "\n\t"                                                     \
        "jmp    668f \n\t"                                              \
        "667: \n\t"                                                     \
        "call   kvm_spurious_fault \n\t"                                \
        "668: \n\t"                                                     \
-       ".pushsection .fixup, \"ax\" \n\t"                              \
-       "700: \n\t"                                                     \
-       cleanup_insn "\n\t"                                             \
-       "cmpb   $0, kvm_rebooting\n\t"                                  \
-       "je     667b \n\t"                                              \
-       "jmp    668b \n\t"                                              \
-       ".popsection \n\t"                                              \
-       _ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn)             \
-       ____kvm_handle_fault_on_reboot(insn, "")
+       _ASM_EXTABLE(666b, 667b)
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
index e28f8b7..9d5252c 100644 (file)
@@ -21,7 +21,7 @@
 #define MWAIT_ECX_INTERRUPT_BREAK      0x1
 #define MWAITX_ECX_TIMER_ENABLE                BIT(1)
 #define MWAITX_MAX_LOOPS               ((u32)-1)
-#define MWAITX_DISABLE_CSTATES         0xf
+#define MWAITX_DISABLE_CSTATES         0xf0
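The constant change matters because MWAITX consumes the C-state hint from bits 7:4 of EAX, so "disable all C-states" must be 0xf0 rather than 0xf (a hedged reading of the fix). A typical invocation would look roughly like:

/* Hint in EAX[7:4], loop count in EBX, timer enable in ECX bit 1. */
__mwaitx(MWAITX_DISABLE_CSTATES, loops, MWAITX_ECX_TIMER_ENABLE);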
 
 static inline void __monitor(const void *eax, unsigned long ecx,
                             unsigned long edx)
index c78da8e..0dca7f7 100644 (file)
@@ -29,8 +29,6 @@ extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
 extern pmd_t initial_pg_pmd[];
 
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
 void paging_init(void);
 void sync_initial_page_table(void);
 
index 4990d26..0b6c404 100644 (file)
@@ -241,9 +241,6 @@ extern void cleanup_highmap(void);
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#define pgtable_cache_init()   do { } while (0)
-#define check_pgt_cache()      do { } while (0)
-
 #define PAGE_AGP    PAGE_KERNEL_NOCACHE
 #define HAVE_PAGE_AGP 1
 
index 5df09a0..07375b4 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_X86_PTI_H
 #define _ASM_X86_PTI_H
 #ifndef __ASSEMBLY__
index dec9c1e..6ece856 100644 (file)
@@ -52,6 +52,7 @@ enum {
        INTERCEPT_MWAIT,
        INTERCEPT_MWAIT_COND,
        INTERCEPT_XSETBV,
+       INTERCEPT_RDPRU,
 };
 
 
index 35c225e..61d93f0 100644 (file)
@@ -734,5 +734,28 @@ do {                                                                               \
        if (unlikely(__gu_err)) goto err_label;                                 \
 } while (0)
 
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label)                   \
+       while (len >= sizeof(type)) {                                   \
+               unsafe_put_user(*(type *)src,(type __user *)dst,label); \
+               dst += sizeof(type);                                    \
+               src += sizeof(type);                                    \
+               len -= sizeof(type);                                    \
+       }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label)                      \
+do {                                                                   \
+       char __user *__ucu_dst = (_dst);                                \
+       const char *__ucu_src = (_src);                                 \
+       size_t __ucu_len = (_len);                                      \
+       unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);  \
+       unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);  \
+       unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);  \
+       unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);   \
+} while (0)
+
 #endif /* _ASM_X86_UACCESS_H */
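The descending u64/u32/u16/u8 loops copy the bulk of the buffer with the widest stores and mop up the tail. A hedged usage sketch (hypothetical function, not from this patch): the unsafe accessors are only legal inside a user_access_begin()/user_access_end() window, with the label taking the fault path.

static int copy_reply_to_user(void __user *dst, const void *src, size_t len)
{
        if (!user_access_begin(dst, len))
                return -EFAULT;
        unsafe_copy_to_user(dst, src, len, efault);
        user_access_end();
        return 0;
efault:
        user_access_end();
        return -EFAULT;
}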
 
index e00c9e8..ac9fc51 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeatures.h>
 #include <asm/alternative.h>
+#include <linux/stringify.h>
 
 /*
  * The hypercall definitions differ in the low word of the %edx argument
@@ -20,8 +21,8 @@
  */
 
 /* Old port-based version */
-#define VMWARE_HYPERVISOR_PORT    "0x5658"
-#define VMWARE_HYPERVISOR_PORT_HB "0x5659"
+#define VMWARE_HYPERVISOR_PORT    0x5658
+#define VMWARE_HYPERVISOR_PORT_HB 0x5659
 
 /* Current vmcall / vmmcall version */
 #define VMWARE_HYPERVISOR_HB   BIT(0)
@@ -29,7 +30,8 @@
 
 /* The low bandwidth call. The low word of edx is presumed clear. */
 #define VMWARE_HYPERCALL                                               \
-       ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT ", %%dx; inl (%%dx)", \
+       ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \
+                     "inl (%%dx), %%eax",                              \
                      "vmcall", X86_FEATURE_VMCALL,                     \
                      "vmmcall", X86_FEATURE_VMW_VMMCALL)
 
@@ -38,7 +40,8 @@
  * HB and OUT bits set.
  */
 #define VMWARE_HYPERCALL_HB_OUT                                                \
-       ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep outsb", \
+       ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+                     "rep outsb",                                      \
                      "vmcall", X86_FEATURE_VMCALL,                     \
                      "vmmcall", X86_FEATURE_VMW_VMMCALL)
 
@@ -47,7 +50,8 @@
  * HB bit set.
  */
 #define VMWARE_HYPERCALL_HB_IN                                         \
-       ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep insb", \
+       ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+                     "rep insb",                                       \
                      "vmcall", X86_FEATURE_VMCALL,                     \
                      "vmmcall", X86_FEATURE_VMW_VMMCALL)
 #endif
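The switch from string literals to numeric port constants leans on <linux/stringify.h> in the asm templates above: a bare # would stringize the macro name rather than its expansion, hence the standard two-level idiom, roughly:

#define __stringify_1(x...)    #x
#define __stringify(x...)      __stringify_1(x)

#define PORT 0x5658
/* One level:  #PORT             -> "PORT"   */
/* Two levels: __stringify(PORT) -> "0x5658" */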
index b15e646..1835767 100644 (file)
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PT_USE_GPA              0x01000000
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC     0x00400000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE   0x04000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
 #define VMX_MISC_SAVE_EFER_LMA                 0x00000020
 #define VMX_MISC_ACTIVITY_HLT                  0x00000040
 #define VMX_MISC_ZERO_LEN_INS                  0x40000000
+#define VMX_MISC_MSR_LIST_MULTIPLIER           512
 
 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
index ac09341..1943585 100644 (file)
@@ -134,10 +134,12 @@ struct x86_hyper_init {
 
 /**
  * struct x86_init_acpi - x86 ACPI init functions
+ * @set_root_pointer:          set RSDP address
  * @get_root_pointer:          get RSDP address
  * @reduced_hw_early_init:     hardware reduced platform early init
  */
 struct x86_init_acpi {
+       void (*set_root_pointer)(u64 addr);
        u64 (*get_root_pointer)(void);
        void (*reduced_hw_early_init)(void);
 };
index a9731f8..2e8a30f 100644 (file)
@@ -75,6 +75,7 @@
 #define SVM_EXIT_MWAIT         0x08b
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
+#define SVM_EXIT_RDPRU         0x08e
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI           0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS     0x402
index f01950a..3eb8411 100644 (file)
@@ -86,6 +86,8 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
+#define EXIT_REASON_UMWAIT              67
+#define EXIT_REASON_TPAUSE              68
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_RDSEED,                "RDSEED" }, \
        { EXIT_REASON_PML_FULL,              "PML_FULL" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
+       { EXIT_REASON_UMWAIT,                "UMWAIT" }, \
+       { EXIT_REASON_TPAUSE,                "TPAUSE" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
index 17b33ef..04205ce 100644 (file)
@@ -1760,6 +1760,11 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
        e820__update_table_print();
 }
 
+void x86_default_set_root_pointer(u64 addr)
+{
+       boot_params.acpi_rsdp_addr = addr;
+}
+
 u64 x86_default_get_root_pointer(void)
 {
        return boot_params.acpi_rsdp_addr;
index 45e92cb..b0889c4 100644 (file)
@@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
 {
        struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
 
-       cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+       if (cmsk)
+               cpumask_clear_cpu(dead_cpu, &cmsk->mask);
        free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
        return 0;
 }
index 267daad..c656d92 100644 (file)
@@ -216,6 +216,10 @@ static void __init ms_hyperv_init_platform(void)
        int hv_host_info_ecx;
        int hv_host_info_edx;
 
+#ifdef CONFIG_PARAVIRT
+       pv_info.name = "Hyper-V";
+#endif
+
        /*
         * Extract the features and hints
         */
index 32b4dc9..c222f28 100644 (file)
  */
 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
 
+u32 get_umwait_control_msr(void)
+{
+       return umwait_control_cached;
+}
+EXPORT_SYMBOL_GPL(get_umwait_control_msr);
+
 /*
  * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
  * hardware or BIOS before kernel boot.
index 9735139..46d7326 100644 (file)
@@ -49,7 +49,7 @@
 #define VMWARE_CMD_VCPU_RESERVED 31
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)                           \
-       __asm__("inl (%%dx)" :                                          \
+       __asm__("inl (%%dx), %%eax" :                                   \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
index 29ffa49..206a4b6 100644 (file)
@@ -222,13 +222,31 @@ unsigned long __head __startup_64(unsigned long physaddr,
         * we might write invalid pmds, when the kernel is relocated
         * cleanup_highmap() fixes this up along with the mappings
         * beyond _end.
+        *
+        * Only the region occupied by the kernel image has so far
+        * been checked against the table of usable memory regions
+        * provided by the firmware, so invalidate pages outside that
+        * region. A page table entry that maps to a reserved area of
+        * memory would allow processor speculation into that area,
+        * and on some hardware (particularly the UV platform) even
+        * speculative access to some reserved areas is caught as an
+        * error, causing the BIOS to halt the system.
         */
 
        pmd = fixup_pointer(level2_kernel_pgt, physaddr);
-       for (i = 0; i < PTRS_PER_PMD; i++) {
+
+       /* invalidate pages before the kernel image */
+       for (i = 0; i < pmd_index((unsigned long)_text); i++)
+               pmd[i] &= ~_PAGE_PRESENT;
+
+       /* fixup pages that are part of the kernel image */
+       for (; i <= pmd_index((unsigned long)_end); i++)
                if (pmd[i] & _PAGE_PRESENT)
                        pmd[i] += load_delta;
-       }
+
+       /* invalidate pages after the kernel image */
+       for (; i < PTRS_PER_PMD; i++)
+               pmd[i] &= ~_PAGE_PRESENT;
 
        /*
         * Fixup phys_base - remove the memory encryption mask to obtain
index 4c40783..4d4f5d9 100644 (file)
@@ -74,9 +74,9 @@ bool arch_ima_get_secureboot(void)
 
 /* secureboot arch rules */
 static const char * const sb_arch_rules[] = {
-#if !IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG)
+#if !IS_ENABLED(CONFIG_KEXEC_SIG)
        "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig",
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
        "measure func=KEXEC_KERNEL_CHECK",
 #if !IS_ENABLED(CONFIG_MODULE_SIG)
        "appraise func=MODULE_CHECK appraise_type=imasig",
index 0fe1c87..61a89d3 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/ioport.h>
+#include <linux/security.h>
 #include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
@@ -31,7 +32,8 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
        if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
                return -EINVAL;
-       if (turn_on && !capable(CAP_SYS_RAWIO))
+       if (turn_on && (!capable(CAP_SYS_RAWIO) ||
+                       security_locked_down(LOCKDOWN_IOPORT)))
                return -EPERM;
 
        /*
@@ -126,7 +128,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
                return -EINVAL;
        /* Trying to gain more privileges? */
        if (level > old) {
-               if (!capable(CAP_SYS_RAWIO))
+               if (!capable(CAP_SYS_RAWIO) ||
+                   security_locked_down(LOCKDOWN_IOPORT))
                        return -EPERM;
        }
        regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
index 5ebcd02..d2f4e70 100644 (file)
@@ -180,6 +180,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
        if (efi_enabled(EFI_OLD_MEMMAP))
                return 0;
 
+       params->secure_boot = boot_params.secure_boot;
        ei->efi_loader_signature = current_ei->efi_loader_signature;
        ei->efi_systab = current_ei->efi_systab;
        ei->efi_systab_hi = current_ei->efi_systab_hi;
index 3db2252..1547be3 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/notifier.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/security.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -79,6 +80,10 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
        int err = 0;
        ssize_t bytes = 0;
 
+       err = security_locked_down(LOCKDOWN_MSR);
+       if (err)
+               return err;
+
        if (count % 8)
                return -EINVAL; /* Invalid chunk size */
 
@@ -130,6 +135,9 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
                        err = -EFAULT;
                        break;
                }
+               err = security_locked_down(LOCKDOWN_MSR);
+               if (err)
+                       break;
                err = wrmsr_safe_regs_on_cpu(cpu, regs);
                if (err)
                        break;
index 320ab97..1d0797b 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 //
 // Code shared between 32 and 64 bit
 
index 1bef687..18a799c 100644 (file)
@@ -95,6 +95,7 @@ struct x86_init_ops x86_init __initdata = {
        },
 
        .acpi = {
+               .set_root_pointer       = x86_default_set_root_pointer,
                .get_root_pointer       = x86_default_get_root_pointer,
                .reduced_hw_early_init  = acpi_generic_reduced_hw_init,
        },
index dd5985e..f68c0c7 100644 (file)
@@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
        case 7:
        case 0xb:
        case 0xd:
+       case 0xf:
+       case 0x10:
+       case 0x12:
        case 0x14:
+       case 0x17:
+       case 0x18:
+       case 0x1f:
        case 0x8000001d:
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                break;
@@ -357,10 +363,10 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 
        /* cpuid 7.0.ecx*/
        const u32 kvm_cpuid_7_0_ecx_x86_features =
-               F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+               F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
                F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
 
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
@@ -479,6 +485,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 
        /* cpuid 0x80000008.ebx */
        const u32 kvm_cpuid_8000_0008_ebx_x86_features =
+               F(CLZERO) | F(XSAVEERPTR) |
                F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
                F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
 
@@ -612,16 +619,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
         */
        case 0x1f:
        case 0xb: {
-               int i, level_type;
+               int i;
 
-               /* read more entries until level_type is zero */
-               for (i = 1; ; ++i) {
+               /*
+                * We filled in entry[0] for CPUID(EAX=<function>,
+                * ECX=00H) above.  If its level type (ECX[15:8]) is
+                * zero, then the leaf is unimplemented, and we're
+                * done.  Otherwise, continue to populate entries
+                * until the level type (ECX[15:8]) of the previously
+                * added entry is zero.
+                */
+               for (i = 1; entry[i - 1].ecx & 0xff00; ++i) {
                        if (*nent >= maxnent)
                                goto out;
 
-                       level_type = entry[i - 1].ecx & 0xff00;
-                       if (!level_type)
-                               break;
                        do_host_cpuid(&entry[i], function, i);
                        ++*nent;
                }
@@ -963,53 +974,66 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 
 /*
- * If no match is found, check whether we exceed the vCPU's limit
- * and return the content of the highest valid _standard_ leaf instead.
- * This is to satisfy the CPUID specification.
+ * If the basic or extended CPUID leaf requested is higher than the
+ * maximum supported basic or extended leaf, respectively, then it is
+ * out of range.
  */
-static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
-                                                  u32 function, u32 index)
+static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
 {
-       struct kvm_cpuid_entry2 *maxlevel;
-
-       maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
-       if (!maxlevel || maxlevel->eax >= function)
-               return NULL;
-       if (function & 0x80000000) {
-               maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
-               if (!maxlevel)
-                       return NULL;
-       }
-       return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
+       struct kvm_cpuid_entry2 *max;
+
+       max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
+       return max && function <= max->eax;
 }
 
 bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
               u32 *ecx, u32 *edx, bool check_limit)
 {
        u32 function = *eax, index = *ecx;
-       struct kvm_cpuid_entry2 *best;
-       bool entry_found = true;
-
-       best = kvm_find_cpuid_entry(vcpu, function, index);
-
-       if (!best) {
-               entry_found = false;
-               if (!check_limit)
-                       goto out;
+       struct kvm_cpuid_entry2 *entry;
+       struct kvm_cpuid_entry2 *max;
+       bool found;
 
-               best = check_cpuid_limit(vcpu, function, index);
+       entry = kvm_find_cpuid_entry(vcpu, function, index);
+       found = entry;
+       /*
+        * Intel CPUID semantics treats any query for an out-of-range
+        * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were
+        * requested. AMD CPUID semantics returns all zeroes for any
+        * undefined leaf, whether or not the leaf is in range.
+        */
+       if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
+           !cpuid_function_in_range(vcpu, function)) {
+               max = kvm_find_cpuid_entry(vcpu, 0, 0);
+               if (max) {
+                       function = max->eax;
+                       entry = kvm_find_cpuid_entry(vcpu, function, index);
+               }
        }
-
-out:
-       if (best) {
-               *eax = best->eax;
-               *ebx = best->ebx;
-               *ecx = best->ecx;
-               *edx = best->edx;
-       } else
+       if (entry) {
+               *eax = entry->eax;
+               *ebx = entry->ebx;
+               *ecx = entry->ecx;
+               *edx = entry->edx;
+       } else {
                *eax = *ebx = *ecx = *edx = 0;
-       trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found);
-       return entry_found;
+               /*
+                * When leaf 0BH or 1FH is defined, CL is pass-through
+                * and EDX is always the x2APIC ID, even for undefined
+                * subleaves. Index 1 will exist iff the leaf is
+                * implemented, so we pass through CL iff leaf 1
+                * exists. EDX can be copied from any existing index.
+                */
+               if (function == 0xb || function == 0x1f) {
+                       entry = kvm_find_cpuid_entry(vcpu, function, 1);
+                       if (entry) {
+                               *ecx = index & 0xff;
+                               *edx = entry->edx;
+                       }
+               }
+       }
+       trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found);
+       return found;
 }
 EXPORT_SYMBOL_GPL(kvm_cpuid);
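To make the Intel-versus-AMD rule above concrete, here is a hedged, self-contained model of the lookup policy; find_entry() stands in for kvm_find_cpuid_entry(), and the 0BH/1FH x2APIC special case is omitted:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct cpuid_leaf { uint32_t eax, ebx, ecx, edx; };

const struct cpuid_leaf *find_entry(uint32_t fn, uint32_t idx);

static const struct cpuid_leaf *resolve(uint32_t fn, uint32_t idx,
                                        bool guest_is_amd)
{
        const struct cpuid_leaf *e = find_entry(fn, idx);
        const struct cpuid_leaf *max;

        if (e || guest_is_amd)          /* AMD: undefined leaves read as 0 */
                return e;
        max = find_entry(fn & 0x80000000, 0);   /* max leaf of this class  */
        if (max && fn <= max->eax)
                return NULL;            /* in range but undefined: zeroes  */
        max = find_entry(0, 0);         /* Intel: clamp to CPUID.0H:EAX    */
        return max ? find_entry(max->eax, idx) : NULL;
}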
 
index fff790a..23ff655 100644 (file)
@@ -23,6 +23,7 @@
 #include "ioapic.h"
 #include "hyperv.h"
 
+#include <linux/cpu.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
@@ -645,7 +646,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
                .vector = stimer->config.apic_vector
        };
 
-       return !kvm_apic_set_irq(vcpu, &irq, NULL);
+       if (lapic_in_kernel(vcpu))
+               return !kvm_apic_set_irq(vcpu, &irq, NULL);
+       return 0;
 }
 
 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
@@ -1852,7 +1855,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 
                        ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
                        ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
-                       ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+
+                       /*
+                        * Direct Synthetic timers only make sense with in-kernel
+                        * LAPIC
+                        */
+                       if (lapic_in_kernel(vcpu))
+                               ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
 
                        break;
 
@@ -1864,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
                        ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
                        if (evmcs_ver)
                                ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
-
+                       if (!cpu_smt_possible())
+                               ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
                        /*
                         * Default number of spinlock retry attempts, matches
                         * HyperV 2016.
index 8675458..b29d00b 100644 (file)
 #define APIC_BROADCAST                 0xFF
 #define X2APIC_BROADCAST               0xFFFFFFFFul
 
-#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
-#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
+static bool lapic_timer_advance_dynamic __read_mostly;
+#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100     /* clock cycles */
+#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000   /* clock cycles */
+#define LAPIC_TIMER_ADVANCE_NS_INIT    1000
+#define LAPIC_TIMER_ADVANCE_NS_MAX     5000
 /* step-by-step approximation to mitigate fluctuation */
 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
 
@@ -108,11 +111,6 @@ static inline int apic_enabled(struct kvm_lapic *apic)
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
-static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
-{
-       return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
-}
-
 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 {
        return apic->vcpu->vcpu_id;
@@ -1485,26 +1483,25 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
        u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
        u64 ns;
 
+       /* Do not adjust for tiny fluctuations or large random spikes. */
+       if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
+           abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
+               return;
+
        /* too early */
        if (advance_expire_delta < 0) {
                ns = -advance_expire_delta * 1000000ULL;
                do_div(ns, vcpu->arch.virtual_tsc_khz);
-               timer_advance_ns -= min((u32)ns,
-                       timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+               timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
        } else {
        /* too late */
                ns = advance_expire_delta * 1000000ULL;
                do_div(ns, vcpu->arch.virtual_tsc_khz);
-               timer_advance_ns += min((u32)ns,
-                       timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+               timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
        }
 
-       if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
-               apic->lapic_timer.timer_advance_adjust_done = true;
-       if (unlikely(timer_advance_ns > 5000)) {
-               timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
-               apic->lapic_timer.timer_advance_adjust_done = false;
-       }
+       if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
+               timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
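A quick worked example of the adjustment above, with illustrative numbers:

/* virtual_tsc_khz = 2000000 (2 GHz guest TSC), timer fired 4000 cycles
 * late:  ns = 4000 * 1000000 / 2000000 = 2000
 *        timer_advance_ns += 2000 / LAPIC_TIMER_ADVANCE_ADJUST_STEP = 250
 * |delta| < 100 or > 10000 cycles: ignored by the early return.
 * Result above LAPIC_TIMER_ADVANCE_NS_MAX (5000): reset to 1000 ns. */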
 
@@ -1524,7 +1521,7 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
        if (guest_tsc < tsc_deadline)
                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 
-       if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+       if (lapic_timer_advance_dynamic)
                adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
 
@@ -2301,14 +2298,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
                     HRTIMER_MODE_ABS_HARD);
        apic->lapic_timer.timer.function = apic_timer_fn;
        if (timer_advance_ns == -1) {
-               apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
-               apic->lapic_timer.timer_advance_adjust_done = false;
+               apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
+               lapic_timer_advance_dynamic = true;
        } else {
                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
-               apic->lapic_timer.timer_advance_adjust_done = true;
+               lapic_timer_advance_dynamic = false;
        }
 
-
        /*
         * APIC is created enabled. This will prevent kvm_lapic_set_base from
         * thinking that APIC state has changed.
index 50053d2..1f50148 100644 (file)
@@ -35,7 +35,6 @@ struct kvm_timer {
        s64 advance_expire_delta;
        atomic_t pending;                       /* accumulated triggered timers */
        bool hv_timer_in_use;
-       bool timer_advance_adjust_done;
 };
 
 struct kvm_lapic {
@@ -243,4 +242,9 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
        return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
 }
 
+static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
+{
+       return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
+}
+
 #endif
index a10af9c..24c23c6 100644 (file)
@@ -83,7 +83,17 @@ module_param(dbg, bool, 0644);
 #define PTE_PREFETCH_NUM               8
 
 #define PT_FIRST_AVAIL_BITS_SHIFT 10
-#define PT64_SECOND_AVAIL_BITS_SHIFT 52
+#define PT64_SECOND_AVAIL_BITS_SHIFT 54
+
+/*
+ * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
+ * Access Tracking SPTEs.
+ */
+#define SPTE_SPECIAL_MASK (3ULL << 52)
+#define SPTE_AD_ENABLED_MASK (0ULL << 52)
+#define SPTE_AD_DISABLED_MASK (1ULL << 52)
+#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52)
+#define SPTE_MMIO_MASK (3ULL << 52)
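With bits 53:52 now forming a single two-bit field, the four values above are mutually exclusive; classification compares the whole field rather than testing one bit, roughly:

/* Hedged sketch: decode the 2-bit special field of an SPTE. */
static inline u64 spte_special_bits(u64 spte)
{
        return spte & SPTE_SPECIAL_MASK;        /* bits 53:52 */
}
/* == SPTE_AD_ENABLED_MASK     (0): hardware A/D bits in use        */
/* == SPTE_AD_DISABLED_MASK    (1): access-tracking SPTE            */
/* == SPTE_AD_WRPROT_ONLY_MASK (2): dirty logging via write-protect */
/* == SPTE_MMIO_MASK           (3): MMIO SPTE                       */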
 
 #define PT64_LEVEL_BITS 9
 
@@ -219,12 +229,11 @@ static u64 __read_mostly shadow_present_mask;
 static u64 __read_mostly shadow_me_mask;
 
 /*
- * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
- * Non-present SPTEs with shadow_acc_track_value set are in place for access
- * tracking.
+ * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
+ * shadow_acc_track_mask is the set of bits to be cleared in non-accessed
+ * pages.
  */
 static u64 __read_mostly shadow_acc_track_mask;
-static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK;
 
 /*
  * The mask/shift to use for saving the original R/X bits when marking the PTE
@@ -304,7 +313,7 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
 {
        BUG_ON((u64)(unsigned)access_mask != access_mask);
        BUG_ON((mmio_mask & mmio_value) != mmio_value);
-       shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
+       shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
        shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
        shadow_mmio_access_mask = access_mask;
 }
@@ -320,10 +329,27 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
        return sp->role.ad_disabled;
 }
 
+static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
+{
+       /*
+        * When using the EPT page-modification log, the GPAs in the log
+        * would come from L2 rather than L1.  Therefore, we need to rely
+        * on write protection to record dirty pages.  This also bypasses
+        * PML, since writes now result in a vmexit.
+        */
+       return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
+}
+
 static inline bool spte_ad_enabled(u64 spte)
 {
        MMU_WARN_ON(is_mmio_spte(spte));
-       return !(spte & shadow_acc_track_value);
+       return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK;
+}
+
+static inline bool spte_ad_need_write_protect(u64 spte)
+{
+       MMU_WARN_ON(is_mmio_spte(spte));
+       return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
 }
 
 static inline u64 spte_shadow_accessed_mask(u64 spte)
@@ -403,8 +429,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
        mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
                << shadow_nonpresent_or_rsvd_mask_len;
 
-       page_header(__pa(sptep))->mmio_cached = true;
-
        trace_mark_mmio_spte(sptep, gfn, access, gen);
        mmu_spte_set(sptep, mask);
 }
@@ -463,7 +487,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 {
        BUG_ON(!dirty_mask != !accessed_mask);
        BUG_ON(!accessed_mask && !acc_track_mask);
-       BUG_ON(acc_track_mask & shadow_acc_track_value);
+       BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK);
 
        shadow_user_mask = user_mask;
        shadow_accessed_mask = accessed_mask;
@@ -1591,16 +1615,16 @@ static bool spte_clear_dirty(u64 *sptep)
 
        rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep);
 
+       MMU_WARN_ON(!spte_ad_enabled(spte));
        spte &= ~shadow_dirty_mask;
-
        return mmu_spte_update(sptep, spte);
 }
 
-static bool wrprot_ad_disabled_spte(u64 *sptep)
+static bool spte_wrprot_for_clear_dirty(u64 *sptep)
 {
        bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
                                               (unsigned long *)sptep);
-       if (was_writable)
+       if (was_writable && !spte_ad_enabled(*sptep))
                kvm_set_pfn_dirty(spte_to_pfn(*sptep));
 
        return was_writable;
@@ -1619,10 +1643,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
        bool flush = false;
 
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               if (spte_ad_enabled(*sptep))
-                       flush |= spte_clear_dirty(sptep);
+               if (spte_ad_need_write_protect(*sptep))
+                       flush |= spte_wrprot_for_clear_dirty(sptep);
                else
-                       flush |= wrprot_ad_disabled_spte(sptep);
+                       flush |= spte_clear_dirty(sptep);
 
        return flush;
 }
@@ -1633,6 +1657,11 @@ static bool spte_set_dirty(u64 *sptep)
 
        rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep);
 
+       /*
+        * Similar to the !kvm_x86_ops->slot_disable_log_dirty case,
+        * do not bother adding back write access to pages marked
+        * SPTE_AD_WRPROT_ONLY_MASK.
+        */
        spte |= shadow_dirty_mask;
 
        return mmu_spte_update(sptep, spte);
@@ -2103,6 +2132,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
         * depends on valid pages being added to the head of the list.  See
         * comments in kvm_zap_obsolete_pages().
         */
+       sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
        kvm_mod_used_mmu_pages(vcpu->kvm, +1);
        return sp;
@@ -2252,7 +2282,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 #define for_each_valid_sp(_kvm, _sp, _gfn)                             \
        hlist_for_each_entry(_sp,                                       \
          &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-               if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) {    \
+               if (is_obsolete_sp((_kvm), (_sp))) {                    \
                } else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)                        \
@@ -2311,7 +2341,8 @@ static void mmu_audit_disable(void) { }
 
 static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-       return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+       return sp->role.invalid ||
+              unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 }
 
 static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -2538,7 +2569,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                if (level > PT_PAGE_TABLE_LEVEL && need_sync)
                        flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
        }
-       sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        clear_page(sp->spt);
        trace_kvm_mmu_get_page(sp, true);
 
@@ -2623,7 +2653,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
               shadow_user_mask | shadow_x_mask | shadow_me_mask;
 
        if (sp_ad_disabled(sp))
-               spte |= shadow_acc_track_value;
+               spte |= SPTE_AD_DISABLED_MASK;
        else
                spte |= shadow_accessed_mask;
 
@@ -2753,7 +2783,12 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
        } else {
                list_move(&sp->link, &kvm->arch.active_mmu_pages);
 
-               if (!sp->role.invalid)
+               /*
+                * Obsolete pages cannot be used on any vCPUs, see the comment
+                * in kvm_mmu_zap_all_fast().  Note, is_obsolete_sp() also
+                * treats invalid shadow pages as being obsolete.
+                */
+               if (!is_obsolete_sp(kvm, sp))
                        kvm_reload_remote_mmus(kvm);
        }
 
@@ -2964,7 +2999,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
        sp = page_header(__pa(sptep));
        if (sp_ad_disabled(sp))
-               spte |= shadow_acc_track_value;
+               spte |= SPTE_AD_DISABLED_MASK;
+       else if (kvm_vcpu_ad_need_write_protect(vcpu))
+               spte |= SPTE_AD_WRPROT_ONLY_MASK;
 
        /*
         * For the EPT case, shadow_present_mask is 0 if hardware
@@ -5383,7 +5420,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
                       void *insn, int insn_len)
 {
        int r, emulation_type = 0;
-       enum emulation_result er;
        bool direct = vcpu->arch.mmu->direct_map;
 
        /* With shadow page tables, fault_address contains a GVA or nGPA.  */
@@ -5450,19 +5486,8 @@ emulate:
                        return 1;
        }
 
-       er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
-
-       switch (er) {
-       case EMULATE_DONE:
-               return 1;
-       case EMULATE_USER_EXIT:
-               ++vcpu->stat.mmio_exits;
-               /* fall through */
-       case EMULATE_FAIL:
-               return 0;
-       default:
-               BUG();
-       }
+       return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
+                                      insn_len);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
@@ -5684,12 +5709,11 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
        return ret;
 }
 
-
+#define BATCH_ZAP_PAGES        10
 static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
-       LIST_HEAD(invalid_list);
-       int ign;
+       int nr_zapped, batch = 0;
 
 restart:
        list_for_each_entry_safe_reverse(sp, node,
@@ -5702,46 +5726,39 @@ restart:
                        break;
 
                /*
-                * Do not repeatedly zap a root page to avoid unnecessary
-                * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
-                * progress:
-                *    vcpu 0                        vcpu 1
-                *                         call vcpu_enter_guest():
-                *                            1): handle KVM_REQ_MMU_RELOAD
-                *                                and require mmu-lock to
-                *                                load mmu
-                * repeat:
-                *    1): zap root page and
-                *        send KVM_REQ_MMU_RELOAD
-                *
-                *    2): if (cond_resched_lock(mmu-lock))
-                *
-                *                            2): hold mmu-lock and load mmu
-                *
-                *                            3): see KVM_REQ_MMU_RELOAD bit
-                *                                on vcpu->requests is set
-                *                                then return 1 to call
-                *                                vcpu_enter_guest() again.
-                *            goto repeat;
-                *
-                * Since we are reversely walking the list and the invalid
-                * list will be moved to the head, skip the invalid page
-                * can help us to avoid the infinity list walking.
+                * Skip invalid pages with a non-zero root count; zapping pages
+                * with a non-zero root count will never succeed, i.e. the page
+                * will get thrown back on active_mmu_pages and we'll get stuck
+                * in an infinite loop.
                 */
-               if (sp->role.invalid)
+               if (sp->role.invalid && sp->root_count)
                        continue;
 
-               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-                       kvm_mmu_commit_zap_page(kvm, &invalid_list);
-                       cond_resched_lock(&kvm->mmu_lock);
+               /*
+                * No need to flush the TLB since we're only zapping shadow
+                * pages with an obsolete generation number and all vCPUs have
+                * loaded a new root, i.e. the shadow pages being zapped cannot
+                * be in active use by the guest.
+                */
+               if (batch >= BATCH_ZAP_PAGES &&
+                   cond_resched_lock(&kvm->mmu_lock)) {
+                       batch = 0;
                        goto restart;
                }
 
-               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+               if (__kvm_mmu_prepare_zap_page(kvm, sp,
+                               &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
+                       batch += nr_zapped;
                        goto restart;
+               }
        }
 
-       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+       /*
+        * Trigger a remote TLB flush before freeing the page tables to ensure
+        * KVM is not in the middle of a lockless shadow page table walk, which
+        * may reference the pages.
+        */
+       kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
 }
 
 /*
@@ -5755,13 +5772,39 @@ restart:
  */
 static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 {
+       lockdep_assert_held(&kvm->slots_lock);
+
        spin_lock(&kvm->mmu_lock);
-       kvm->arch.mmu_valid_gen++;
+       trace_kvm_mmu_zap_all_fast(kvm);
+
+       /*
+        * Toggle mmu_valid_gen between '0' and '1'.  Because slots_lock is
+        * held for the entire duration of zapping obsolete pages, it's
+        * impossible for there to be multiple invalid generations associated
+        * with *valid* shadow pages at any given time, i.e. there is exactly
+        * one valid generation and (at most) one invalid generation.
+        */
+       kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1;
+
+       /*
+        * Notify all vCPUs to reload their shadow page tables and flush the
+        * TLB.  All vCPUs will then switch to new shadow page tables with the
+        * new mmu_valid_gen.
+        *
+        * Note: this must be done under the protection of mmu_lock; otherwise
+        * a vCPU could purge a shadow page but miss the TLB flush.
+        */
+       kvm_reload_remote_mmus(kvm);
 
        kvm_zap_obsolete_pages(kvm);
        spin_unlock(&kvm->mmu_lock);
 }
 
+static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+{
+       return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+}
+
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
                        struct kvm_memory_slot *slot,
                        struct kvm_page_track_notifier_node *node)
@@ -5959,7 +6002,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
-static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only)
+void kvm_mmu_zap_all(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
        LIST_HEAD(invalid_list);
@@ -5968,14 +6011,10 @@ static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only)
        spin_lock(&kvm->mmu_lock);
 restart:
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
-               if (mmio_only && !sp->mmio_cached)
-                       continue;
                if (sp->role.invalid && sp->root_count)
                        continue;
-               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) {
-                       WARN_ON_ONCE(mmio_only);
+               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
                        goto restart;
-               }
                if (cond_resched_lock(&kvm->mmu_lock))
                        goto restart;
        }
@@ -5984,11 +6023,6 @@ restart:
        spin_unlock(&kvm->mmu_lock);
 }
 
-void kvm_mmu_zap_all(struct kvm *kvm)
-{
-       return __kvm_mmu_zap_all(kvm, false);
-}
-
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
        WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
@@ -6010,7 +6044,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
         */
        if (unlikely(gen == 0)) {
                kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
-               __kvm_mmu_zap_all(kvm, true);
+               kvm_mmu_zap_all_fast(kvm);
        }
 }
 
@@ -6041,16 +6075,24 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                 * want to shrink a VM that only started to populate its MMU
                 * anyway.
                 */
-               if (!kvm->arch.n_used_mmu_pages)
+               if (!kvm->arch.n_used_mmu_pages &&
+                   !kvm_has_zapped_obsolete_pages(kvm))
                        continue;
 
                idx = srcu_read_lock(&kvm->srcu);
                spin_lock(&kvm->mmu_lock);
 
+               if (kvm_has_zapped_obsolete_pages(kvm)) {
+                       kvm_mmu_commit_zap_page(kvm,
+                             &kvm->arch.zapped_obsolete_pages);
+                       goto unlock;
+               }
+
                if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
                        freed++;
                kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
+unlock:
                spin_unlock(&kvm->mmu_lock);
                srcu_read_unlock(&kvm->srcu, idx);
 
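
The fast-zap scheme above relies on stamping each shadow page with the generation
that was current when it was allocated, then obsoleting every page in O(1) by
toggling the single valid generation, with reclaim done lazily in batches of
BATCH_ZAP_PAGES.  A minimal standalone sketch of that bookkeeping, using
illustrative types and names rather than the kernel's:

#include <stdbool.h>
#include <stdint.h>

struct shadow_page { uint8_t mmu_valid_gen; bool invalid; };
struct mmu_state   { uint8_t mmu_valid_gen; };

/* A page is unusable if marked invalid or stamped with a stale generation. */
static bool page_is_obsolete(const struct mmu_state *mmu,
                             const struct shadow_page *sp)
{
        return sp->invalid || sp->mmu_valid_gen != mmu->mmu_valid_gen;
}

/*
 * Because the lock is held for the entire duration of zapping, at most one
 * invalid generation exists at a time, so toggling between 0 and 1 suffices;
 * every existing page becomes obsolete without touching the pages themselves.
 */
static void zap_all_fast(struct mmu_state *mmu)
{
        mmu->mmu_valid_gen = mmu->mmu_valid_gen ? 0 : 1;
}
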
index d8001b4..7ca8831 100644 (file)
@@ -8,16 +8,18 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvmmmu
 
-#define KVM_MMU_PAGE_FIELDS \
-       __field(__u64, gfn) \
-       __field(__u32, role) \
-       __field(__u32, root_count) \
+#define KVM_MMU_PAGE_FIELDS            \
+       __field(__u8, mmu_valid_gen)    \
+       __field(__u64, gfn)             \
+       __field(__u32, role)            \
+       __field(__u32, root_count)      \
        __field(bool, unsync)
 
-#define KVM_MMU_PAGE_ASSIGN(sp)                             \
-       __entry->gfn = sp->gfn;                      \
-       __entry->role = sp->role.word;               \
-       __entry->root_count = sp->root_count;        \
+#define KVM_MMU_PAGE_ASSIGN(sp)                                \
+       __entry->mmu_valid_gen = sp->mmu_valid_gen;     \
+       __entry->gfn = sp->gfn;                         \
+       __entry->role = sp->role.word;                  \
+       __entry->root_count = sp->root_count;           \
        __entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({                                       \
@@ -29,8 +31,9 @@
                                                                        \
        role.word = __entry->role;                                      \
                                                                        \
-       trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s"        \
+       trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \
                         " %snxe %sad root %u %s%c",                    \
+                        __entry->mmu_valid_gen,                        \
                         __entry->gfn, role.level,                      \
                         role.gpte_is_8_bytes ? 8 : 4,                  \
                         role.quadrant,                                 \
@@ -279,6 +282,27 @@ TRACE_EVENT(
        )
 );
 
+TRACE_EVENT(
+       kvm_mmu_zap_all_fast,
+       TP_PROTO(struct kvm *kvm),
+       TP_ARGS(kvm),
+
+       TP_STRUCT__entry(
+               __field(__u8, mmu_valid_gen)
+               __field(unsigned int, mmu_used_pages)
+       ),
+
+       TP_fast_assign(
+               __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
+               __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
+       ),
+
+       TP_printk("kvm-mmu-valid-gen %u used_pages %x",
+                 __entry->mmu_valid_gen, __entry->mmu_used_pages
+       )
+);
+
 TRACE_EVENT(
        check_mmio_spte,
        TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
index 04fe218..c5673bd 100644 (file)
@@ -734,8 +734,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
        vcpu->arch.efer = efer;
-       if (!npt_enabled && !(efer & EFER_LMA))
-               efer &= ~EFER_LME;
+
+       if (!npt_enabled) {
+               /* Shadow paging assumes NX to be available.  */
+               efer |= EFER_NX;
+
+               if (!(efer & EFER_LMA))
+                       efer &= ~EFER_LME;
+       }
 
        to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
        mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
@@ -777,17 +783,18 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
                svm->next_rip = svm->vmcb->control.next_rip;
        }
 
-       if (!svm->next_rip)
-               return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
-
-       if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
-               printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
-                      __func__, kvm_rip_read(vcpu), svm->next_rip);
-
-       kvm_rip_write(vcpu, svm->next_rip);
+       if (!svm->next_rip) {
+               if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+                       return 0;
+       } else {
+               if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
+                       pr_err("%s: ip 0x%lx next 0x%llx\n",
+                              __func__, kvm_rip_read(vcpu), svm->next_rip);
+               kvm_rip_write(vcpu, svm->next_rip);
+       }
        svm_set_interrupt_shadow(vcpu, 0);
 
-       return EMULATE_DONE;
+       return 1;
 }
 
 static void svm_queue_exception(struct kvm_vcpu *vcpu)
@@ -1539,6 +1546,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        set_intercept(svm, INTERCEPT_SKINIT);
        set_intercept(svm, INTERCEPT_WBINVD);
        set_intercept(svm, INTERCEPT_XSETBV);
+       set_intercept(svm, INTERCEPT_RDPRU);
        set_intercept(svm, INTERCEPT_RSM);
 
        if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
@@ -2768,17 +2776,18 @@ static int gp_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
        u32 error_code = svm->vmcb->control.exit_info_1;
-       int er;
 
        WARN_ON_ONCE(!enable_vmware_backdoor);
 
-       er = kvm_emulate_instruction(vcpu,
-               EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
-       if (er == EMULATE_USER_EXIT)
-               return 0;
-       else if (er != EMULATE_DONE)
+       /*
+        * VMware backdoor emulation on #GP interception only handles IN{S},
+        * OUT{S}, and RDPMC, none of which generate a non-zero error code.
+        */
+       if (error_code) {
                kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
-       return 1;
+               return 1;
+       }
+       return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
 }
 
 static bool is_erratum_383(void)
@@ -2876,7 +2885,7 @@ static int io_interception(struct vcpu_svm *svm)
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
        if (string)
-               return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(vcpu, 0);
 
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3830,6 +3839,12 @@ static int xsetbv_interception(struct vcpu_svm *svm)
        return 1;
 }
 
+static int rdpru_interception(struct vcpu_svm *svm)
+{
+       kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+       return 1;
+}
+
 static int task_switch_interception(struct vcpu_svm *svm)
 {
        u16 tss_selector;
@@ -3883,24 +3898,15 @@ static int task_switch_interception(struct vcpu_svm *svm)
            int_type == SVM_EXITINTINFO_TYPE_SOFT ||
            (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
             (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
-               if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
-                       goto fail;
+               if (!skip_emulated_instruction(&svm->vcpu))
+                       return 0;
        }
 
        if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
                int_vec = -1;
 
-       if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
-                               has_error_code, error_code) == EMULATE_FAIL)
-               goto fail;
-
-       return 1;
-
-fail:
-       svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-       svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-       svm->vcpu.run->internal.ndata = 0;
-       return 0;
+       return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
+                              has_error_code, error_code);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm)
@@ -3921,7 +3927,7 @@ static int iret_interception(struct vcpu_svm *svm)
 static int invlpg_interception(struct vcpu_svm *svm)
 {
        if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
-               return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(&svm->vcpu, 0);
 
        kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
        return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3929,13 +3935,12 @@ static int invlpg_interception(struct vcpu_svm *svm)
 
 static int emulate_on_interception(struct vcpu_svm *svm)
 {
-       return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(&svm->vcpu, 0);
 }
 
 static int rsm_interception(struct vcpu_svm *svm)
 {
-       return kvm_emulate_instruction_from_buffer(&svm->vcpu,
-                                       rsm_ins_bytes, 2) == EMULATE_DONE;
+       return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2);
 }
 
 static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4592,6 +4597,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
        int ret = 0;
        struct vcpu_svm *svm = to_svm(vcpu);
        u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
+       u32 id = kvm_xapic_id(vcpu->arch.apic);
 
        if (ldr == svm->ldr_reg)
                return 0;
@@ -4599,7 +4605,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
        avic_invalidate_logical_id_entry(vcpu);
 
        if (ldr)
-               ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr);
+               ret = avic_ldr_write(vcpu, id, ldr);
 
        if (!ret)
                svm->ldr_reg = ldr;
@@ -4611,8 +4617,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
 {
        u64 *old, *new;
        struct vcpu_svm *svm = to_svm(vcpu);
-       u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
-       u32 id = (apic_id_reg >> 24) & 0xff;
+       u32 id = kvm_xapic_id(vcpu->arch.apic);
 
        if (vcpu->vcpu_id == id)
                return 0;
@@ -4724,7 +4729,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
                ret = avic_unaccel_trap_write(svm);
        } else {
                /* Handling Fault */
-               ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+               ret = kvm_emulate_instruction(&svm->vcpu, 0);
        }
 
        return ret;
@@ -4791,6 +4796,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MONITOR]                      = monitor_interception,
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
+       [SVM_EXIT_RDPRU]                        = rdpru_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
        [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
@@ -7099,13 +7105,6 @@ failed:
        return ret;
 }
 
-static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
-                                  uint16_t *vmcs_version)
-{
-       /* Intel-only feature */
-       return -ENODEV;
-}
-
 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 {
        unsigned long cr4 = kvm_read_cr4(vcpu);
@@ -7311,7 +7310,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .mem_enc_reg_region = svm_register_enc_region,
        .mem_enc_unreg_region = svm_unregister_enc_region,
 
-       .nested_enable_evmcs = nested_enable_evmcs,
+       .nested_enable_evmcs = NULL,
        .nested_get_evmcs_version = NULL,
 
        .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
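
A pattern repeated throughout the SVM (and later VMX) hunks above: exit handlers
stop decoding the old EMULATE_DONE / EMULATE_USER_EXIT / EMULATE_FAIL enum and
instead pass through kvm_emulate_instruction()'s int return, where non-zero means
resume the guest and zero means exit to userspace.  A before/after sketch (the
handler names are illustrative, not functions from this series):

/* Old shape: translate a tri-state emulation enum into the 0/1 convention. */
static int some_interception_old(struct kvm_vcpu *vcpu)
{
        switch (kvm_emulate_instruction(vcpu, 0)) {
        case EMULATE_DONE:      return 1;       /* resume guest */
        case EMULATE_USER_EXIT: return 0;       /* exit to userspace */
        default:                return 0;       /* EMULATE_FAIL */
        }
}

/* New shape: the emulator already returns 1/0, so pass it straight through. */
static int some_interception_new(struct kvm_vcpu *vcpu)
{
        return kvm_emulate_instruction(vcpu, 0);
}
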
index d6664ee..7aa6971 100644 (file)
@@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void)
                SECONDARY_EXEC_XSAVES;
 }
 
+static inline bool vmx_waitpkg_supported(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 static inline bool cpu_has_vmx_tsc_scaling(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
index 39a24ee..07ebf68 100644 (file)
@@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr)
        struct hv_vp_assist_page *vp_ap =
                hv_get_vp_assist_page(smp_processor_id());
 
+       if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
+               vp_ap->nested_control.features.directhypercall = 1;
        vp_ap->current_nested_vmcs = phys_addr;
        vp_ap->enlighten_vmentry = 1;
 }
index 1a10cd3..0e7c930 100644 (file)
@@ -198,6 +198,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
        pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
 }
 
+static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
+{
+       return fixed_bits_valid(control, low, high);
+}
+
+static inline u64 vmx_control_msr(u32 low, u32 high)
+{
+       return low | ((u64)high << 32);
+}
+
 static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
 {
        secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
@@ -866,16 +876,34 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+                                      vmx->nested.msrs.misc_high);
+
+       return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
+}
+
 /*
  * Load guest's/host's msr at nested entry/exit.
  * return 0 for success, entry index for failure.
+ *
+ * One of the failure modes for MSR load/store is when a list exceeds the
+ * virtual hardware's capacity. To maintain compatibility with hardware as much
+ * as possible, process all valid entries before failing rather than prechecking
+ * for a capacity violation.
  */
 static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 {
        u32 i;
        struct vmx_msr_entry e;
+       u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
        for (i = 0; i < count; i++) {
+               if (unlikely(i >= max_msr_list_size))
+                       goto fail;
+
                if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
                                        &e, sizeof(e))) {
                        pr_debug_ratelimited(
@@ -906,8 +934,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
        u64 data;
        u32 i;
        struct vmx_msr_entry e;
+       u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
        for (i = 0; i < count; i++) {
+               if (unlikely(i >= max_msr_list_size))
+                       return -EINVAL;
+
                if (kvm_vcpu_read_guest(vcpu,
                                        gpa + i * sizeof(e),
                                        &e, 2 * sizeof(u32))) {
@@ -1013,17 +1045,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
        return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
 }
 
-
-static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
-{
-       return fixed_bits_valid(control, low, high);
-}
-
-static inline u64 vmx_control_msr(u32 low, u32 high)
-{
-       return low | ((u64)high << 32);
-}
-
 static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
 {
        superset &= mask;
@@ -2089,6 +2110,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
                                  SECONDARY_EXEC_ENABLE_INVPCID |
                                  SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_XSAVES |
+                                 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_ENABLE_VMFUNC);
@@ -2588,7 +2610,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
 
                /* VM-entry exception error code */
                if (CC(has_error_code &&
-                      vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)))
+                      vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
                        return -EINVAL;
 
                /* VM-entry interruption-info field: reserved bits */
@@ -2642,8 +2664,23 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
            CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
                return -EINVAL;
 
-       ia32e = (vmcs12->vm_exit_controls &
-                VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+#ifdef CONFIG_X86_64
+       ia32e = !!(vcpu->arch.efer & EFER_LMA);
+#else
+       ia32e = false;
+#endif
+
+       if (ia32e) {
+               if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
+                   CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+                       return -EINVAL;
+       } else {
+               if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
+                   CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+                   CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
+                   CC((vmcs12->host_rip) >> 32))
+                       return -EINVAL;
+       }
 
        if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
            CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
@@ -2662,7 +2699,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
            CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
            CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
            CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
-           CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)))
+           CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
+           CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
                return -EINVAL;
 #endif
 
@@ -2879,7 +2917,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
                                                 struct vmcs12 *vmcs12);
 
-static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
+static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2899,19 +2937,18 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
                        vmx->nested.apic_access_page = NULL;
                }
                page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
-               /*
-                * If translation failed, no matter: This feature asks
-                * to exit when accessing the given address, and if it
-                * can never be accessed, this feature won't do
-                * anything anyway.
-                */
                if (!is_error_page(page)) {
                        vmx->nested.apic_access_page = page;
                        hpa = page_to_phys(vmx->nested.apic_access_page);
                        vmcs_write64(APIC_ACCESS_ADDR, hpa);
                } else {
-                       secondary_exec_controls_clearbit(vmx,
-                               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+                       pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
+                                            __func__);
+                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+                       vcpu->run->internal.suberror =
+                               KVM_INTERNAL_ERROR_EMULATION;
+                       vcpu->run->internal.ndata = 0;
+                       return false;
                }
        }
 
@@ -2956,6 +2993,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
                exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
        else
                exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
+       return true;
 }
 
 /*
@@ -2994,13 +3032,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 /*
  * If from_vmentry is false, this is being called from state restore (either RSM
  * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
-+ *
-+ * Returns:
-+ *   0 - success, i.e. proceed with actual VMEnter
-+ *   1 - consistency check VMExit
-+ *  -1 - consistency check VMFail
+ *
+ * Returns:
+ *     NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
+ *     NVMX_ENTRY_VMFAIL:  Consistency check VMFail
+ *     NVMX_ENTRY_VMEXIT:  Consistency check VMExit
+ *     NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
  */
-int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+                                                       bool from_vmentry)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -3043,11 +3083,12 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
        prepare_vmcs02_early(vmx, vmcs12);
 
        if (from_vmentry) {
-               nested_get_vmcs12_pages(vcpu);
+               if (unlikely(!nested_get_vmcs12_pages(vcpu)))
+                       return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
 
                if (nested_vmx_check_vmentry_hw(vcpu)) {
                        vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-                       return -1;
+                       return NVMX_VMENTRY_VMFAIL;
                }
 
                if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
@@ -3111,7 +3152,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
         * returned as far as L1 is concerned. It will only return (and set
         * the success flag) when L2 exits (see nested_vmx_vmexit()).
         */
-       return 0;
+       return NVMX_VMENTRY_SUCCESS;
 
        /*
         * A failed consistency check that leads to a VMExit during L1's
@@ -3127,14 +3168,14 @@ vmentry_fail_vmexit:
        vmx_switch_vmcs(vcpu, &vmx->vmcs01);
 
        if (!from_vmentry)
-               return 1;
+               return NVMX_VMENTRY_VMEXIT;
 
        load_vmcs12_host_state(vcpu, vmcs12);
        vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
        vmcs12->exit_qualification = exit_qual;
        if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
                vmx->nested.need_vmcs12_to_shadow_sync = true;
-       return 1;
+       return NVMX_VMENTRY_VMEXIT;
 }
 
 /*
@@ -3144,9 +3185,9 @@ vmentry_fail_vmexit:
 static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 {
        struct vmcs12 *vmcs12;
+       enum nvmx_vmentry_status status;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
-       int ret;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
@@ -3206,13 +3247,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
         * the nested entry.
         */
        vmx->nested.nested_run_pending = 1;
-       ret = nested_vmx_enter_non_root_mode(vcpu, true);
-       vmx->nested.nested_run_pending = !ret;
-       if (ret > 0)
-               return 1;
-       else if (ret)
-               return nested_vmx_failValid(vcpu,
-                       VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+       status = nested_vmx_enter_non_root_mode(vcpu, true);
+       if (unlikely(status != NVMX_VMENTRY_SUCCESS))
+               goto vmentry_failed;
 
        /* Hide L1D cache contents from the nested guest.  */
        vmx->vcpu.arch.l1tf_flush_l1d = true;
@@ -3243,6 +3280,15 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return kvm_vcpu_halt(vcpu);
        }
        return 1;
+
+vmentry_failed:
+       vmx->nested.nested_run_pending = 0;
+       if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
+               return 0;
+       if (status == NVMX_VMENTRY_VMEXIT)
+               return 1;
+       WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
+       return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 }
 
 /*
@@ -5441,6 +5487,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
        case EXIT_REASON_ENCLS:
                /* SGX is never exposed to L1 */
                return false;
+       case EXIT_REASON_UMWAIT:
+       case EXIT_REASON_TPAUSE:
+               return nested_cpu_has2(vmcs12,
+                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
        default:
                return true;
        }
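
For the atomic MSR switch limit added to nested_vmx_load_msr() and
nested_vmx_store_msr(), the capacity comes from IA32_VMX_MISC: bits 27:25 encode
N, and the architected maximum list length is 512 * (N + 1), which is what
nested_vmx_max_atomic_switch_msrs() computes via VMX_MISC_MSR_LIST_MULTIPLIER.
A worked sketch, with the field extraction written out by hand for illustration:

/*
 * Illustrative recomputation of the limit:
 *   N = 0 (bits 27:25 of IA32_VMX_MISC)  ->  512 * 1 = 512 entries
 *   N = 1                                ->  512 * 2 = 1024 entries
 */
static unsigned int max_atomic_switch_msrs(uint64_t vmx_misc)
{
        unsigned int n = (vmx_misc >> 25) & 0x7; /* what vmx_misc_max_msr() extracts */

        return (n + 1) * 512;                    /* VMX_MISC_MSR_LIST_MULTIPLIER */
}
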
index 187d39b..6280f33 100644 (file)
@@ -6,6 +6,16 @@
 #include "vmcs12.h"
 #include "vmx.h"
 
+/*
+ * Status returned by nested_vmx_enter_non_root_mode():
+ */
+enum nvmx_vmentry_status {
+       NVMX_VMENTRY_SUCCESS,           /* Entered VMX non-root mode */
+       NVMX_VMENTRY_VMFAIL,            /* Consistency check VMFail */
+       NVMX_VMENTRY_VMEXIT,            /* Consistency check VMExit */
+       NVMX_VMENTRY_KVM_INTERNAL_ERROR,/* KVM internal error */
+};
+
 void vmx_leave_nested(struct kvm_vcpu *vcpu);
 void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
                                bool apicv);
@@ -13,7 +23,8 @@ void nested_vmx_hardware_unsetup(void);
 __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
 void nested_vmx_vcpu_setup(void);
 void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
-int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
+enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+                                                    bool from_vmentry);
 bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
 void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                       u32 exit_intr_info, unsigned long exit_qualification);
index 2200fb6..45eaede 100644 (file)
 #include "vmcs.h"
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
-#define __ex_clear(x, reg) \
-       ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
+
+asmlinkage void vmread_error(unsigned long field, bool fault);
+void vmwrite_error(unsigned long field, unsigned long value);
+void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
+void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
+void invvpid_error(unsigned long ext, u16 vpid, gva_t gva);
+void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
 
 static __always_inline void vmcs_check16(unsigned long field)
 {
@@ -62,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 {
        unsigned long value;
 
-       asm volatile (__ex_clear("vmread %1, %0", "%k0")
-                     : "=r"(value) : "r"(field));
+       asm volatile("1: vmread %2, %1\n\t"
+                    ".byte 0x3e\n\t" /* branch taken hint */
+                    "ja 3f\n\t"
+                    "mov %2, %%" _ASM_ARG1 "\n\t"
+                    "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t"
+                    "2: call vmread_error\n\t"
+                    "xor %k1, %k1\n\t"
+                    "3:\n\t"
+
+                    ".pushsection .fixup, \"ax\"\n\t"
+                    "4: mov %2, %%" _ASM_ARG1 "\n\t"
+                    "mov $1, %%" _ASM_ARG2 "\n\t"
+                    "jmp 2b\n\t"
+                    ".popsection\n\t"
+                    _ASM_EXTABLE(1b, 4b)
+                    : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc");
        return value;
 }
 
@@ -103,21 +122,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
        return __vmcs_readl(field);
 }
 
-static noinline void vmwrite_error(unsigned long field, unsigned long value)
-{
-       printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
-              field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
-       dump_stack();
-}
+#define vmx_asm1(insn, op1, error_args...)                             \
+do {                                                                   \
+       asm_volatile_goto("1: " __stringify(insn) " %0\n\t"             \
+                         ".byte 0x2e\n\t" /* branch not taken hint */  \
+                         "jna %l[error]\n\t"                           \
+                         _ASM_EXTABLE(1b, %l[fault])                   \
+                         : : op1 : "cc" : error, fault);               \
+       return;                                                         \
+error:                                                                 \
+       insn##_error(error_args);                                       \
+       return;                                                         \
+fault:                                                                 \
+       kvm_spurious_fault();                                           \
+} while (0)
+
+#define vmx_asm2(insn, op1, op2, error_args...)                                \
+do {                                                                   \
+       asm_volatile_goto("1: "  __stringify(insn) " %1, %0\n\t"        \
+                         ".byte 0x2e\n\t" /* branch not taken hint */  \
+                         "jna %l[error]\n\t"                           \
+                         _ASM_EXTABLE(1b, %l[fault])                   \
+                         : : op1, op2 : "cc" : error, fault);          \
+       return;                                                         \
+error:                                                                 \
+       insn##_error(error_args);                                       \
+       return;                                                         \
+fault:                                                                 \
+       kvm_spurious_fault();                                           \
+} while (0)
 
 static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
 {
-       bool error;
-
-       asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "r"(field), "rm"(value));
-       if (unlikely(error))
-               vmwrite_error(field, value);
+       vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value);
 }
 
 static __always_inline void vmcs_write16(unsigned long field, u16 value)
@@ -182,28 +219,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
 static inline void vmcs_clear(struct vmcs *vmcs)
 {
        u64 phys_addr = __pa(vmcs);
-       bool error;
 
-       asm volatile (__ex("vmclear %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "m"(phys_addr));
-       if (unlikely(error))
-               printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
-                      vmcs, phys_addr);
+       vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr);
 }
 
 static inline void vmcs_load(struct vmcs *vmcs)
 {
        u64 phys_addr = __pa(vmcs);
-       bool error;
 
        if (static_branch_unlikely(&enable_evmcs))
                return evmcs_load(phys_addr);
 
-       asm volatile (__ex("vmptrld %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "m"(phys_addr));
-       if (unlikely(error))
-               printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
-                      vmcs, phys_addr);
+       vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr);
 }
 
 static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
@@ -213,11 +240,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
                u64 rsvd : 48;
                u64 gva;
        } operand = { vpid, 0, gva };
-       bool error;
 
-       asm volatile (__ex("invvpid %2, %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "r"(ext), "m"(operand));
-       BUG_ON(error);
+       vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva);
 }
 
 static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
@@ -225,11 +249,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
        struct {
                u64 eptp, gpa;
        } operand = {eptp, gpa};
-       bool error;
 
-       asm volatile (__ex("invept %2, %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "r"(ext), "m"(operand));
-       BUG_ON(error);
+       vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa);
 }
 
 static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
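
The vmx_asm1/vmx_asm2 macros above distinguish the two ways a VMX instruction can
go wrong: a VM-fail (CF or ZF set, caught by the inverted "jna" branch) lands on
the error label and calls the matching *_error() reporter, while a hardware fault
(e.g. VMX already disabled at kexec/reboot, caught via the exception table) lands
on the fault label and calls kvm_spurious_fault().  Roughly how
vmx_asm2(vmwrite, ...) expands inside __vmcs_writel() — a sketch, not the exact
preprocessor output (the branch-hint byte is omitted):

static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
{
        asm_volatile_goto("1: vmwrite %1, %0\n\t"
                          "jna %l[error]\n\t"           /* VM-fail: CF=1 or ZF=1 */
                          _ASM_EXTABLE(1b, %l[fault])   /* instruction faulted */
                          : : "r"(field), "rm"(value) : "cc" : error, fault);
        return;
error:
        vmwrite_error(field, value);    /* logs VM_INSTRUCTION_ERROR */
        return;
fault:
        kvm_spurious_fault();           /* VMX no longer operational */
}
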
index 4dea0e0..3e9c059 100644 (file)
@@ -262,6 +262,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+       struct x86_pmu_capability x86_pmu;
        struct kvm_cpuid_entry2 *entry;
        union cpuid10_eax eax;
        union cpuid10_edx edx;
@@ -283,8 +284,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
        if (!pmu->version)
                return;
 
+       perf_get_x86_pmu_capability(&x86_pmu);
+
        pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
-                                       INTEL_PMC_MAX_GENERIC);
+                                        x86_pmu.num_counters_gp);
        pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
        pmu->available_event_types = ~entry->ebx &
                                        ((1ull << eax.split.mask_length) - 1);
@@ -294,7 +297,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
        } else {
                pmu->nr_arch_fixed_counters =
                        min_t(int, edx.split.num_counters_fixed,
-                               INTEL_PMC_MAX_FIXED);
+                             x86_pmu.num_counters_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << edx.split.bit_width_fixed) - 1;
        }
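
The intel_pmu_refresh() change above clamps what guest CPUID advertises against
what the host PMU actually implements, rather than against the INTEL_PMC_MAX_*
compile-time ceilings.  The effect, as a small worked example with invented
numbers:

/*
 * Guest CPUID.0AH claims 8 general-purpose counters, but the host PMU
 * only has 4: the guest gets min(8, 4) = 4, never more than the host.
 */
static int clamped_gp_counters(int guest_cpuid_gp, int host_gp)
{
        return guest_cpuid_gp < host_gp ? guest_cpuid_gp : host_gp;
}
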
index 4a99be1..5d21a4a 100644 (file)
@@ -209,6 +209,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
        struct page *page;
        unsigned int i;
 
+       if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
+               l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+               return 0;
+       }
+
        if (!enable_ept) {
                l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
                return 0;
@@ -343,6 +348,48 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
 
 void vmx_vmexit(void);
 
+#define vmx_insn_failed(fmt...)                \
+do {                                   \
+       WARN_ONCE(1, fmt);              \
+       pr_warn_ratelimited(fmt);       \
+} while (0)
+
+asmlinkage void vmread_error(unsigned long field, bool fault)
+{
+       if (fault)
+               kvm_spurious_fault();
+       else
+               vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
+}
+
+noinline void vmwrite_error(unsigned long field, unsigned long value)
+{
+       vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
+                       field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+}
+
+noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
+{
+       vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
+}
+
+noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
+{
+       vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
+}
+
+noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
+{
+       vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
+                       ext, vpid, gva);
+}
+
+noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
+{
+       vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
+                       ext, eptp, gpa);
+}
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
@@ -486,6 +533,31 @@ static int hv_remote_flush_tlb(struct kvm *kvm)
        return hv_remote_flush_tlb_with_range(kvm, NULL);
 }
 
+static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+       struct hv_enlightened_vmcs *evmcs;
+       struct hv_partition_assist_pg **p_hv_pa_pg =
+                       &vcpu->kvm->arch.hyperv.hv_pa_pg;
+       /*
+        * Synthetic VM-Exit is not enabled in current code, so all evmcs in
+        * a single VM share the same assist page.
+        */
+       if (!*p_hv_pa_pg)
+               *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+       if (!*p_hv_pa_pg)
+               return -ENOMEM;
+
+       evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
+
+       evmcs->partition_assist_page =
+               __pa(*p_hv_pa_pg);
+       evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
+       evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+
+       return 0;
+}
+
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 /*
@@ -897,17 +969,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
        u64 guest_efer = vmx->vcpu.arch.efer;
        u64 ignore_bits = 0;
 
-       if (!enable_ept) {
-               /*
-                * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
-                * host CPUID is more efficient than testing guest CPUID
-                * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
-                */
-               if (boot_cpu_has(X86_FEATURE_SMEP))
-                       guest_efer |= EFER_NX;
-               else if (!(guest_efer & EFER_NX))
-                       ignore_bits |= EFER_NX;
-       }
+       /* Shadow paging assumes NX to be available.  */
+       if (!enable_ept)
+               guest_efer |= EFER_NX;
 
        /*
         * LMA and LME handled by hardware; SCE meaningless outside long mode.
@@ -1472,27 +1536,32 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
        return 0;
 }
 
-/*
- * Returns an int to be compatible with SVM implementation (which can fail).
- * Do not use directly, use skip_emulated_instruction() instead.
- */
-static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
+static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        unsigned long rip;
 
-       rip = kvm_rip_read(vcpu);
-       rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-       kvm_rip_write(vcpu, rip);
+       /*
+        * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
+        * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
+        * set when EPT misconfig occurs.  In practice, real hardware updates
+        * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
+        * (namely Hyper-V) don't set it because the behavior is undefined,
+        * in which case we would advance RIP by some random value.
+        */
+       if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
+           to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
+               rip = kvm_rip_read(vcpu);
+               rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+               kvm_rip_write(vcpu, rip);
+       } else {
+               if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+                       return 0;
+       }
 
        /* skipping an emulated instruction also counts */
        vmx_set_interrupt_shadow(vcpu, 0);
 
-       return EMULATE_DONE;
-}
-
-static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
-{
-       (void)__skip_emulated_instruction(vcpu);
+       return 1;
 }
 
 static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
@@ -1527,8 +1596,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
                int inc_eip = 0;
                if (kvm_exception_is_soft(nr))
                        inc_eip = vcpu->arch.event_exit_inst_len;
-               if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
-                       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+               kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
                return;
        }
 
@@ -1700,6 +1768,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 #endif
        case MSR_EFER:
                return kvm_get_msr_common(vcpu, msr_info);
+       case MSR_IA32_UMWAIT_CONTROL:
+               if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
+                       return 1;
+
+               msr_info->data = vmx->msr_ia32_umwait_control;
+               break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -1873,6 +1947,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                vmcs_write64(GUEST_BNDCFGS, data);
                break;
+       case MSR_IA32_UMWAIT_CONTROL:
+               if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
+                       return 1;
+
+               /* Reserved bit 1 and the upper 32 bits [63:32] must be zero */
+               if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
+                       return 1;
+
+               vmx->msr_ia32_umwait_control = data;
+               break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -2290,6 +2374,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
                        SECONDARY_EXEC_RDRAND_EXITING |
                        SECONDARY_EXEC_ENABLE_PML |
                        SECONDARY_EXEC_TSC_SCALING |
+                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                        SECONDARY_EXEC_PT_USE_GPA |
                        SECONDARY_EXEC_PT_CONCEAL_VMX |
                        SECONDARY_EXEC_ENABLE_VMFUNC |
@@ -4026,6 +4111,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
                }
        }
 
+       if (vmx_waitpkg_supported()) {
+               bool waitpkg_enabled =
+                       guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
+
+               if (!waitpkg_enabled)
+                       exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
+               if (nested) {
+                       if (waitpkg_enabled)
+                               vmx->nested.msrs.secondary_ctls_high |=
+                                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+                       else
+                               vmx->nested.msrs.secondary_ctls_high &=
+                                       ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+               }
+       }
+
        vmx->secondary_exec_control = exec_control;
 }
 
@@ -4160,6 +4262,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        vmx->rmode.vm86_active = 0;
        vmx->spec_ctrl = 0;
 
+       vmx->msr_ia32_umwait_control = 0;
+
        vcpu->arch.microcode_version = 0x100000000ULL;
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
        vmx->hv_deadline_tsc = -1;
@@ -4277,8 +4381,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
                int inc_eip = 0;
                if (vcpu->arch.interrupt.soft)
                        inc_eip = vcpu->arch.event_exit_inst_len;
-               if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
-                       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+               kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
                return;
        }
        intr = irq | INTR_INFO_VALID_MASK;
@@ -4314,8 +4417,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
        vmx->loaded_vmcs->nmi_known_unmasked = false;
 
        if (vmx->rmode.vm86_active) {
-               if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
-                       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+               kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
                return;
        }
 
@@ -4442,7 +4544,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
         * Cause the #SS fault with 0 error code in VM86 mode.
         */
        if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
-               if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+               if (kvm_emulate_instruction(vcpu, 0)) {
                        if (vcpu->arch.halt_request) {
                                vcpu->arch.halt_request = 0;
                                return kvm_vcpu_halt(vcpu);
@@ -4493,7 +4595,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
        u32 intr_info, ex_no, error_code;
        unsigned long cr2, rip, dr6;
        u32 vect_info;
-       enum emulation_result er;
 
        vect_info = vmx->idt_vectoring_info;
        intr_info = vmx->exit_intr_info;
@@ -4510,13 +4611,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 
        if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
                WARN_ON_ONCE(!enable_vmware_backdoor);
-               er = kvm_emulate_instruction(vcpu,
-                       EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
-               if (er == EMULATE_USER_EXIT)
-                       return 0;
-               else if (er != EMULATE_DONE)
+
+               /*
+                * VMware backdoor emulation on #GP interception only handles
+                * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
+                * error code on #GP.
+                */
+               if (error_code) {
                        kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
-               return 1;
+                       return 1;
+               }
+               return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
        }
 
        /*
@@ -4558,7 +4663,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
                        vcpu->arch.dr6 &= ~DR_TRAP_BITS;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
                        if (is_icebp(intr_info))
-                               skip_emulated_instruction(vcpu);
+                               WARN_ON(!skip_emulated_instruction(vcpu));
 
                        kvm_queue_exception(vcpu, DB_VECTOR);
                        return 1;
@@ -4613,7 +4718,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
        ++vcpu->stat.io_exits;
 
        if (string)
-               return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(vcpu, 0);
 
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
@@ -4687,7 +4792,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 static int handle_desc(struct kvm_vcpu *vcpu)
 {
        WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
-       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0);
 }
 
 static int handle_cr(struct kvm_vcpu *vcpu)
@@ -4903,7 +5008,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 
 static int handle_invd(struct kvm_vcpu *vcpu)
 {
-       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0);
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -4937,20 +5042,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static int handle_xsaves(struct kvm_vcpu *vcpu)
-{
-       kvm_skip_emulated_instruction(vcpu);
-       WARN(1, "this should never happen\n");
-       return 1;
-}
-
-static int handle_xrstors(struct kvm_vcpu *vcpu)
-{
-       kvm_skip_emulated_instruction(vcpu);
-       WARN(1, "this should never happen\n");
-       return 1;
-}
-
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
        if (likely(fasteoi)) {
@@ -4970,7 +5061,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
                        return kvm_skip_emulated_instruction(vcpu);
                }
        }
-       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0);
 }
 
 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -5039,23 +5130,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
        if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
                       type != INTR_TYPE_EXT_INTR &&
                       type != INTR_TYPE_NMI_INTR))
-               skip_emulated_instruction(vcpu);
-
-       if (kvm_task_switch(vcpu, tss_selector,
-                           type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
-                           has_error_code, error_code) == EMULATE_FAIL) {
-               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-               vcpu->run->internal.ndata = 0;
-               return 0;
-       }
+               WARN_ON(!skip_emulated_instruction(vcpu));
 
        /*
         * TODO: What about debug traps on tss switch?
         *       Are we supposed to inject them and update dr6?
         */
-
-       return 1;
+       return kvm_task_switch(vcpu, tss_selector,
+                              type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
+                              reason, has_error_code, error_code);
 }
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu)
@@ -5114,21 +5197,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
        if (!is_guest_mode(vcpu) &&
            !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
                trace_kvm_fast_mmio(gpa);
-               /*
-                * Doing kvm_skip_emulated_instruction() depends on undefined
-                * behavior: Intel's manual doesn't mandate
-                * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
-                * occurs and while on real hardware it was observed to be set,
-                * other hypervisors (namely Hyper-V) don't set it, we end up
-                * advancing IP with some random value. Disable fast mmio when
-                * running nested and keep it for real hardware in hope that
-                * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
-                */
-               if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
-                       return kvm_skip_emulated_instruction(vcpu);
-               else
-                       return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
-                                                               EMULATE_DONE;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -5147,8 +5216,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
 static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       enum emulation_result err = EMULATE_DONE;
-       int ret = 1;
        bool intr_window_requested;
        unsigned count = 130;
 
@@ -5169,41 +5236,35 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                if (kvm_test_request(KVM_REQ_EVENT, vcpu))
                        return 1;
 
-               err = kvm_emulate_instruction(vcpu, 0);
-
-               if (err == EMULATE_USER_EXIT) {
-                       ++vcpu->stat.mmio_exits;
-                       ret = 0;
-                       goto out;
-               }
-
-               if (err != EMULATE_DONE)
-                       goto emulation_error;
+               if (!kvm_emulate_instruction(vcpu, 0))
+                       return 0;
 
                if (vmx->emulation_required && !vmx->rmode.vm86_active &&
-                   vcpu->arch.exception.pending)
-                       goto emulation_error;
+                   vcpu->arch.exception.pending) {
+                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+                       vcpu->run->internal.suberror =
+                                               KVM_INTERNAL_ERROR_EMULATION;
+                       vcpu->run->internal.ndata = 0;
+                       return 0;
+               }
 
                if (vcpu->arch.halt_request) {
                        vcpu->arch.halt_request = 0;
-                       ret = kvm_vcpu_halt(vcpu);
-                       goto out;
+                       return kvm_vcpu_halt(vcpu);
                }
 
+               /*
+                * Note: return 1, not 0; vcpu_run() is responsible for
+                * morphing the pending signal into the proper return code.
+                */
                if (signal_pending(current))
-                       goto out;
+                       return 1;
+
                if (need_resched())
                        schedule();
        }
 
-out:
-       return ret;
-
-emulation_error:
-       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-       vcpu->run->internal.ndata = 0;
-       return 0;
+       return 1;
 }
 
 static void grow_ple_window(struct kvm_vcpu *vcpu)
@@ -5525,8 +5586,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_INVVPID]                 = handle_vmx_instruction,
        [EXIT_REASON_RDRAND]                  = handle_invalid_op,
        [EXIT_REASON_RDSEED]                  = handle_invalid_op,
-       [EXIT_REASON_XSAVES]                  = handle_xsaves,
-       [EXIT_REASON_XRSTORS]                 = handle_xrstors,
        [EXIT_REASON_PML_FULL]                = handle_pml_full,
        [EXIT_REASON_INVPCID]                 = handle_invpcid,
        [EXIT_REASON_VMFUNC]                  = handle_vmx_instruction,
@@ -6362,6 +6421,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
                                        msrs[i].host, false);
 }
 
+static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
+{
+       u32 host_umwait_control;
+
+       if (!vmx_has_waitpkg(vmx))
+               return;
+
+       host_umwait_control = get_umwait_control_msr();
+
+       if (vmx->msr_ia32_umwait_control != host_umwait_control)
+               add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
+                       vmx->msr_ia32_umwait_control,
+                       host_umwait_control, false);
+       else
+               clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
+}
+
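For context on the helper just added: the atomic-switch routines program the VMCS MSR autoload lists, whose entries use the architectural 16-byte layout sketched below (shape assumed from the surrounding vmx code, not shown in this hunk), so the CPU loads the guest value on VM-entry and the host value on VM-exit with no RDMSR/WRMSR on the run loop itself.

    struct vmx_msr_entry {
            u32 index;      /* e.g. MSR_IA32_UMWAIT_CONTROL */
            u32 reserved;
            u64 value;      /* guest value on entry, host value on exit */
    };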
 static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6456,6 +6532,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
        pt_guest_enter(vmx);
 
        atomic_switch_perf_msrs(vmx);
+       atomic_switch_umwait_control_msr(vmx);
 
        if (enable_preemption_timer)
                vmx_update_hv_timer(vcpu);
@@ -6511,6 +6588,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
                current_evmcs->hv_clean_fields |=
                        HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 
+       if (static_branch_unlikely(&enable_evmcs))
+               current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
+
        /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
        if (vmx->host_debugctlmsr)
                update_debugctlmsr(vmx->host_debugctlmsr);
@@ -6578,6 +6658,7 @@ static struct kvm *vmx_vm_alloc(void)
 
 static void vmx_vm_free(struct kvm *kvm)
 {
+       kfree(kvm->arch.hyperv.hv_pa_pg);
        vfree(to_kvm_vmx(kvm));
 }
 
@@ -7706,7 +7787,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
        .run = vmx_vcpu_run,
        .handle_exit = vmx_handle_exit,
-       .skip_emulated_instruction = __skip_emulated_instruction,
+       .skip_emulated_instruction = skip_emulated_instruction,
        .set_interrupt_shadow = vmx_set_interrupt_shadow,
        .get_interrupt_shadow = vmx_get_interrupt_shadow,
        .patch_hypercall = vmx_patch_hypercall,
@@ -7837,6 +7918,7 @@ static void vmx_exit(void)
                        if (!vp_ap)
                                continue;
 
+                       vp_ap->nested_control.features.directhypercall = 0;
                        vp_ap->current_nested_vmcs = 0;
                        vp_ap->enlighten_vmentry = 0;
                }
@@ -7876,6 +7958,11 @@ static int __init vmx_init(void)
                        pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
                        static_branch_enable(&enable_evmcs);
                }
+
+               if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
+                       vmx_x86_ops.enable_direct_tlbflush
+                               = hv_enable_direct_tlbflush;
+
        } else {
                enlightened_vmcs = false;
        }
@@ -7893,12 +7980,10 @@ static int __init vmx_init(void)
         * contain 'auto' which will be turned into the default 'cond'
         * mitigation mode.
         */
-       if (boot_cpu_has(X86_BUG_L1TF)) {
-               r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
-               if (r) {
-                       vmx_exit();
-                       return r;
-               }
+       r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
+       if (r) {
+               vmx_exit();
+               return r;
        }
 
 #ifdef CONFIG_KEXEC_CORE
index 64d5a48..bee1668 100644 (file)
@@ -14,6 +14,8 @@
 extern const u32 vmx_msr_index[];
 extern u64 host_efer;
 
+extern u32 get_umwait_control_msr(void);
+
 #define MSR_TYPE_R     1
 #define MSR_TYPE_W     2
 #define MSR_TYPE_RW    3
@@ -211,6 +213,7 @@ struct vcpu_vmx {
 #endif
 
        u64                   spec_ctrl;
+       u32                   msr_ia32_umwait_control;
 
        u32 secondary_exec_control;
 
@@ -497,6 +500,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
        vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
 }
 
+static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
+{
+       return vmx->secondary_exec_control &
+               SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 void dump_vmcs(void);
 
 #endif /* __KVM_X86_VMX_H */
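The two additions above pair with a vmx_set_msr() case elsewhere in the series; a hedged sketch of its plausible shape (the reserved-bit checks are an assumption from the SDM, not shown in this section):

    case MSR_IA32_UMWAIT_CONTROL:
            if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
                    return 1;       /* WAITPKG not exposed: #GP */

            /* assumed: bit 1 and bits 63:32 are reserved */
            if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
                    return 1;

            vmx->msr_ia32_umwait_control = data;
            break;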
index dfd6412..ff395f8 100644 (file)
@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
 static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #endif
 
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
+#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
 
 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
@@ -212,7 +212,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
        { "mmu_unsync", VM_STAT(mmu_unsync) },
        { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
-       { "largepages", VM_STAT(lpages) },
+       { "largepages", VM_STAT(lpages, .mode = 0444) },
        { "max_mmu_page_hash_collisions",
                VM_STAT(max_mmu_page_hash_collisions) },
        { NULL }
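The variadic VM_STAT/VCPU_STAT rework leans on the GNU ", ## __VA_ARGS__" extension, which drops the trailing comma when no extra arguments are passed, so existing entries are unchanged while new ones may append designated initializers. An illustration of both expansions (field order taken from the macro definitions above):

    { "largepages", VM_STAT(lpages, .mode = 0444) },
    /* expands to */
    { "largepages",
      offsetof(struct kvm, stat.lpages), KVM_STAT_VM, .mode = 0444 },

    { "mmu_unsync", VM_STAT(mmu_unsync) },
    /* expands to (no dangling comma) */
    { "mmu_unsync", offsetof(struct kvm, stat.mmu_unsync), KVM_STAT_VM },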
@@ -360,7 +360,7 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 asmlinkage __visible void kvm_spurious_fault(void)
 {
        /* Fault while not rebooting.  We want the trace. */
-       BUG();
+       BUG_ON(!kvm_rebooting);
 }
 EXPORT_SYMBOL_GPL(kvm_spurious_fault);
 
@@ -884,34 +884,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 }
 EXPORT_SYMBOL_GPL(kvm_set_xcr);
 
-int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long old_cr4 = kvm_read_cr4(vcpu);
-       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
-                                  X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
-
        if (cr4 & CR4_RESERVED_BITS)
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+               return -EINVAL;
+
+       return 0;
+}
+
+int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+       unsigned long old_cr4 = kvm_read_cr4(vcpu);
+       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+                                  X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+
+       if (kvm_valid_cr4(vcpu, cr4))
                return 1;
 
        if (is_long_mode(vcpu)) {
@@ -1145,6 +1153,30 @@ static u32 msrs_to_save[] = {
        MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
        MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
        MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+       MSR_IA32_UMWAIT_CONTROL,
+
+       MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
+       MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
+       MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
+       MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+       MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
+       MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
+       MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
+       MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
+       MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
+       MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
+       MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
+       MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
+       MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
+       MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
 };
 
 static unsigned num_msrs_to_save;
@@ -2504,6 +2536,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.pv_time_enabled = false;
+       vcpu->arch.time = 0;
 }
 
 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
@@ -2669,8 +2702,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_KVM_SYSTEM_TIME: {
                struct kvm_arch *ka = &vcpu->kvm->arch;
 
-               kvmclock_reset(vcpu);
-
                if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
                        bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
 
@@ -2684,14 +2715,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 
                /* we verify if the enable bit is set... */
+               vcpu->arch.pv_time_enabled = false;
                if (!(data & 1))
                        break;
 
-               if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+               if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
                     &vcpu->arch.pv_time, data & ~1ULL,
                     sizeof(struct pvclock_vcpu_time_info)))
-                       vcpu->arch.pv_time_enabled = false;
-               else
                        vcpu->arch.pv_time_enabled = true;
 
                break;
@@ -3169,7 +3199,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_EVENTFD:
        case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_HYPERV_SEND_IPI:
-       case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
        case KVM_CAP_HYPERV_CPUID:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
@@ -3246,6 +3275,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = kvm_x86_ops->get_nested_state ?
                        kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
                break;
+       case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+               r = kvm_x86_ops->enable_direct_tlbflush != NULL;
+               break;
+       case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+               r = kvm_x86_ops->nested_enable_evmcs != NULL;
+               break;
        default:
                break;
        }
@@ -4019,6 +4054,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                r = -EFAULT;
                }
                return r;
+       case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+               if (!kvm_x86_ops->enable_direct_tlbflush)
+                       return -ENOTTY;
+
+               return kvm_x86_ops->enable_direct_tlbflush(vcpu);
 
        default:
                return -EINVAL;
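A hedged userspace sketch of consuming the new capability (vm_fd and vcpu_fd are assumed open KVM descriptors; not part of this patch):

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <sys/ioctl.h>

    struct kvm_enable_cap cap = {
            .cap = KVM_CAP_HYPERV_DIRECT_TLBFLUSH,
    };

    if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_HYPERV_DIRECT_TLBFLUSH) > 0 &&
        ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
            perror("KVM_ENABLE_CAP");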
@@ -5048,9 +5088,15 @@ out:
 
 static void kvm_init_msr_list(void)
 {
+       struct x86_pmu_capability x86_pmu;
        u32 dummy[2];
        unsigned i, j;
 
+       BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
+                        "Please update the fixed PMCs in msrs_to_save[]");
+
+       perf_get_x86_pmu_capability(&x86_pmu);
+
        for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
@@ -5091,6 +5137,15 @@ static void kvm_init_msr_list(void)
                                intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
                                continue;
                        break;
+               case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
+                       if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+                           min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+                               continue;
+                       break;
+               case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
+                       if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+                           min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+                               continue;
                }
                default:
                        break;
@@ -5389,7 +5444,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 int handle_ud(struct kvm_vcpu *vcpu)
 {
        int emul_type = EMULTYPE_TRAP_UD;
-       enum emulation_result er;
        char sig[5]; /* ud2; .ascii "kvm" */
        struct x86_exception e;
 
@@ -5398,15 +5452,10 @@ int handle_ud(struct kvm_vcpu *vcpu)
                                sig, sizeof(sig), &e) == 0 &&
            memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
                kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
-               emul_type = 0;
+               emul_type = EMULTYPE_TRAP_UD_FORCED;
        }
 
-       er = kvm_emulate_instruction(vcpu, emul_type);
-       if (er == EMULATE_USER_EXIT)
-               return 0;
-       if (er != EMULATE_DONE)
-               kvm_queue_exception(vcpu, UD_VECTOR);
-       return 1;
+       return kvm_emulate_instruction(vcpu, emul_type);
 }
 EXPORT_SYMBOL_GPL(handle_ud);
 
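The signature handle_ud() matches is the five-byte 'ud2; .ascii "kvm"' prefix; when KVM's force_emulation_prefix module parameter is set, the prefix is skipped and the following instruction is emulated. A hedged guest-side sketch, bytes taken from the memcmp() above:

    /* ud2 (0f 0b) followed by "kvm" */
    #define KVM_FEP ".byte 0x0f, 0x0b, 0x6b, 0x76, 0x6d; "

    asm volatile(KVM_FEP "nop");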
@@ -6228,7 +6277,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
+void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
        struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        int ret;
@@ -6240,37 +6289,43 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
        ctxt->_eip = ctxt->eip + inc_eip;
        ret = emulate_int_real(ctxt, irq);
 
-       if (ret != X86EMUL_CONTINUE)
-               return EMULATE_FAIL;
-
-       ctxt->eip = ctxt->_eip;
-       kvm_rip_write(vcpu, ctxt->eip);
-       kvm_set_rflags(vcpu, ctxt->eflags);
-
-       return EMULATE_DONE;
+       if (ret != X86EMUL_CONTINUE) {
+               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+       } else {
+               ctxt->eip = ctxt->_eip;
+               kvm_rip_write(vcpu, ctxt->eip);
+               kvm_set_rflags(vcpu, ctxt->eflags);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 {
-       int r = EMULATE_DONE;
-
        ++vcpu->stat.insn_emulation_fail;
        trace_kvm_emulate_insn_failed(vcpu);
 
-       if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
-               return EMULATE_FAIL;
+       if (emulation_type & EMULTYPE_VMWARE_GP) {
+               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+               return 1;
+       }
 
-       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+       if (emulation_type & EMULTYPE_SKIP) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
-               r = EMULATE_USER_EXIT;
+               return 0;
        }
 
        kvm_queue_exception(vcpu, UD_VECTOR);
 
-       return r;
+       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+               return 0;
+       }
+
+       return 1;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
@@ -6425,7 +6480,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
        return dr6;
 }
 
-static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
+static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
@@ -6434,10 +6489,10 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
                kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
                kvm_run->debug.arch.exception = DB_VECTOR;
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
-               *r = EMULATE_USER_EXIT;
-       } else {
-               kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+               return 0;
        }
+       kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+       return 1;
 }
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -6446,7 +6501,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
        int r;
 
        r = kvm_x86_ops->skip_emulated_instruction(vcpu);
-       if (unlikely(r != EMULATE_DONE))
+       if (unlikely(!r))
                return 0;
 
        /*
@@ -6458,8 +6513,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
         * that sets the TF flag".
         */
        if (unlikely(rflags & X86_EFLAGS_TF))
-               kvm_vcpu_do_singlestep(vcpu, &r);
-       return r == EMULATE_DONE;
+               r = kvm_vcpu_do_singlestep(vcpu);
+       return r;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
 
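With the wrapper above owning the EFLAGS.TF single-step logic, a vendor ->skip_emulated_instruction() callback only advances RIP and reports success or failure. A hedged sketch with hypothetical helpers (the real VMX callback is wired up earlier in this diff):

    static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
    {
            if (!instr_len_valid(vcpu))             /* hypothetical */
                    return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);

            rip_advance_by_instr_len(vcpu);         /* hypothetical */
            return 1;
    }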
@@ -6478,7 +6533,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        kvm_run->debug.arch.pc = eip;
                        kvm_run->debug.arch.exception = DB_VECTOR;
                        kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
+                       *r = 0;
                        return true;
                }
        }
@@ -6494,7 +6549,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        vcpu->arch.dr6 &= ~DR_TRAP_BITS;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
                        kvm_queue_exception(vcpu, DB_VECTOR);
-                       *r = EMULATE_DONE;
+                       *r = 1;
                        return true;
                }
        }
@@ -6578,11 +6633,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                trace_kvm_emulate_insn_start(vcpu);
                ++vcpu->stat.insn_emulation;
                if (r != EMULATION_OK)  {
-                       if (emulation_type & EMULTYPE_TRAP_UD)
-                               return EMULATE_FAIL;
+                       if ((emulation_type & EMULTYPE_TRAP_UD) ||
+                           (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
+                               kvm_queue_exception(vcpu, UD_VECTOR);
+                               return 1;
+                       }
                        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                emulation_type))
-                               return EMULATE_DONE;
+                               return 1;
                        if (ctxt->have_exception) {
                                /*
                                 * #UD should result in just EMULATION_FAILED, and trap-like
@@ -6591,28 +6649,32 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                                WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
                                             exception_type(ctxt->exception.vector) == EXCPT_TRAP);
                                inject_emulated_exception(vcpu);
-                               return EMULATE_DONE;
+                               return 1;
                        }
-                       if (emulation_type & EMULTYPE_SKIP)
-                               return EMULATE_FAIL;
                        return handle_emulation_failure(vcpu, emulation_type);
                }
        }
 
-       if ((emulation_type & EMULTYPE_VMWARE) &&
-           !is_vmware_backdoor_opcode(ctxt))
-               return EMULATE_FAIL;
+       if ((emulation_type & EMULTYPE_VMWARE_GP) &&
+           !is_vmware_backdoor_opcode(ctxt)) {
+               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+               return 1;
+       }
 
+       /*
+        * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
+        * for kvm_skip_emulated_instruction().  The caller is responsible for
+        * updating interruptibility state and injecting single-step #DBs.
+        */
        if (emulation_type & EMULTYPE_SKIP) {
                kvm_rip_write(vcpu, ctxt->_eip);
                if (ctxt->eflags & X86_EFLAGS_RF)
                        kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-               return EMULATE_DONE;
+               return 1;
        }
 
        if (retry_instruction(ctxt, cr2, emulation_type))
-               return EMULATE_DONE;
+               return 1;
 
        /* This is needed for the VMware backdoor interface to work, since
           it changes register values during the I/O operation. */
@@ -6628,18 +6690,18 @@ restart:
        r = x86_emulate_insn(ctxt);
 
        if (r == EMULATION_INTERCEPTED)
-               return EMULATE_DONE;
+               return 1;
 
        if (r == EMULATION_FAILED) {
                if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                        emulation_type))
-                       return EMULATE_DONE;
+                       return 1;
 
                return handle_emulation_failure(vcpu, emulation_type);
        }
 
        if (ctxt->have_exception) {
-               r = EMULATE_DONE;
+               r = 1;
                if (inject_emulated_exception(vcpu))
                        return r;
        } else if (vcpu->arch.pio.count) {
@@ -6650,16 +6712,18 @@ restart:
                        writeback = false;
                        vcpu->arch.complete_userspace_io = complete_emulated_pio;
                }
-               r = EMULATE_USER_EXIT;
+               r = 0;
        } else if (vcpu->mmio_needed) {
+               ++vcpu->stat.mmio_exits;
+
                if (!vcpu->mmio_is_write)
                        writeback = false;
-               r = EMULATE_USER_EXIT;
+               r = 0;
                vcpu->arch.complete_userspace_io = complete_emulated_mmio;
        } else if (r == EMULATION_RESTART)
                goto restart;
        else
-               r = EMULATE_DONE;
+               r = 1;
 
        if (writeback) {
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -6668,8 +6732,8 @@ restart:
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
                        kvm_rip_write(vcpu, ctxt->eip);
-                       if (r == EMULATE_DONE && ctxt->tf)
-                               kvm_vcpu_do_singlestep(vcpu, &r);
+                       if (r && ctxt->tf)
+                               r = kvm_vcpu_do_singlestep(vcpu);
                        __kvm_set_rflags(vcpu, ctxt->eflags);
                }
 
@@ -7874,8 +7938,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        bool req_immediate_exit = false;
 
        if (kvm_request_pending(vcpu)) {
-               if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
-                       kvm_x86_ops->get_vmcs12_pages(vcpu);
+               if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
+                       if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
+                               r = 0;
+                               goto out;
+                       }
+               }
                if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
                        kvm_mmu_unload(vcpu);
                if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -8263,12 +8331,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
        int r;
+
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-       if (r != EMULATE_DONE)
-               return 0;
-       return 1;
+       return r;
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -8636,23 +8703,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 
        ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
                                   has_error_code, error_code);
-
-       if (ret)
-               return EMULATE_FAIL;
+       if (ret) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+               return 0;
+       }
 
        kvm_rip_write(vcpu, ctxt->eip);
        kvm_set_rflags(vcpu, ctxt->eflags);
        kvm_make_request(KVM_REQ_EVENT, vcpu);
-       return EMULATE_DONE;
+       return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
 static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
-       if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                       (sregs->cr4 & X86_CR4_OSXSAVE))
-               return  -EINVAL;
-
        if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
                /*
                 * When EFER.LME and CR0.PG are set, the processor is in
@@ -8671,7 +8737,7 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
                        return -EINVAL;
        }
 
-       return 0;
+       return kvm_valid_cr4(vcpu, sregs->cr4);
 }
 
 static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@ -9361,6 +9427,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
        atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -9690,8 +9757,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
         * Scan sptes if dirty logging has been stopped, dropping those
         * which can be collapsed into a single large-page spte.  Later
         * page faults will create the large-page sptes.
+        *
+        * There is no need to do this in any of the following cases:
+        * CREATE:      No dirty mappings can exist yet.
+        * MOVE/DELETE: The old mappings will already have been cleaned up by
+        *              kvm_arch_flush_shadow_memslot().
         */
-       if ((change != KVM_MR_DELETE) &&
+       if (change == KVM_MR_FLAGS_ONLY &&
                (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
                !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
                kvm_mmu_zap_collapsible_sptes(kvm, new);
index b5274e2..dbf7442 100644 (file)
@@ -261,7 +261,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
 }
 
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
+void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 get_kvmclock_ns(struct kvm *kvm);
index b7375dc..c126571 100644 (file)
@@ -113,8 +113,8 @@ static void delay_mwaitx(unsigned long __loops)
                __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
                /*
-                * AMD, like Intel, supports the EAX hint and EAX=0xf
-                * means, do not enter any deep C-state and we use it
+                * AMD, like Intel's MWAIT, supports the EAX hint, and
+                * EAX=0xf0 means "do not enter any deep C-state"; we use it
                 * here in delay() to minimize wakeup latency.
                 */
                __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
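On the 0xf -> 0xf0 change above: the MWAITX C-state hint lives in EAX bits [7:4], so "disable all C-states" is 0xf placed in that field. Presumably the constant is updated to match in the same series (the definition below is an assumption, not shown in this hunk):

    /* hint in EAX[7:4]: 0xf there disables all deep C-states */
    #define MWAITX_DISABLE_CSTATES  0xf0U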
index fa16036..65ebe4b 100644 (file)
@@ -54,23 +54,10 @@ static u64 get_subtree_max_end(struct rb_node *node)
        return ret;
 }
 
-static u64 compute_subtree_max_end(struct memtype *data)
-{
-       u64 max_end = data->end, child_max_end;
-
-       child_max_end = get_subtree_max_end(data->rb.rb_right);
-       if (child_max_end > max_end)
-               max_end = child_max_end;
-
-       child_max_end = get_subtree_max_end(data->rb.rb_left);
-       if (child_max_end > max_end)
-               max_end = child_max_end;
-
-       return max_end;
-}
+#define NODE_END(node) ((node)->end)
 
-RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
-                    u64, subtree_max_end, compute_subtree_max_end)
+RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
+                        struct memtype, rb, u64, subtree_max_end, NODE_END)
 
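RB_DECLARE_CALLBACKS_MAX generates the propagate/copy/rotate callbacks that the removed compute_subtree_max_end() version open-coded, deriving the subtree maximum from the NODE_END() accessor. A hedged sketch of how the generated callbacks are consumed at insert/erase time (call sites assumed, not shown in this hunk):

    rb_link_node(&newdata->rb, parent, node);
    rb_insert_augmented(&newdata->rb, &memtype_rbroot,
                        &memtype_rb_augment_cb);

    rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);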
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
index 44816ff..3e4b903 100644 (file)
@@ -45,7 +45,7 @@ early_param("userpte", setup_userpte);
 
 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        paravirt_release_pte(page_to_pfn(pte));
        paravirt_tlb_remove_table(tlb, pte);
 }
@@ -357,7 +357,7 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
 
 static struct kmem_cache *pgd_cache;
 
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
 {
        /*
         * When PAE kernel is running as a Xen domain, it does not use
@@ -402,10 +402,6 @@ static inline void _pgd_free(pgd_t *pgd)
 }
 #else
 
-void __init pgd_cache_init(void)
-{
-}
-
 static inline pgd_t *_pgd_alloc(void)
 {
        return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
index 0881e1f..a8bd952 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/mmiotrace.h>
+#include <linux/security.h>
 
 static unsigned long mmio_address;
 module_param_hw(mmio_address, ulong, iomem, 0);
@@ -115,6 +116,10 @@ static void do_test_bulk_ioremapping(void)
 static int __init init(void)
 {
        unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
+       int ret = security_locked_down(LOCKDOWN_MMIOTRACE);
+
+       if (ret)
+               return ret;
 
        if (mmio_address == 0) {
                pr_err("you have to use the module argument mmio_address.\n");
index c202e1b..425e025 100644 (file)
@@ -917,9 +917,6 @@ static void __init kexec_enter_virtual_mode(void)
 
        if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
                runtime_code_page_mkexec();
-
-       /* clean DUMMY object */
-       efi_delete_dummy_variable();
 #endif
 }
 
index 5277490..fb4ee54 100644 (file)
@@ -25,6 +25,7 @@ KCOV_INSTRUMENT := n
 
 PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
 PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN)
 
 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
index 0d3365c..a04551e 100644 (file)
@@ -57,19 +57,7 @@ static efi_system_table_t __init *xen_efi_probe(void)
                return NULL;
 
        /* Here we know that Xen runs on EFI platform. */
-
-       efi.get_time                 = xen_efi_get_time;
-       efi.set_time                 = xen_efi_set_time;
-       efi.get_wakeup_time          = xen_efi_get_wakeup_time;
-       efi.set_wakeup_time          = xen_efi_set_wakeup_time;
-       efi.get_variable             = xen_efi_get_variable;
-       efi.get_next_variable        = xen_efi_get_next_variable;
-       efi.set_variable             = xen_efi_set_variable;
-       efi.query_variable_info      = xen_efi_query_variable_info;
-       efi.update_capsule           = xen_efi_update_capsule;
-       efi.query_capsule_caps       = xen_efi_query_capsule_caps;
-       efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
-       efi.reset_system             = xen_efi_reset_system;
+       xen_efi_runtime_setup();
 
        efi_systab_xen.tables = info->cfg.addr;
        efi_systab_xen.nr_tables = info->cfg.nent;
index 750f46a..205b117 100644 (file)
@@ -269,19 +269,41 @@ void xen_reboot(int reason)
                BUG();
 }
 
+static int reboot_reason = SHUTDOWN_reboot;
+static bool xen_legacy_crash;
 void xen_emergency_restart(void)
 {
-       xen_reboot(SHUTDOWN_reboot);
+       xen_reboot(reboot_reason);
 }
 
 static int
 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-       if (!kexec_crash_loaded())
-               xen_reboot(SHUTDOWN_crash);
+       if (!kexec_crash_loaded()) {
+               if (xen_legacy_crash)
+                       xen_reboot(SHUTDOWN_crash);
+
+               reboot_reason = SHUTDOWN_crash;
+
+               /*
+                * If panic_timeout==0 then we are supposed to wait forever.
+                * However, to preserve original dom0 behavior we have to drop
+                * into hypervisor. (domU behavior is controlled by its
+                * config file)
+                */
+               if (panic_timeout == 0)
+                       panic_timeout = -1;
+       }
        return NOTIFY_DONE;
 }
 
+static int __init parse_xen_legacy_crash(char *arg)
+{
+       xen_legacy_crash = true;
+       return 0;
+}
+early_param("xen_legacy_crash", parse_xen_legacy_crash);
+
 static struct notifier_block xen_panic_block = {
        .notifier_call = xen_panic_event,
        .priority = INT_MIN
index 58f79ab..5bfea37 100644 (file)
@@ -117,6 +117,14 @@ static void __init xen_banner(void)
        printk(KERN_INFO "Xen version: %d.%d%s%s\n",
               version >> 16, version & 0xffff, extra.extraversion,
               xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
+
+#ifdef CONFIG_X86_32
+       pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
+               "Support for running as 32-bit PV-guest under Xen will soon be removed\n"
+               "from the Linux kernel!\n"
+               "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
+               "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
+#endif
 }
 
 static void __init xen_pv_init_platform(void)
index a9dcd87..611b98a 100644 (file)
@@ -56,7 +56,7 @@
                reg = <0xf0100000 0x03f00000>;
 
                     // BUS_ADDRESS(3)  CPU_PHYSICAL(1)  SIZE(2)
-               ranges = <0x01000000 0x0 0xf0000000  0xf0000000  0x0 0x00010000>,
+               ranges = <0x01000000 0x0 0x00000000  0xf0000000  0x0 0x00010000>,
                         <0x02000000 0x0 0xf4000000  0xf4000000  0x0 0x08000000>;
 
                     // PCI_DEVICE(3)  INT#(1)  CONTROLLER(PHANDLE)  CONTROLLER_DATA(2)
index aeb15f4..be8b2be 100644 (file)
@@ -148,7 +148,7 @@ static inline void change_bit(unsigned int bit, volatile unsigned long *p)
                        "       getex   %0\n"
                        "       beqz    %0, 1b\n"
                        : "=&a" (tmp)
-                       : "a" (~mask), "a" (p)
+                       : "a" (mask), "a" (p)
                        : "memory");
 }
 
index dd744aa..1d38f0e 100644 (file)
@@ -55,7 +55,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
        if (!pte)
                return NULL;
        page = virt_to_page(pte);
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -69,7 +69,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        __free_page(pte);
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
index ce3ff5e..3f7fe5a 100644 (file)
@@ -238,7 +238,6 @@ extern void paging_init(void);
 # define swapper_pg_dir NULL
 static inline void paging_init(void) { }
 #endif
-static inline void pgtable_cache_init(void) { }
 
 /*
  * The pmd contains the kernel virtual address of the pte page.
index 06875fe..856e2da 100644 (file)
@@ -160,9 +160,6 @@ static inline void invalidate_dtlb_mapping (unsigned address)
                invalidate_dtlb_entry(tlb_entry);
 }
 
-#define check_pgt_cache()      do { } while (0)
-
-
 /*
  * DO NOT USE THESE FUNCTIONS.  These instructions aren't part of the Xtensa
  * ISA and exist only for test purposes..
index 6792928..3f80386 100644 (file)
@@ -100,7 +100,7 @@ do {                                                                        \
        case 4: __put_user_asm(x, ptr, retval, 4, "s32i", __cb); break; \
        case 8: {                                                       \
                     __typeof__(*ptr) __v64 = x;                        \
-                    retval = __copy_to_user(ptr, &__v64, 8);           \
+                    retval = __copy_to_user(ptr, &__v64, 8) ? -EFAULT : 0;     \
                     break;                                             \
                }                                                       \
        default: __put_user_bad();                                      \
@@ -132,14 +132,14 @@ do {                                                                      \
 #define __check_align_1  ""
 
 #define __check_align_2                                \
-       "   _bbci.l %3,  0, 1f          \n"     \
-       "   movi    %0, %4              \n"     \
+       "   _bbci.l %[addr], 0, 1f      \n"     \
+       "   movi    %[err], %[efault]   \n"     \
        "   _j      2f                  \n"
 
 #define __check_align_4                                \
-       "   _bbsi.l %3,  0, 0f          \n"     \
-       "   _bbci.l %3,  1, 1f          \n"     \
-       "0: movi    %0, %4              \n"     \
+       "   _bbsi.l %[addr], 0, 0f      \n"     \
+       "   _bbci.l %[addr], 1, 1f      \n"     \
+       "0: movi    %[err], %[efault]   \n"     \
        "   _j      2f                  \n"
 
 
@@ -151,40 +151,40 @@ do {                                                                      \
  * WARNING: If you modify this macro at all, verify that the
  * __check_align_* macros still work.
  */
-#define __put_user_asm(x, addr, err, align, insn, cb)  \
+#define __put_user_asm(x_, addr_, err_, align, insn, cb)\
 __asm__ __volatile__(                                  \
        __check_align_##align                           \
-       "1: "insn"  %2, %3, 0           \n"             \
+       "1: "insn"  %[x], %[addr], 0    \n"             \
        "2:                             \n"             \
        "   .section  .fixup,\"ax\"     \n"             \
        "   .align 4                    \n"             \
        "   .literal_position           \n"             \
        "5:                             \n"             \
-       "   movi   %1, 2b               \n"             \
-       "   movi   %0, %4               \n"             \
-       "   jx     %1                   \n"             \
+       "   movi   %[tmp], 2b           \n"             \
+       "   movi   %[err], %[efault]    \n"             \
+       "   jx     %[tmp]               \n"             \
        "   .previous                   \n"             \
        "   .section  __ex_table,\"a\"  \n"             \
        "   .long       1b, 5b          \n"             \
        "   .previous"                                  \
-       :"=r" (err), "=r" (cb)                          \
-       :"r" ((int)(x)), "r" (addr), "i" (-EFAULT), "0" (err))
+       :[err] "+r"(err_), [tmp] "=r"(cb)               \
+       :[x] "r"(x_), [addr] "r"(addr_), [efault] "i"(-EFAULT))
 
 #define __get_user_nocheck(x, ptr, size)                       \
 ({                                                             \
-       long __gu_err, __gu_val;                                \
-       __get_user_size(__gu_val, (ptr), (size), __gu_err);     \
-       (x) = (__force __typeof__(*(ptr)))__gu_val;             \
+       long __gu_err;                                          \
+       __get_user_size((x), (ptr), (size), __gu_err);          \
        __gu_err;                                               \
 })
 
 #define __get_user_check(x, ptr, size)                                 \
 ({                                                                     \
-       long __gu_err = -EFAULT, __gu_val = 0;                          \
+       long __gu_err = -EFAULT;                                        \
        const __typeof__(*(ptr)) *__gu_addr = (ptr);                    \
-       if (access_ok(__gu_addr, size))                 \
-               __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
-       (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
+       if (access_ok(__gu_addr, size))                                 \
+               __get_user_size((x), __gu_addr, (size), __gu_err);      \
+       else                                                            \
+               (x) = 0;                                                \
        __gu_err;                                                       \
 })
 
@@ -198,8 +198,17 @@ do {                                                                       \
        case 1: __get_user_asm(x, ptr, retval, 1, "l8ui", __cb);  break;\
        case 2: __get_user_asm(x, ptr, retval, 2, "l16ui", __cb); break;\
        case 4: __get_user_asm(x, ptr, retval, 4, "l32i", __cb);  break;\
-       case 8: retval = __copy_from_user(&x, ptr, 8);    break;        \
-       default: (x) = __get_user_bad();                                \
+       case 8: {                                                       \
+               u64 __x;                                                \
+               if (unlikely(__copy_from_user(&__x, ptr, 8))) {         \
+                       retval = -EFAULT;                               \
+                       (x) = 0;                                        \
+               } else {                                                \
+                       (x) = *(__force __typeof__((ptr)))&__x;         \
+               }                                                       \
+               break;                                                  \
+       }                                                               \
+       default: (x) = 0; __get_user_bad();                             \
        }                                                               \
 } while (0)
 
@@ -208,25 +217,28 @@ do {                                                                      \
  * WARNING: If you modify this macro at all, verify that the
  * __check_align_* macros still work.
  */
-#define __get_user_asm(x, addr, err, align, insn, cb) \
-__asm__ __volatile__(                  \
-       __check_align_##align                   \
-       "1: "insn"  %2, %3, 0           \n"     \
-       "2:                             \n"     \
-       "   .section  .fixup,\"ax\"     \n"     \
-       "   .align 4                    \n"     \
-       "   .literal_position           \n"     \
-       "5:                             \n"     \
-       "   movi   %1, 2b               \n"     \
-       "   movi   %2, 0                \n"     \
-       "   movi   %0, %4               \n"     \
-       "   jx     %1                   \n"     \
-       "   .previous                   \n"     \
-       "   .section  __ex_table,\"a\"  \n"     \
-       "   .long       1b, 5b          \n"     \
-       "   .previous"                          \
-       :"=r" (err), "=r" (cb), "=r" (x)        \
-       :"r" (addr), "i" (-EFAULT), "0" (err))
+#define __get_user_asm(x_, addr_, err_, align, insn, cb) \
+do {                                                   \
+       u32 __x = 0;                                    \
+       __asm__ __volatile__(                           \
+               __check_align_##align                   \
+               "1: "insn"  %[x], %[addr], 0    \n"     \
+               "2:                             \n"     \
+               "   .section  .fixup,\"ax\"     \n"     \
+               "   .align 4                    \n"     \
+               "   .literal_position           \n"     \
+               "5:                             \n"     \
+               "   movi   %[tmp], 2b           \n"     \
+               "   movi   %[err], %[efault]    \n"     \
+               "   jx     %[tmp]               \n"     \
+               "   .previous                   \n"     \
+               "   .section  __ex_table,\"a\"  \n"     \
+               "   .long       1b, 5b          \n"     \
+               "   .previous"                          \
+               :[err] "+r"(err_), [tmp] "=r"(cb), [x] "+r"(__x) \
+               :[addr] "r"(addr_), [efault] "i"(-EFAULT)); \
+       (x_) = (__force __typeof__(*(addr_)))__x;       \
+} while (0)
 
 
 /*
index ebbb488..e5e6437 100644 (file)
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index 04f19de..4092555 100644 (file)
@@ -119,13 +119,6 @@ EXPORT_SYMBOL(__invalidate_icache_range);
 // FIXME EXPORT_SYMBOL(screen_info);
 #endif
 
-EXPORT_SYMBOL(outsb);
-EXPORT_SYMBOL(outsw);
-EXPORT_SYMBOL(outsl);
-EXPORT_SYMBOL(insb);
-EXPORT_SYMBOL(insw);
-EXPORT_SYMBOL(insl);
-
 extern long common_exception_return;
 EXPORT_SYMBOL(common_exception_return);
 
index b33be92..0319d63 100644 (file)
@@ -2016,7 +2016,7 @@ static void bfq_add_request(struct request *rq)
                     (bfqq->last_serv_time_ns > 0 &&
                      bfqd->rqs_injected && bfqd->rq_in_driver > 0)) &&
                    time_is_before_eq_jiffies(bfqq->decrease_time_jif +
-                                             msecs_to_jiffies(100))) {
+                                             msecs_to_jiffies(10))) {
                        bfqd->last_empty_occupied_ns = ktime_get_ns();
                        /*
                         * Start the state machine for measuring the
@@ -2025,7 +2025,21 @@ static void bfq_add_request(struct request *rq)
                         * be set when rq will be dispatched.
                         */
                        bfqd->wait_dispatch = true;
-                       bfqd->rqs_injected = false;
+                       /*
+                        * If no I/O is in service in the drive, any
+                        * injection that occurred before the arrival of rq
+                        * will not affect rq's total service time. So the
+                        * injection limit must not be updated as a function
+                        * of that service time, unless new injection occurs
+                        * before rq completes. To update the limit only in
+                        * the latter case, reset rqs_injected here
+                        * (rqs_injected will be set again if injection is
+                        * performed on bfqq before rq completes).
+                        */
+                       if (bfqd->rq_in_driver == 0)
+                               bfqd->rqs_injected = false;
                }
        }
 
@@ -5784,14 +5798,14 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
        u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns;
        unsigned int old_limit = bfqq->inject_limit;
 
-       if (bfqq->last_serv_time_ns > 0) {
+       if (bfqq->last_serv_time_ns > 0 && bfqd->rqs_injected) {
                u64 threshold = (bfqq->last_serv_time_ns * 3)>>1;
 
                if (tot_time_ns >= threshold && old_limit > 0) {
                        bfqq->inject_limit--;
                        bfqq->decrease_time_jif = jiffies;
                } else if (tot_time_ns < threshold &&
-                          old_limit < bfqd->max_rq_in_driver<<1)
+                          old_limit <= bfqd->max_rq_in_driver)
                        bfqq->inject_limit++;
        }
 
@@ -5809,12 +5823,14 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
         */
        if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 1) ||
            tot_time_ns < bfqq->last_serv_time_ns) {
+               if (bfqq->last_serv_time_ns == 0) {
+                       /*
+                        * Now we certainly have a base value: make sure we
+                        * start trying injection.
+                        */
+                       bfqq->inject_limit = max_t(unsigned int, 1, old_limit);
+               }
                bfqq->last_serv_time_ns = tot_time_ns;
-               /*
-                * Now we certainly have a base value: make sure we
-                * start trying injection.
-                */
-               bfqq->inject_limit = max_t(unsigned int, 1, old_limit);
        } else if (!bfqd->rqs_injected && bfqd->rq_in_driver == 1)
                /*
                 * No I/O injected and no request still in service in
@@ -5830,6 +5846,7 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
 
        /* update complete, not waiting for any request completion any longer */
        bfqd->waited_rq = NULL;
+       bfqd->rqs_injected = false;
 }
 
 /*
index b6f20be..5d21027 100644 (file)
@@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q,
                          const struct blkcg_policy *pol)
 {
        struct blkg_policy_data *pd_prealloc = NULL;
-       struct blkcg_gq *blkg;
+       struct blkcg_gq *blkg, *pinned_blkg = NULL;
        int ret;
 
        if (blkcg_policy_enabled(q, pol))
@@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q,
 
        if (queue_is_mq(q))
                blk_mq_freeze_queue(q);
-pd_prealloc:
-       if (!pd_prealloc) {
-               pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
-               if (!pd_prealloc) {
-                       ret = -ENOMEM;
-                       goto out_bypass_end;
-               }
-       }
-
+retry:
        spin_lock_irq(&q->queue_lock);
 
-       /* blkg_list is pushed at the head, reverse walk to init parents first */
+       /* blkg_list is pushed at the head, reverse walk to allocate parents first */
        list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
                struct blkg_policy_data *pd;
 
                if (blkg->pd[pol->plid])
                        continue;
 
-               pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
-               if (!pd)
-                       swap(pd, pd_prealloc);
+               /* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+               if (blkg == pinned_blkg) {
+                       pd = pd_prealloc;
+                       pd_prealloc = NULL;
+               } else {
+                       pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+                                             blkg->blkcg);
+               }
+
                if (!pd) {
+                       /*
+                        * GFP_NOWAIT failed.  Free the existing one and
+                        * prealloc for @blkg w/ GFP_KERNEL.
+                        */
+                       if (pinned_blkg)
+                               blkg_put(pinned_blkg);
+                       blkg_get(blkg);
+                       pinned_blkg = blkg;
+
                        spin_unlock_irq(&q->queue_lock);
-                       goto pd_prealloc;
+
+                       if (pd_prealloc)
+                               pol->pd_free_fn(pd_prealloc);
+                       pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+                                                      blkg->blkcg);
+                       if (pd_prealloc)
+                               goto retry;
+                       else
+                               goto enomem;
                }
 
                blkg->pd[pol->plid] = pd;
                pd->blkg = blkg;
                pd->plid = pol->plid;
-               if (pol->pd_init_fn)
-                       pol->pd_init_fn(pd);
        }
 
+       /* all allocated, init in the same order */
+       if (pol->pd_init_fn)
+               list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+                       pol->pd_init_fn(blkg->pd[pol->plid]);
+
        __set_bit(pol->plid, q->blkcg_pols);
        ret = 0;
 
        spin_unlock_irq(&q->queue_lock);
-out_bypass_end:
+out:
        if (queue_is_mq(q))
                blk_mq_unfreeze_queue(q);
+       if (pinned_blkg)
+               blkg_put(pinned_blkg);
        if (pd_prealloc)
                pol->pd_free_fn(pd_prealloc);
        return ret;
+
+enomem:
+       /* alloc failed, nothing's initialized yet, free everything */
+       spin_lock_irq(&q->queue_lock);
+       list_for_each_entry(blkg, &q->blkg_list, q_node) {
+               if (blkg->pd[pol->plid]) {
+                       pol->pd_free_fn(blkg->pd[pol->plid]);
+                       blkg->pd[pol->plid] = NULL;
+               }
+       }
+       spin_unlock_irq(&q->queue_lock);
+       ret = -ENOMEM;
+       goto out;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
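
The rewritten blkcg_activate_policy() follows a common kernel shape: try an atomic (GFP_NOWAIT) allocation under the queue lock, and on failure drop the lock, preallocate with GFP_KERNEL, and retry. A hedged userspace sketch of that shape using a pthread mutex; all names here are illustrative, not kernel API:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;

/* alloc_fast is the GFP_NOWAIT analog (may fail), alloc_slow the
 * GFP_KERNEL analog (may sleep, so it must run without the lock). */
void *install_pd(void *(*alloc_fast)(void), void *(*alloc_slow)(void))
{
        void *prealloc = NULL;
        void *pd;

retry:
        pthread_mutex_lock(&q_lock);
        pd = prealloc ? prealloc : alloc_fast();
        prealloc = NULL;
        if (!pd) {
                /* fast path failed: drop the lock, allocate slowly, retry */
                pthread_mutex_unlock(&q_lock);
                prealloc = alloc_slow();
                if (!prealloc)
                        return NULL;            /* the -ENOMEM path */
                goto retry;
        }
        /* ... publish pd while still holding the lock ... */
        pthread_mutex_unlock(&q_lock);
        return pd;
}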
 
index 875e8d1..d5e668e 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
 #include <linux/psi.h>
@@ -1436,6 +1437,12 @@ bool blk_update_request(struct request *req, blk_status_t error,
        if (!req->bio)
                return false;
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+       if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+           error == BLK_STS_OK)
+               req->q->integrity.profile->complete_fn(req, nr_bytes);
+#endif
+
        if (unlikely(error && !blk_rq_is_passthrough(req) &&
                     !(req->rq_flags & RQF_QUIET)))
                print_req_error(req, error, __func__);
index aedd932..1eec9cb 100644 (file)
@@ -214,6 +214,16 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 
        /* release the tag's ownership to the req cloned from */
        spin_lock_irqsave(&fq->mq_flush_lock, flags);
+
+       if (!refcount_dec_and_test(&flush_rq->ref)) {
+               fq->rq_status = error;
+               spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+               return;
+       }
+
+       if (fq->rq_status != BLK_STS_OK)
+               error = fq->rq_status;
+
        hctx = flush_rq->mq_hctx;
        if (!q->elevator) {
                blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
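
The hunk above makes flush_end_io() and the timeout path share the flush request's reference count: whichever path drops the last reference runs the real completion, and the other merely records its status. A small sketch of the idea; in the kernel this runs under fq->mq_flush_lock, so the sketch assumes the same external serialization rather than being lock-free:

#include <stdatomic.h>
#include <stdbool.h>

struct flush_rq {
        atomic_int ref;
        int rq_status;          /* BLK_STS_* analog, 0 == OK */
};

static bool flush_rq_put(struct flush_rq *rq, int error)
{
        if (atomic_fetch_sub(&rq->ref, 1) != 1) {
                rq->rq_status = error;  /* loser: stash status, bail out */
                return false;
        }
        if (rq->rq_status != 0)
                error = rq->rq_status;  /* pick up a status stashed earlier */
        /* ... winner: complete the request with 'error' ... */
        return true;
}
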
index ca39b46..ff1070e 100644 (file)
@@ -368,10 +368,21 @@ static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
        return BLK_STS_OK;
 }
 
+static void blk_integrity_nop_prepare(struct request *rq)
+{
+}
+
+static void blk_integrity_nop_complete(struct request *rq,
+               unsigned int nr_bytes)
+{
+}
+
 static const struct blk_integrity_profile nop_profile = {
        .name = "nop",
        .generate_fn = blk_integrity_nop_fn,
        .verify_fn = blk_integrity_nop_fn,
+       .prepare_fn = blk_integrity_nop_prepare,
+       .complete_fn = blk_integrity_nop_complete,
 };
 
 /**
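
Registering empty prepare_fn/complete_fn hooks in nop_profile is the null-object pattern: callers such as blk_update_request() can invoke the hooks unconditionally instead of testing for NULL. An illustrative standalone C version:

#include <stdio.h>

struct profile {
        const char *name;
        void (*prepare)(void *req);
        void (*complete)(void *req, unsigned int nr_bytes);
};

static void nop_prepare(void *req) { }
static void nop_complete(void *req, unsigned int nr_bytes) { }

static const struct profile nop = {
        .name = "nop",
        .prepare = nop_prepare,
        .complete = nop_complete,
};

int main(void)
{
        const struct profile *p = &nop;

        p->prepare(NULL);        /* always safe: no NULL check needed */
        p->complete(NULL, 0);
        printf("profile %s ran\n", p->name);
        return 0;
}
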
index 3b39deb..a7ed434 100644 (file)
@@ -529,8 +529,8 @@ struct iocg_wake_ctx {
 static const struct ioc_params autop[] = {
        [AUTOP_HDD] = {
                .qos                            = {
-                       [QOS_RLAT]              =         50000, /* 50ms */
-                       [QOS_WLAT]              =         50000,
+                       [QOS_RLAT]              =        250000, /* 250ms */
+                       [QOS_WLAT]              =        250000,
                        [QOS_MIN]               = VRATE_MIN_PPM,
                        [QOS_MAX]               = VRATE_MAX_PPM,
                },
@@ -1343,7 +1343,7 @@ static void ioc_timer_fn(struct timer_list *timer)
        u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
        u32 missed_ppm[2], rq_wait_pct;
        u64 period_vtime;
-       int i;
+       int prev_busy_level, i;
 
        /* how were the latencies during the period? */
        ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct);
@@ -1407,7 +1407,8 @@ static void ioc_timer_fn(struct timer_list *timer)
                 * comparing vdone against period start.  If lagging behind
                 * IOs from past periods, don't increase vrate.
                 */
-               if (!atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
+               if ((ppm_rthr != MILLION || ppm_wthr != MILLION) &&
+                   !atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
                    time_after64(vtime, vdone) &&
                    time_after64(vtime, now.vnow -
                                 MAX_LAGGING_PERIODS * period_vtime) &&
@@ -1531,26 +1532,29 @@ skip_surplus_transfers:
         * and experiencing shortages but not surpluses, we're too stingy
         * and should increase vtime rate.
         */
+       prev_busy_level = ioc->busy_level;
        if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
            missed_ppm[READ] > ppm_rthr ||
            missed_ppm[WRITE] > ppm_wthr) {
                ioc->busy_level = max(ioc->busy_level, 0);
                ioc->busy_level++;
-       } else if (nr_lagging) {
-               ioc->busy_level = max(ioc->busy_level, 0);
-       } else if (nr_shortages && !nr_surpluses &&
-                  rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
+       } else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
                   missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 &&
                   missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) {
-               ioc->busy_level = min(ioc->busy_level, 0);
-               ioc->busy_level--;
+               /* take action iff there is contention */
+               if (nr_shortages && !nr_lagging) {
+                       ioc->busy_level = min(ioc->busy_level, 0);
+                       /* redistribute surpluses first */
+                       if (!nr_surpluses)
+                               ioc->busy_level--;
+               }
        } else {
                ioc->busy_level = 0;
        }
 
        ioc->busy_level = clamp(ioc->busy_level, -1000, 1000);
 
-       if (ioc->busy_level) {
+       if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) {
                u64 vrate = atomic64_read(&ioc->vtime_rate);
                u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max;
 
@@ -1592,6 +1596,10 @@ skip_surplus_transfers:
                atomic64_set(&ioc->vtime_rate, vrate);
                ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP(
                        ioc->period_us * vrate * INUSE_MARGIN_PCT, 100);
+       } else if (ioc->busy_level != prev_busy_level || nr_lagging) {
+               trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
+                                          &missed_ppm, rq_wait_pct, nr_lagging,
+                                          nr_shortages, nr_surpluses);
        }
 
        ioc_refresh_params(ioc, false);
@@ -2102,10 +2110,10 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
                        goto einval;
        }
 
-       spin_lock_irq(&iocg->ioc->lock);
+       spin_lock(&iocg->ioc->lock);
        iocg->cfg_weight = v;
        weight_updated(iocg);
-       spin_unlock_irq(&iocg->ioc->lock);
+       spin_unlock(&iocg->ioc->lock);
 
        blkg_conf_finish(&ctx);
        return nbytes;
index c9d183d..ca22afd 100644 (file)
@@ -555,8 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q)
        struct blk_mq_hw_ctx *hctx;
        int i;
 
-       lockdep_assert_held(&q->sysfs_lock);
-
        queue_for_each_hw_ctx(q, hctx, i) {
                if (hctx->sched_tags)
                        blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
index 20a49be..ec79115 100644 (file)
@@ -30,6 +30,7 @@
 #include <trace/events/block.h>
 
 #include <linux/blk-mq.h>
+#include <linux/t10-pi.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -700,6 +701,11 @@ void blk_mq_start_request(struct request *rq)
                 */
                rq->nr_phys_segments++;
        }
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+       if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
+               q->integrity.profile->prepare_fn(rq);
+#endif
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
@@ -912,7 +918,10 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
         */
        if (blk_mq_req_expired(rq, next))
                blk_mq_rq_timed_out(rq, reserved);
-       if (refcount_dec_and_test(&rq->ref))
+
+       if (is_flush_rq(rq, hctx))
+               rq->end_io(rq, 0);
+       else if (refcount_dec_and_test(&rq->ref))
                __blk_mq_free_request(rq);
 
        return true;
@@ -1983,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                /* bypass scheduler for flush rq */
                blk_insert_flush(rq);
                blk_mq_run_hw_queue(data.hctx, true);
-       } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
+       } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
+                               !blk_queue_nonrot(q))) {
                /*
                 * Use plugging if we have a ->commit_rqs() hook as well, as
                 * we know the driver uses bd->last in a smart fashion.
+                *
+                * Use normal plugging if this disk is a slow HDD, as sequential
+                * IO may benefit a lot from plug merging.
                 */
                unsigned int request_count = plug->rq_count;
                struct request *last = NULL;
@@ -2003,6 +2016,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                }
 
                blk_add_rq_to_plug(plug, rq);
+       } else if (q->elevator) {
+               blk_mq_sched_insert_request(rq, false, true, true);
        } else if (plug && !blk_queue_nomerges(q)) {
                /*
                 * We do limited plugging. If the bio can be merged, do that.
@@ -2026,8 +2041,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                        blk_mq_try_issue_directly(data.hctx, same_queue_rq,
                                        &cookie);
                }
-       } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
-                       !data.hctx->dispatch_busy)) {
+       } else if ((q->nr_hw_queues > 1 && is_sync) ||
+                       !data.hctx->dispatch_busy) {
                blk_mq_try_issue_directly(data.hctx, rq, &cookie);
        } else {
                blk_mq_sched_insert_request(rq, false, true, true);
index 61b635b..6564606 100644 (file)
@@ -160,24 +160,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd)
        return ret;
 }
 
-void rq_depth_scale_up(struct rq_depth *rqd)
+/* Returns true on success and false if scaling up wasn't possible */
+bool rq_depth_scale_up(struct rq_depth *rqd)
 {
        /*
         * Hit max in previous round, stop here
         */
        if (rqd->scaled_max)
-               return;
+               return false;
 
        rqd->scale_step--;
 
        rqd->scaled_max = rq_depth_calc_max_depth(rqd);
+       return true;
 }
 
 /*
  * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
- * had a latency violation.
+ * had a latency violation. Returns true on success and returns false if
+ * scaling down wasn't possible.
  */
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
 {
        /*
         * Stop scaling down when we've hit the limit. This also prevents
@@ -185,7 +188,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
         * keep up.
         */
        if (rqd->max_depth == 1)
-               return;
+               return false;
 
        if (rqd->scale_step < 0 && hard_throttle)
                rqd->scale_step = 0;
@@ -194,6 +197,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
 
        rqd->scaled_max = false;
        rq_depth_calc_max_depth(rqd);
+       return true;
 }
 
 struct rq_qos_wait_data {
index 08a09db..2bc43e9 100644 (file)
@@ -108,16 +108,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 
 static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 {
-       struct rq_qos *cur, *prev = NULL;
-       for (cur = q->rq_qos; cur; cur = cur->next) {
-               if (cur == rqos) {
-                       if (prev)
-                               prev->next = rqos->next;
-                       else
-                               q->rq_qos = cur;
+       struct rq_qos **cur;
+
+       for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+               if (*cur == rqos) {
+                       *cur = rqos->next;
                        break;
                }
-               prev = cur;
        }
 
        blk_mq_debugfs_unregister_rqos(rqos);
@@ -130,8 +127,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
                 acquire_inflight_cb_t *acquire_inflight_cb,
                 cleanup_cb_t *cleanup_cb);
 bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
-void rq_depth_scale_up(struct rq_depth *rqd);
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+bool rq_depth_scale_up(struct rq_depth *rqd);
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
 bool rq_depth_calc_max_depth(struct rq_depth *rqd);
 
 void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
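
The rq_qos_del() rewrite above is the classic pointer-to-pointer deletion idiom: walking the address of each link removes the head/interior special-casing (and the bug in the old prev/cur version, which failed to actually unlink the list head). A self-contained sketch of the same idiom, with illustrative names:

#include <stdio.h>
#include <stdlib.h>

struct node { int val; struct node *next; };

/* Walk a pointer to the link itself, so deleting the head and deleting
 * an interior node share one code path and no "prev" bookkeeping. */
static void list_del_val(struct node **head, int val)
{
        struct node **cur;

        for (cur = head; *cur; cur = &(*cur)->next) {
                if ((*cur)->val == val) {
                        struct node *victim = *cur;

                        *cur = victim->next;
                        free(victim);
                        break;
                }
        }
}

int main(void)
{
        struct node *head = NULL;

        for (int i = 3; i > 0; i--) {
                struct node *n = malloc(sizeof(*n));

                n->val = i;
                n->next = head;
                head = n;
        }
        list_del_val(&head, 1);                 /* head deletion, no special case */
        for (struct node *n = head; n; n = n->next)
                printf("%d ", n->val);          /* prints: 2 3 */
        printf("\n");
        return 0;
}
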
index b82736c..46f5198 100644 (file)
@@ -482,7 +482,6 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
        blk_mq_quiesce_queue(q);
 
        wbt_set_min_lat(q, val);
-       wbt_update_limits(q);
 
        blk_mq_unquiesce_queue(q);
        blk_mq_unfreeze_queue(q);
@@ -989,13 +988,11 @@ int blk_register_queue(struct gendisk *disk)
                blk_mq_debugfs_register(q);
        }
 
-       /*
-        * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator
-        * switch won't happen at all.
-        */
+       mutex_lock(&q->sysfs_lock);
        if (q->elevator) {
                ret = elv_register_queue(q, false);
                if (ret) {
+                       mutex_unlock(&q->sysfs_lock);
                        mutex_unlock(&q->sysfs_dir_lock);
                        kobject_del(&q->kobj);
                        blk_trace_remove_sysfs(dev);
@@ -1005,7 +1002,6 @@ int blk_register_queue(struct gendisk *disk)
                has_elevator = true;
        }
 
-       mutex_lock(&q->sysfs_lock);
        blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
        wbt_enable_default(q);
        blk_throtl_register_queue(q);
@@ -1062,12 +1058,10 @@ void blk_unregister_queue(struct gendisk *disk)
        kobject_del(&q->kobj);
        blk_trace_remove_sysfs(disk_to_dev(disk));
 
-       /*
-        * q->kobj has been removed, so it is safe to check if elevator
-        * exists without holding q->sysfs_lock.
-        */
+       mutex_lock(&q->sysfs_lock);
        if (q->elevator)
                elv_unregister_queue(q);
+       mutex_unlock(&q->sysfs_lock);
        mutex_unlock(&q->sysfs_dir_lock);
 
        kobject_put(&disk_to_dev(disk)->kobj);
index 8af553a..8641ba9 100644 (file)
@@ -308,7 +308,8 @@ static void calc_wb_limits(struct rq_wb *rwb)
 
 static void scale_up(struct rq_wb *rwb)
 {
-       rq_depth_scale_up(&rwb->rq_depth);
+       if (!rq_depth_scale_up(&rwb->rq_depth))
+               return;
        calc_wb_limits(rwb);
        rwb->unknown_cnt = 0;
        rwb_wake_all(rwb);
@@ -317,7 +318,8 @@ static void scale_up(struct rq_wb *rwb)
 
 static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 {
-       rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
+       if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
+               return;
        calc_wb_limits(rwb);
        rwb->unknown_cnt = 0;
        rwb_trace_step(rwb, "scale down");
index ed347f7..47fba93 100644 (file)
@@ -19,6 +19,7 @@ struct blk_flush_queue {
        unsigned int            flush_queue_delayed:1;
        unsigned int            flush_pending_idx:1;
        unsigned int            flush_running_idx:1;
+       blk_status_t            rq_status;
        unsigned long           flush_pending_since;
        struct list_head        flush_queue[2];
        struct list_head        flush_data_in_flight;
@@ -47,6 +48,12 @@ static inline void __blk_get_queue(struct request_queue *q)
        kobject_get(&q->kobj);
 }
 
+static inline bool
+is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
+{
+       return hctx->fq->flush_rq == req;
+}
+
 struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
                int node, int cmd_size, gfp_t flags);
 void blk_free_flush_queue(struct blk_flush_queue *q);
@@ -194,6 +201,8 @@ void elv_unregister_queue(struct request_queue *q);
 static inline void elevator_exit(struct request_queue *q,
                struct elevator_queue *e)
 {
+       lockdep_assert_held(&q->sysfs_lock);
+
        blk_mq_sched_free_requests(q);
        __elevator_exit(q, e);
 }
index 785dd58..347dda1 100644 (file)
@@ -266,6 +266,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct request *req = bd->rq;
        struct bsg_set *bset =
                container_of(q->tag_set, struct bsg_set, tag_set);
+       int sts = BLK_STS_IOERR;
        int ret;
 
        blk_mq_start_request(req);
@@ -274,14 +275,15 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
                return BLK_STS_IOERR;
 
        if (!bsg_prepare_job(dev, req))
-               return BLK_STS_IOERR;
+               goto out;
 
        ret = bset->job_fn(blk_mq_rq_to_pdu(req));
-       if (ret)
-               return BLK_STS_IOERR;
+       if (!ret)
+               sts = BLK_STS_OK;
 
+out:
        put_device(dev);
-       return BLK_STS_OK;
+       return sts;
 }
 
 /* called right after the request is allocated for the request_queue */
index bba10e8..076ba73 100644 (file)
@@ -503,9 +503,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
                if (uevent)
                        kobject_uevent(&e->kobj, KOBJ_ADD);
 
-               mutex_lock(&q->sysfs_lock);
                e->registered = 1;
-               mutex_unlock(&q->sysfs_lock);
        }
        return error;
 }
@@ -523,11 +521,9 @@ void elv_unregister_queue(struct request_queue *q)
                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);
 
-               mutex_lock(&q->sysfs_lock);
                e->registered = 0;
                /* Re-enable throttling in case elevator disabled it */
                wbt_enable_default(q);
-               mutex_unlock(&q->sysfs_lock);
        }
 }
 
@@ -590,32 +586,11 @@ int elevator_switch_mq(struct request_queue *q,
        lockdep_assert_held(&q->sysfs_lock);
 
        if (q->elevator) {
-               if (q->elevator->registered) {
-                       mutex_unlock(&q->sysfs_lock);
-
-                       /*
-                        * Concurrent elevator switch can't happen because
-                        * sysfs write is always exclusively on same file.
-                        *
-                        * Also the elevator queue won't be freed after
-                        * sysfs_lock is released because kobject_del() in
-                        * blk_unregister_queue() waits for completion of
-                        * .store & .show on its attributes.
-                        */
+               if (q->elevator->registered)
                        elv_unregister_queue(q);
 
-                       mutex_lock(&q->sysfs_lock);
-               }
                ioc_clear_queue(q);
                elevator_exit(q, q->elevator);
-
-               /*
-                * sysfs_lock may be dropped, so re-check if queue is
-                * unregistered. If yes, don't switch to new elevator
-                * any more
-                */
-               if (!blk_queue_registered(q))
-                       return 0;
        }
 
        ret = blk_mq_init_sched(q, new_e);
@@ -623,11 +598,7 @@ int elevator_switch_mq(struct request_queue *q,
                goto out;
 
        if (new_e) {
-               mutex_unlock(&q->sysfs_lock);
-
                ret = elv_register_queue(q, true);
-
-               mutex_lock(&q->sysfs_lock);
                if (ret) {
                        elevator_exit(q, q->elevator);
                        goto out;
@@ -645,7 +616,8 @@ out:
 
 static inline bool elv_support_iosched(struct request_queue *q)
 {
-       if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
+       if (!q->mq_ops ||
+           (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
                return false;
        return true;
 }
index 4e95a97..b4c7619 100644 (file)
@@ -129,7 +129,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
                { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 },
 
        /* tables */
-       [OPAL_TABLE_TABLE]
+       [OPAL_TABLE_TABLE] =
                { 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 },
        [OPAL_LOCKINGRANGE_GLOBAL] =
                { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
@@ -372,8 +372,8 @@ static void check_geometry(struct opal_dev *dev, const void *data)
 {
        const struct d0_geometry_features *geo = data;
 
-       dev->align = geo->alignment_granularity;
-       dev->lowest_lba = geo->lowest_aligned_lba;
+       dev->align = be64_to_cpu(geo->alignment_granularity);
+       dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba);
 }
 
 static int execute_step(struct opal_dev *dev,
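
check_geometry() now byte-swaps the Level 0 discovery fields because TCG Opal data arrives big-endian; the raw assignment it replaces was only correct on big-endian hosts. A tiny sketch of the conversion, independent of any kernel helper:

#include <stdint.h>
#include <stdio.h>

static uint64_t be64_to_host(const uint8_t *p)
{
        uint64_t v = 0;

        for (int i = 0; i < 8; i++)
                v = (v << 8) | p[i];    /* most significant byte first */
        return v;
}

int main(void)
{
        const uint8_t wire[8] = { 0, 0, 0, 0, 0, 0, 0x10, 0x00 };

        printf("alignment granularity: %llu\n",
               (unsigned long long)be64_to_host(wire));     /* 4096 */
        return 0;
}
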
index 0c00946..9803c7e 100644 (file)
@@ -27,7 +27,7 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
  * tag.
  */
 static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
-               csum_fn *fn, unsigned int type)
+               csum_fn *fn, enum t10_dif_type type)
 {
        unsigned int i;
 
@@ -37,7 +37,7 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
                pi->guard_tag = fn(iter->data_buf, iter->interval);
                pi->app_tag = 0;
 
-               if (type == 1)
+               if (type == T10_PI_TYPE1_PROTECTION)
                        pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
                else
                        pi->ref_tag = 0;
@@ -51,17 +51,18 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
 }
 
 static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
-               csum_fn *fn, unsigned int type)
+               csum_fn *fn, enum t10_dif_type type)
 {
        unsigned int i;
 
+       BUG_ON(type == T10_PI_TYPE0_PROTECTION);
+
        for (i = 0 ; i < iter->data_size ; i += iter->interval) {
                struct t10_pi_tuple *pi = iter->prot_buf;
                __be16 csum;
 
-               switch (type) {
-               case 1:
-               case 2:
+               if (type == T10_PI_TYPE1_PROTECTION ||
+                   type == T10_PI_TYPE2_PROTECTION) {
                        if (pi->app_tag == T10_PI_APP_ESCAPE)
                                goto next;
 
@@ -73,12 +74,10 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
                                       iter->seed, be32_to_cpu(pi->ref_tag));
                                return BLK_STS_PROTECTION;
                        }
-                       break;
-               case 3:
+               } else if (type == T10_PI_TYPE3_PROTECTION) {
                        if (pi->app_tag == T10_PI_APP_ESCAPE &&
                            pi->ref_tag == T10_PI_REF_ESCAPE)
                                goto next;
-                       break;
                }
 
                csum = fn(iter->data_buf, iter->interval);
@@ -102,94 +101,40 @@ next:
 
 static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
 {
-       return t10_pi_generate(iter, t10_pi_crc_fn, 1);
+       return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
 }
 
 static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
 {
-       return t10_pi_generate(iter, t10_pi_ip_fn, 1);
+       return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
 }
 
 static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
 {
-       return t10_pi_verify(iter, t10_pi_crc_fn, 1);
+       return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
 }
 
 static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
 {
-       return t10_pi_verify(iter, t10_pi_ip_fn, 1);
-}
-
-static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
-{
-       return t10_pi_generate(iter, t10_pi_crc_fn, 3);
-}
-
-static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
-{
-       return t10_pi_generate(iter, t10_pi_ip_fn, 3);
-}
-
-static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
-{
-       return t10_pi_verify(iter, t10_pi_crc_fn, 3);
+       return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
 }
 
-static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
-{
-       return t10_pi_verify(iter, t10_pi_ip_fn, 3);
-}
-
-const struct blk_integrity_profile t10_pi_type1_crc = {
-       .name                   = "T10-DIF-TYPE1-CRC",
-       .generate_fn            = t10_pi_type1_generate_crc,
-       .verify_fn              = t10_pi_type1_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type1_crc);
-
-const struct blk_integrity_profile t10_pi_type1_ip = {
-       .name                   = "T10-DIF-TYPE1-IP",
-       .generate_fn            = t10_pi_type1_generate_ip,
-       .verify_fn              = t10_pi_type1_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type1_ip);
-
-const struct blk_integrity_profile t10_pi_type3_crc = {
-       .name                   = "T10-DIF-TYPE3-CRC",
-       .generate_fn            = t10_pi_type3_generate_crc,
-       .verify_fn              = t10_pi_type3_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type3_crc);
-
-const struct blk_integrity_profile t10_pi_type3_ip = {
-       .name                   = "T10-DIF-TYPE3-IP",
-       .generate_fn            = t10_pi_type3_generate_ip,
-       .verify_fn              = t10_pi_type3_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type3_ip);
-
 /**
- * t10_pi_prepare - prepare PI prior submitting request to device
+ * t10_pi_type1_prepare - prepare PI prior to submitting request to device
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
  * partitioning, MD/DM cloning, etc. the actual physical start sector is
  * likely to be different. Remap protection information to match the
  * physical LBA.
- *
- * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_prepare(struct request *rq, u8 protection_type)
+static void t10_pi_type1_prepare(struct request *rq)
 {
        const int tuple_sz = rq->q->integrity.tuple_size;
        u32 ref_tag = t10_pi_ref_tag(rq);
        struct bio *bio;
 
-       if (protection_type == T10_PI_TYPE3_PROTECTION)
-               return;
-
        __rq_for_each_bio(bio, rq) {
                struct bio_integrity_payload *bip = bio_integrity(bio);
                u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -222,13 +167,11 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
                bip->bip_flags |= BIP_MAPPED_INTEGRITY;
        }
 }
-EXPORT_SYMBOL(t10_pi_prepare);
 
 /**
- * t10_pi_complete - prepare PI prior returning request to the block layer
+ * t10_pi_type1_complete - prepare PI prior to returning request to the blk layer
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
- * @intervals:       total elements to prepare
+ * @nr_bytes:        total bytes to prepare
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
@@ -236,19 +179,14 @@ EXPORT_SYMBOL(t10_pi_prepare);
  * likely to be different. Since the physical start sector was submitted
  * to the device, we should remap it back to virtual values expected by the
  * block layer.
- *
- * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_complete(struct request *rq, u8 protection_type,
-                    unsigned int intervals)
+static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
 {
+       unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
        const int tuple_sz = rq->q->integrity.tuple_size;
        u32 ref_tag = t10_pi_ref_tag(rq);
        struct bio *bio;
 
-       if (protection_type == T10_PI_TYPE3_PROTECTION)
-               return;
-
        __rq_for_each_bio(bio, rq) {
                struct bio_integrity_payload *bip = bio_integrity(bio);
                u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -276,4 +214,73 @@ void t10_pi_complete(struct request *rq, u8 protection_type,
                }
        }
 }
-EXPORT_SYMBOL(t10_pi_complete);
+
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+{
+       return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+{
+       return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+{
+       return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+{
+       return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+/**
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_prepare(struct request *rq)
+{
+}
+
+/**
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
+const struct blk_integrity_profile t10_pi_type1_crc = {
+       .name                   = "T10-DIF-TYPE1-CRC",
+       .generate_fn            = t10_pi_type1_generate_crc,
+       .verify_fn              = t10_pi_type1_verify_crc,
+       .prepare_fn             = t10_pi_type1_prepare,
+       .complete_fn            = t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_crc);
+
+const struct blk_integrity_profile t10_pi_type1_ip = {
+       .name                   = "T10-DIF-TYPE1-IP",
+       .generate_fn            = t10_pi_type1_generate_ip,
+       .verify_fn              = t10_pi_type1_verify_ip,
+       .prepare_fn             = t10_pi_type1_prepare,
+       .complete_fn            = t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_ip);
+
+const struct blk_integrity_profile t10_pi_type3_crc = {
+       .name                   = "T10-DIF-TYPE3-CRC",
+       .generate_fn            = t10_pi_type3_generate_crc,
+       .verify_fn              = t10_pi_type3_verify_crc,
+       .prepare_fn             = t10_pi_type3_prepare,
+       .complete_fn            = t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_crc);
+
+const struct blk_integrity_profile t10_pi_type3_ip = {
+       .name                   = "T10-DIF-TYPE3-IP",
+       .generate_fn            = t10_pi_type3_generate_ip,
+       .verify_fn              = t10_pi_type3_verify_ip,
+       .prepare_fn             = t10_pi_type3_prepare,
+       .complete_fn            = t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_ip);
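
The Type 1 prepare/complete hooks above remap reference tags between the virtual seed the block layer used and the physical LBA the device saw. A simplified sketch of that loop; real tuples store the ref tag big-endian and carry guard/app tags as well, which this omits:

#include <stdint.h>
#include <stdio.h>

struct pi_tuple { uint32_t ref_tag; };

/* Rewrite every tuple still carrying the expected (virtual) seed to
 * the physical LBA, one tuple per data interval. */
static void remap_ref_tags(struct pi_tuple *pi, unsigned int nr,
                           uint32_t virt, uint32_t phys)
{
        for (unsigned int i = 0; i < nr; i++, virt++, phys++)
                if (pi[i].ref_tag == virt)
                        pi[i].ref_tag = phys;
}

int main(void)
{
        struct pi_tuple pi[4] = { {100}, {101}, {102}, {103} };

        remap_ref_tags(pi, 4, 100, 5000);       /* virtual 100.. -> physical 5000.. */
        printf("%u %u %u %u\n", pi[0].ref_tag, pi[1].ref_tag,
               pi[2].ref_tag, pi[3].ref_tag);   /* 5000 5001 5002 5003 */
        return 0;
}
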
index 1eba08a..7982911 100644 (file)
@@ -190,33 +190,27 @@ late_initcall(load_system_certificate_list);
 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 /**
- * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
+ * verify_pkcs7_message_sig - Verify a PKCS#7-based signature on system data.
  * @data: The data to be verified (NULL if expecting internal data).
  * @len: Size of @data.
- * @raw_pkcs7: The PKCS#7 message that is the signature.
- * @pkcs7_len: The size of @raw_pkcs7.
+ * @pkcs7: The PKCS#7 message that is the signature.
  * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
  *                                     (void *)1UL for all trusted keys).
  * @usage: The use to which the key is being put.
  * @view_content: Callback to gain access to content.
  * @ctx: Context for callback.
  */
-int verify_pkcs7_signature(const void *data, size_t len,
-                          const void *raw_pkcs7, size_t pkcs7_len,
-                          struct key *trusted_keys,
-                          enum key_being_used_for usage,
-                          int (*view_content)(void *ctx,
-                                              const void *data, size_t len,
-                                              size_t asn1hdrlen),
-                          void *ctx)
+int verify_pkcs7_message_sig(const void *data, size_t len,
+                            struct pkcs7_message *pkcs7,
+                            struct key *trusted_keys,
+                            enum key_being_used_for usage,
+                            int (*view_content)(void *ctx,
+                                                const void *data, size_t len,
+                                                size_t asn1hdrlen),
+                            void *ctx)
 {
-       struct pkcs7_message *pkcs7;
        int ret;
 
-       pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len);
-       if (IS_ERR(pkcs7))
-               return PTR_ERR(pkcs7);
-
        /* The data should be detached - so we need to supply it. */
        if (data && pkcs7_supply_detached_data(pkcs7, data, len) < 0) {
                pr_err("PKCS#7 signature with non-detached data\n");
@@ -269,6 +263,41 @@ int verify_pkcs7_signature(const void *data, size_t len,
        }
 
 error:
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+
+/**
+ * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
+ * @data: The data to be verified (NULL if expecting internal data).
+ * @len: Size of @data.
+ * @raw_pkcs7: The PKCS#7 message that is the signature.
+ * @pkcs7_len: The size of @raw_pkcs7.
+ * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
+ *                                     (void *)1UL for all trusted keys).
+ * @usage: The use to which the key is being put.
+ * @view_content: Callback to gain access to content.
+ * @ctx: Context for callback.
+ */
+int verify_pkcs7_signature(const void *data, size_t len,
+                          const void *raw_pkcs7, size_t pkcs7_len,
+                          struct key *trusted_keys,
+                          enum key_being_used_for usage,
+                          int (*view_content)(void *ctx,
+                                              const void *data, size_t len,
+                                              size_t asn1hdrlen),
+                          void *ctx)
+{
+       struct pkcs7_message *pkcs7;
+       int ret;
+
+       pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len);
+       if (IS_ERR(pkcs7))
+               return PTR_ERR(pkcs7);
+
+       ret = verify_pkcs7_message_sig(data, len, pkcs7, trusted_keys, usage,
+                                      view_content, ctx);
+
        pkcs7_free_message(pkcs7);
        pr_devel("<==%s() = %d\n", __func__, ret);
        return ret;
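
The refactor splits parsing from verification, so callers that already hold a parsed pkcs7_message can skip re-parsing and re-freeing while the original entry point stays source-compatible. A skeletal sketch of the wrapper shape, with stand-in types and stub bodies:

#include <stdlib.h>

struct msg { int dummy; };

static struct msg *parse(const void *raw, unsigned long len)
{
        (void)raw; (void)len;
        return calloc(1, sizeof(struct msg));
}

static int verify_parsed(struct msg *m)        /* *_message_sig() analog */
{
        (void)m;
        return 0;                              /* the real verification work */
}

static int verify_raw(const void *raw, unsigned long len)
{
        struct msg *m = parse(raw, len);
        int ret;

        if (!m)
                return -1;
        ret = verify_parsed(m);  /* callers with a parsed msg call this directly */
        free(m);
        return ret;
}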
index 11bee67..ce49820 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/asn1.h>
 #include <crypto/hash.h>
+#include <crypto/hash_info.h>
 #include <crypto/public_key.h>
 #include "pkcs7_parser.h"
 
@@ -29,6 +30,10 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7,
 
        kenter(",%u,%s", sinfo->index, sinfo->sig->hash_algo);
 
+       /* The digest was calculated already. */
+       if (sig->digest)
+               return 0;
+
        if (!sinfo->sig->hash_algo)
                return -ENOPKG;
 
@@ -117,6 +122,34 @@ error_no_desc:
        return ret;
 }
 
+int pkcs7_get_digest(struct pkcs7_message *pkcs7, const u8 **buf, u32 *len,
+                    enum hash_algo *hash_algo)
+{
+       struct pkcs7_signed_info *sinfo = pkcs7->signed_infos;
+       int i, ret;
+
+       /*
+        * This function doesn't support messages with more than one signature.
+        */
+       if (sinfo == NULL || sinfo->next != NULL)
+               return -EBADMSG;
+
+       ret = pkcs7_digest(pkcs7, sinfo);
+       if (ret)
+               return ret;
+
+       *buf = sinfo->sig->digest;
+       *len = sinfo->sig->digest_size;
+
+       for (i = 0; i < HASH_ALGO__LAST; i++)
+               if (!strcmp(hash_algo_name[i], sinfo->sig->hash_algo)) {
+                       *hash_algo = i;
+                       break;
+               }
+
+       return 0;
+}
+
 /*
  * Find the key (X.509 certificate) to use to verify a PKCS#7 message.  PKCS#7
  * uses the issuer's name and the issuing certificate serial number for
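
pkcs7_get_digest() above maps the signature's textual hash algorithm name onto its enum hash_algo index by scanning hash_algo_name[]. The same lookup in a standalone form, with a stand-in table:

#include <stdio.h>
#include <string.h>

enum hash_algo { HASH_MD5, HASH_SHA1, HASH_SHA256, HASH__LAST };
static const char *hash_algo_name[HASH__LAST] = {
        [HASH_MD5] = "md5", [HASH_SHA1] = "sha1", [HASH_SHA256] = "sha256",
};

static int lookup_hash_algo(const char *name, enum hash_algo *out)
{
        for (int i = 0; i < HASH__LAST; i++)
                if (!strcmp(hash_algo_name[i], name)) {
                        *out = i;
                        return 0;
                }
        return -1;      /* name not in the table */
}

int main(void)
{
        enum hash_algo a;

        if (!lookup_hash_algo("sha256", &a))
                printf("sha256 -> %d\n", a);
        return 0;
}
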
index 3b303fe..cc9dbce 100644 (file)
@@ -96,7 +96,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
 
        if (!ddir->certs.virtual_address || !ddir->certs.size) {
                pr_debug("Unsigned PE binary\n");
-               return -EKEYREJECTED;
+               return -ENODATA;
        }
 
        chkaddr(ctx->header_size, ddir->certs.virtual_address,
@@ -403,6 +403,8 @@ error_no_desc:
  *  (*) 0 if at least one signature chain intersects with the keys in the trust
  *     keyring, or:
  *
+ *  (*) -ENODATA if there is no signature present.
+ *
  *  (*) -ENOPKG if a suitable crypto module couldn't be found for a check on a
  *     chain.
  *
index 7cd0c9a..71511ae 100644 (file)
@@ -160,11 +160,17 @@ static const struct apd_device_desc hip08_i2c_desc = {
        .setup = acpi_apd_setup,
        .fixed_clk_rate = 250000000,
 };
+
 static const struct apd_device_desc thunderx2_i2c_desc = {
        .setup = acpi_apd_setup,
        .fixed_clk_rate = 125000000,
 };
 
+static const struct apd_device_desc nxp_i2c_desc = {
+       .setup = acpi_apd_setup,
+       .fixed_clk_rate = 350000000,
+};
+
 static const struct apd_device_desc hip08_spi_desc = {
        .setup = acpi_apd_setup,
        .fixed_clk_rate = 250000000,
@@ -238,6 +244,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
        { "HISI02A1", APD_ADDR(hip07_i2c_desc) },
        { "HISI02A2", APD_ADDR(hip08_i2c_desc) },
        { "HISI0173", APD_ADDR(hip08_spi_desc) },
+       { "NXP0001", APD_ADDR(nxp_i2c_desc) },
 #endif
        { }
 };
index 3b25259..a1a858a 100644 (file)
@@ -905,8 +905,8 @@ void acpi_cppc_processor_exit(struct acpi_processor *pr)
                        pcc_data[pcc_ss_id]->refcount--;
                        if (!pcc_data[pcc_ss_id]->refcount) {
                                pcc_mbox_free_channel(pcc_data[pcc_ss_id]->pcc_channel);
-                               pcc_data[pcc_ss_id]->pcc_channel_acquired = 0;
                                kfree(pcc_data[pcc_ss_id]);
+                               pcc_data[pcc_ss_id] = NULL;
                        }
                }
        }
index fd66a73..b097ef2 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/uaccess.h>
 #include <linux/debugfs.h>
 #include <linux/acpi.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -29,6 +30,11 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
 
        struct acpi_table_header table;
        acpi_status status;
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
+       if (ret)
+               return ret;
 
        if (!(*ppos)) {
                /* parse the table header to get the table length */
index 8f9a28a..8b0de8a 100644 (file)
@@ -403,7 +403,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
                pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n",
                        p->flags, p->processor_PD, p->memory_PD);
 
-       if (p->flags & ACPI_HMAT_MEMORY_PD_VALID) {
+       if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
                target = find_mem_target(p->memory_PD);
                if (!target) {
                        pr_debug("HMAT: Memory Domain missing from SRAT\n");
index 1413324..14e68f2 100644 (file)
@@ -1322,7 +1322,7 @@ static ssize_t scrub_show(struct device *dev,
        nfit_device_lock(dev);
        nd_desc = dev_get_drvdata(dev);
        if (!nd_desc) {
-               device_unlock(dev);
+               nfit_device_unlock(dev);
                return rc;
        }
        acpi_desc = to_acpi_desc(nd_desc);
index 2f9d0d2..a2e844a 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/list.h>
 #include <linux/jiffies.h>
 #include <linux/semaphore.h>
+#include <linux/security.h>
 
 #include <asm/io.h>
 #include <linux/uaccess.h>
@@ -182,8 +183,19 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
        acpi_physical_address pa;
 
 #ifdef CONFIG_KEXEC
-       if (acpi_rsdp)
+       /*
+        * We may have been provided with an RSDP on the command line,
+        * but if a malicious user has done so they may be pointing us
+        * at modified ACPI tables that could alter kernel behaviour -
+        * so, we check the lockdown status before making use of
+        * it. If we trust it then also stash it in an architecture
+        * specific location (if appropriate) so it can be carried
+        * over further kexec()s.
+        */
+       if (acpi_rsdp && !security_locked_down(LOCKDOWN_ACPI_TABLES)) {
+               acpi_arch_set_root_pointer(acpi_rsdp);
                return acpi_rsdp;
+       }
 #endif
        pa = acpi_arch_get_root_pointer();
        if (pa)
index 314a187..d1e666e 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
-#include <linux/pci-aspm.h>
 #include <linux/dmar.h>
 #include <linux/acpi.h>
 #include <linux/slab.h>
index 08da9c2..62114a0 100644 (file)
@@ -290,14 +290,13 @@ static int acpi_processor_notifier(struct notifier_block *nb,
                                   unsigned long event, void *data)
 {
        struct cpufreq_policy *policy = data;
-       int cpu = policy->cpu;
 
        if (event == CPUFREQ_CREATE_POLICY) {
-               acpi_thermal_cpufreq_init(cpu);
-               acpi_processor_ppc_init(cpu);
+               acpi_thermal_cpufreq_init(policy);
+               acpi_processor_ppc_init(policy);
        } else if (event == CPUFREQ_REMOVE_POLICY) {
-               acpi_processor_ppc_exit(cpu);
-               acpi_thermal_cpufreq_exit(cpu);
+               acpi_processor_ppc_exit(policy);
+               acpi_thermal_cpufreq_exit(policy);
        }
 
        return 0;
index 2261713..5909e8f 100644 (file)
@@ -81,10 +81,10 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
        pr->performance_platform_limit = (int)ppc;
 
        if (ppc >= pr->performance->state_count ||
-           unlikely(!dev_pm_qos_request_active(&pr->perflib_req)))
+           unlikely(!freq_qos_request_active(&pr->perflib_req)))
                return 0;
 
-       ret = dev_pm_qos_update_request(&pr->perflib_req,
+       ret = freq_qos_update_request(&pr->perflib_req,
                        pr->performance->states[ppc].core_frequency * 1000);
        if (ret < 0) {
                pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n",
@@ -157,26 +157,36 @@ void acpi_processor_ignore_ppc_init(void)
                ignore_ppc = 0;
 }
 
-void acpi_processor_ppc_init(int cpu)
+void acpi_processor_ppc_init(struct cpufreq_policy *policy)
 {
-       struct acpi_processor *pr = per_cpu(processors, cpu);
-       int ret;
+       unsigned int cpu;
 
-       ret = dev_pm_qos_add_request(get_cpu_device(cpu),
-                                    &pr->perflib_req, DEV_PM_QOS_MAX_FREQUENCY,
-                                    INT_MAX);
-       if (ret < 0) {
-               pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
-                      ret);
-               return;
+       for_each_cpu(cpu, policy->related_cpus) {
+               struct acpi_processor *pr = per_cpu(processors, cpu);
+               int ret;
+
+               if (!pr)
+                       continue;
+
+               ret = freq_qos_add_request(&policy->constraints,
+                                          &pr->perflib_req,
+                                          FREQ_QOS_MAX, INT_MAX);
+               if (ret < 0)
+                       pr_err("Failed to add freq constraint for CPU%d (%d)\n",
+                              cpu, ret);
        }
 }
 
-void acpi_processor_ppc_exit(int cpu)
+void acpi_processor_ppc_exit(struct cpufreq_policy *policy)
 {
-       struct acpi_processor *pr = per_cpu(processors, cpu);
+       unsigned int cpu;
 
-       dev_pm_qos_remove_request(&pr->perflib_req);
+       for_each_cpu(cpu, policy->related_cpus) {
+               struct acpi_processor *pr = per_cpu(processors, cpu);
+
+               if (pr)
+                       freq_qos_remove_request(&pr->perflib_req);
+       }
 }
 
 static int acpi_processor_get_performance_control(struct acpi_processor *pr)
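
The conversion from per-device PM QoS to freq_qos above attaches one FREQ_QOS_MAX request per CPU to the policy's shared constraint set; the effective frequency ceiling is the minimum over all such requests. A toy aggregation sketch (the request values are invented):

#include <stdio.h>

static unsigned int effective_max(const unsigned int *req, int n,
                                  unsigned int hw_max)
{
        unsigned int max = hw_max;

        for (int i = 0; i < n; i++)
                if (req[i] < max)       /* every FREQ_QOS_MAX request can only lower it */
                        max = req[i];
        return max;
}

int main(void)
{
        unsigned int reqs[] = { 2400000, 1800000 };  /* kHz, e.g. thermal + _PPC */

        printf("effective max: %u kHz\n",
               effective_max(reqs, 2, 3600000));     /* -> 1800000 */
        return 0;
}
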
index ec2638f..41feb88 100644 (file)
@@ -105,7 +105,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
 
                pr = per_cpu(processors, i);
 
-               if (unlikely(!dev_pm_qos_request_active(&pr->thermal_req)))
+               if (unlikely(!freq_qos_request_active(&pr->thermal_req)))
                        continue;
 
                policy = cpufreq_cpu_get(i);
@@ -116,7 +116,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
 
                cpufreq_cpu_put(policy);
 
-               ret = dev_pm_qos_update_request(&pr->thermal_req, max_freq);
+               ret = freq_qos_update_request(&pr->thermal_req, max_freq);
                if (ret < 0) {
                        pr_warn("Failed to update thermal freq constraint: CPU%d (%d)\n",
                                pr->id, ret);
@@ -125,26 +125,36 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
        return 0;
 }
 
-void acpi_thermal_cpufreq_init(int cpu)
+void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
 {
-       struct acpi_processor *pr = per_cpu(processors, cpu);
-       int ret;
-
-       ret = dev_pm_qos_add_request(get_cpu_device(cpu),
-                                    &pr->thermal_req, DEV_PM_QOS_MAX_FREQUENCY,
-                                    INT_MAX);
-       if (ret < 0) {
-               pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
-                      ret);
-               return;
+       unsigned int cpu;
+
+       for_each_cpu(cpu, policy->related_cpus) {
+               struct acpi_processor *pr = per_cpu(processors, cpu);
+               int ret;
+
+               if (!pr)
+                       continue;
+
+               ret = freq_qos_add_request(&policy->constraints,
+                                          &pr->thermal_req,
+                                          FREQ_QOS_MAX, INT_MAX);
+               if (ret < 0)
+                       pr_err("Failed to add freq constraint for CPU%d (%d)\n",
+                              cpu, ret);
        }
 }
 
-void acpi_thermal_cpufreq_exit(int cpu)
+void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
 {
-       struct acpi_processor *pr = per_cpu(processors, cpu);
+       unsigned int cpu;
+
+       for_each_cpu(cpu, policy->related_cpus) {
+               struct acpi_processor *pr = per_cpu(processors, cpu);
 
-       dev_pm_qos_remove_request(&pr->thermal_req);
+               if (pr)
+                       freq_qos_remove_request(&pr->thermal_req);
+       }
 }
 #else                          /* ! CONFIG_CPU_FREQ */
 static int cpufreq_get_max_state(unsigned int cpu)
index 9fa77d7..2af937a 100644 (file)
@@ -361,19 +361,6 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
                DMI_MATCH(DMI_PRODUCT_NAME, "80E3"),
                },
        },
-       /*
-        * https://bugzilla.kernel.org/show_bug.cgi?id=196907
-        * Some Dell XPS13 9360 cannot do suspend-to-idle using the Low Power
-        * S0 Idle firmware interface.
-        */
-       {
-       .callback = init_default_s3,
-       .ident = "Dell XPS13 9360",
-       .matches = {
-               DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-               DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"),
-               },
-       },
        /*
         * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using
         * the Low Power S0 Idle firmware interface (see
index b323277..180ac43 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/memblock.h>
 #include <linux/earlycpio.h>
 #include <linux/initrd.h>
+#include <linux/security.h>
 #include "internal.h"
 
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -578,6 +579,11 @@ void __init acpi_table_upgrade(void)
        if (table_nr == 0)
                return;
 
+       if (security_locked_down(LOCKDOWN_ACPI_TABLES)) {
+               pr_notice("kernel is locked down, ignoring table override\n");
+               return;
+       }
+
        acpi_tables_addr =
                memblock_find_in_range(0, ACPI_TABLE_UPGRADE_MAX_PHYS,
                                       all_tables_size, PAGE_SIZE);
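
Several hunks above add the same gate: call security_locked_down() first and bail out before touching ACPI tables. A stand-in sketch of that contract (the stub mimics the kernel's 0 / -EPERM return convention):

#include <stdio.h>

static int locked_down = 1;             /* set by the LSM in the real kernel */

static int security_locked_down_stub(void)
{
        return locked_down ? -1 : 0;    /* -1 standing in for -EPERM */
}

static int table_upgrade(void)
{
        if (security_locked_down_stub()) {
                printf("kernel is locked down, ignoring table override\n");
                return -1;
        }
        /* ... copy the override tables into place ... */
        return 0;
}

int main(void)
{
        return table_upgrade() ? 1 : 0;
}
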
index f39f075..fe15236 100644 (file)
@@ -409,9 +409,11 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
                 */
                rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node);
                if (IS_ERR(rstc)) {
-                       if (PTR_ERR(rstc) != -EPROBE_DEFER)
-                               dev_err(&dev->dev, "Can't get amba reset!\n");
-                       return PTR_ERR(rstc);
+                       ret = PTR_ERR(rstc);
+                       if (ret != -EPROBE_DEFER)
+                               dev_err(&dev->dev, "can't get reset: %d\n",
+                                       ret);
+                       goto err_reset;
                }
                reset_control_deassert(rstc);
                reset_control_put(rstc);
@@ -472,6 +474,12 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
        release_resource(&dev->res);
  err_out:
        return ret;
+
+ err_reset:
+       amba_put_disable_pclk(dev);
+       iounmap(tmp);
+       dev_pm_domain_detach(&dev->dev, true);
+       goto err_release;
 }
 
 /*
index c0a4912..265d9dd 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
 #include <linux/seq_file.h>
+#include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/pid_namespace.h>
 #include <linux/security.h>
@@ -66,6 +67,7 @@
 #include <linux/task_work.h>
 
 #include <uapi/linux/android/binder.h>
+#include <uapi/linux/android/binderfs.h>
 
 #include <asm/cacheflush.h>
 
@@ -95,10 +97,6 @@ DEFINE_SHOW_ATTRIBUTE(proc);
 #define SZ_1K                               0x400
 #endif
 
-#ifndef SZ_4M
-#define SZ_4M                               0x400000
-#endif
-
 #define FORBIDDEN_MMAP_FLAGS                (VM_WRITE)
 
 enum {
@@ -2876,7 +2874,7 @@ static void binder_transaction(struct binder_proc *proc,
        e->target_handle = tr->target.handle;
        e->data_size = tr->data_size;
        e->offsets_size = tr->offsets_size;
-       e->context_name = proc->context->name;
+       strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME);
 
        if (reply) {
                binder_inner_proc_lock(proc);
@@ -5175,9 +5173,6 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
        if (proc->tsk != current->group_leader)
                return -EINVAL;
 
-       if ((vma->vm_end - vma->vm_start) > SZ_4M)
-               vma->vm_end = vma->vm_start + SZ_4M;
-
        binder_debug(BINDER_DEBUG_OPEN_CLOSE,
                     "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n",
                     __func__, proc->pid, vma->vm_start, vma->vm_end,
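
Storing the context name by value with strscpy() (instead of keeping the const char * pointer) matters because the binderfs context name can be freed while the transaction log entry is still live; the fixed-size copy is truncating and always NUL-terminated. A sketch of the same bounded-copy behaviour using snprintf(), since strscpy() is kernel-only:

#include <stdio.h>

#define NAME_MAX_LEN 255        /* stand-in for BINDERFS_MAX_NAME */

struct log_entry { char context_name[NAME_MAX_LEN + 1]; };

static void record(struct log_entry *e, const char *name)
{
        /* strscpy() analog: truncates long input, always NUL-terminates */
        snprintf(e->context_name, sizeof(e->context_name), "%s", name);
}

int main(void)
{
        struct log_entry e;

        record(&e, "binder");   /* safe even after 'name' is freed later */
        printf("%s\n", e.context_name);
        return 0;
}
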
index 6d79a1b..eb76a82 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/cacheflush.h>
 #include <linux/uaccess.h>
 #include <linux/highmem.h>
+#include <linux/sizes.h>
 #include "binder_alloc.h"
 #include "binder_trace.h"
 
@@ -156,7 +157,7 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
 }
 
 /**
- * binder_alloc_buffer_lookup() - get buffer given user ptr
+ * binder_alloc_prepare_to_free() - get buffer given user ptr
  * @alloc:     binder_alloc for this proc
  * @user_ptr:  User pointer to buffer data
  *
@@ -689,7 +690,9 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
        alloc->buffer = (void __user *)vma->vm_start;
        mutex_unlock(&binder_alloc_mmap_lock);
 
-       alloc->pages = kcalloc((vma->vm_end - vma->vm_start) / PAGE_SIZE,
+       alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start,
+                                  SZ_4M);
+       alloc->pages = kcalloc(alloc->buffer_size / PAGE_SIZE,
                               sizeof(alloc->pages[0]),
                               GFP_KERNEL);
        if (alloc->pages == NULL) {
@@ -697,7 +700,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
                failure_string = "alloc page array";
                goto err_alloc_pages_failed;
        }
-       alloc->buffer_size = vma->vm_end - vma->vm_start;
 
        buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
        if (!buffer) {
index bd47f7f..ae99109 100644 (file)
@@ -130,7 +130,7 @@ struct binder_transaction_log_entry {
        int return_error_line;
        uint32_t return_error;
        uint32_t return_error_param;
-       const char *context_name;
+       char context_name[BINDERFS_MAX_NAME + 1];
 };
 
 struct binder_transaction_log {
index dd92faf..05c2b32 100644 (file)
@@ -1600,7 +1600,9 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp
         */
        if (!id || id->vendor != PCI_VENDOR_ID_INTEL)
                return;
-       if (((enum board_ids) id->driver_data) < board_ahci_pcs7)
+
+       /* Skip applying the quirk on Denverton and beyond */
+       if (((enum board_ids) id->driver_data) >= board_ahci_pcs7)
                return;
 
        /*
index 9e9583a..8befce0 100644 (file)
@@ -153,17 +153,13 @@ int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv)
 {
        int rc, i;
 
-       if (hpriv->ahci_regulator) {
-               rc = regulator_enable(hpriv->ahci_regulator);
-               if (rc)
-                       return rc;
-       }
+       rc = regulator_enable(hpriv->ahci_regulator);
+       if (rc)
+               return rc;
 
-       if (hpriv->phy_regulator) {
-               rc = regulator_enable(hpriv->phy_regulator);
-               if (rc)
-                       goto disable_ahci_pwrs;
-       }
+       rc = regulator_enable(hpriv->phy_regulator);
+       if (rc)
+               goto disable_ahci_pwrs;
 
        for (i = 0; i < hpriv->nports; i++) {
                if (!hpriv->target_pwrs[i])
@@ -181,11 +177,9 @@ disable_target_pwrs:
                if (hpriv->target_pwrs[i])
                        regulator_disable(hpriv->target_pwrs[i]);
 
-       if (hpriv->phy_regulator)
-               regulator_disable(hpriv->phy_regulator);
+       regulator_disable(hpriv->phy_regulator);
 disable_ahci_pwrs:
-       if (hpriv->ahci_regulator)
-               regulator_disable(hpriv->ahci_regulator);
+       regulator_disable(hpriv->ahci_regulator);
        return rc;
 }
 EXPORT_SYMBOL_GPL(ahci_platform_enable_regulators);
@@ -207,10 +201,8 @@ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv)
                regulator_disable(hpriv->target_pwrs[i]);
        }
 
-       if (hpriv->ahci_regulator)
-               regulator_disable(hpriv->ahci_regulator);
-       if (hpriv->phy_regulator)
-               regulator_disable(hpriv->phy_regulator);
+       regulator_disable(hpriv->ahci_regulator);
+       regulator_disable(hpriv->phy_regulator);
 }
 EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators);
 /**
@@ -359,7 +351,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
        struct regulator *target_pwr;
        int rc = 0;
 
-       target_pwr = regulator_get_optional(dev, "target");
+       target_pwr = regulator_get(dev, "target");
 
        if (!IS_ERR(target_pwr))
                hpriv->target_pwrs[port] = target_pwr;
@@ -436,16 +428,14 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
                hpriv->clks[i] = clk;
        }
 
-       hpriv->ahci_regulator = devm_regulator_get_optional(dev, "ahci");
+       hpriv->ahci_regulator = devm_regulator_get(dev, "ahci");
        if (IS_ERR(hpriv->ahci_regulator)) {
                rc = PTR_ERR(hpriv->ahci_regulator);
-               if (rc == -EPROBE_DEFER)
+               if (rc != 0)
                        goto err_out;
-               rc = 0;
-               hpriv->ahci_regulator = NULL;
        }
 
-       hpriv->phy_regulator = devm_regulator_get_optional(dev, "phy");
+       hpriv->phy_regulator = devm_regulator_get(dev, "phy");
        if (IS_ERR(hpriv->phy_regulator)) {
                rc = PTR_ERR(hpriv->phy_regulator);
                if (rc == -EPROBE_DEFER)
@@ -497,6 +487,7 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
 
                        if (of_property_read_u32(child, "reg", &port)) {
                                rc = -EINVAL;
+                               of_node_put(child);
                                goto err_out;
                        }
 
@@ -514,14 +505,18 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
                        if (port_dev) {
                                rc = ahci_platform_get_regulator(hpriv, port,
                                                                &port_dev->dev);
-                               if (rc == -EPROBE_DEFER)
+                               if (rc == -EPROBE_DEFER) {
+                                       of_node_put(child);
                                        goto err_out;
+                               }
                        }
 #endif
 
                        rc = ahci_platform_get_phy(hpriv, port, dev, child);
-                       if (rc)
+                       if (rc) {
+                               of_node_put(child);
                                goto err_out;
+                       }
 
                        enabled_ports++;
                }
index 76d0f9d..58e09ff 100644 (file)
@@ -4791,27 +4791,6 @@ void ata_scsi_hotplug(struct work_struct *work)
                return;
        }
 
-       /*
-        * XXX - UGLY HACK
-        *
-        * The block layer suspend/resume path is fundamentally broken due
-        * to freezable kthreads and workqueue and may deadlock if a block
-        * device gets removed while resume is in progress.  I don't know
-        * what the solution is short of removing freezable kthreads and
-        * workqueues altogether.
-        *
-        * The following is an ugly hack to avoid kicking off device
-        * removal while freezer is active.  This is a joke but does avoid
-        * this particular deadlock scenario.
-        *
-        * https://bugzilla.kernel.org/show_bug.cgi?id=62801
-        * http://marc.info/?l=linux-kernel&m=138695698516487
-        */
-#ifdef CONFIG_FREEZER
-       while (pm_freezing)
-               msleep(10);
-#endif
-
        DPRINTK("ENTER\n");
        mutex_lock(&ap->scsi_scan_mutex);
 
index 70b00ae..8af793f 100644 (file)
@@ -1690,7 +1690,7 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 
                if (RBRQ_HBUF_ERR(he_dev->rbrq_head)) {
                        hprintk("HBUF_ERR!  (cid 0x%x)\n", cid);
-                               atomic_inc(&vcc->stats->rx_drop);
+                       atomic_inc(&vcc->stats->rx_drop);
                        goto return_host_buffers;
                }
 
index 2db62d9..7bd9cd3 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/cpufreq.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/fwnode.h>
@@ -3179,6 +3180,8 @@ void device_shutdown(void)
        wait_for_device_probe();
        device_block_probing();
 
+       cpufreq_suspend();
+
        spin_lock(&devices_kset->list_lock);
        /*
         * Walk the devices list backward, shutting down each in turn.
index 20c39d1..55907c2 100644 (file)
@@ -100,26 +100,9 @@ unsigned long __weak memory_block_size_bytes(void)
 }
 EXPORT_SYMBOL_GPL(memory_block_size_bytes);
 
-static unsigned long get_memory_block_size(void)
-{
-       unsigned long block_sz;
-
-       block_sz = memory_block_size_bytes();
-
-       /* Validate blk_sz is a power of 2 and not less than section size */
-       if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
-               WARN_ON(1);
-               block_sz = MIN_MEMORY_BLOCK_SIZE;
-       }
-
-       return block_sz;
-}
-
 /*
- * use this as the physical section index that this memsection
- * uses.
+ * Show the first physical section index (number) of this memory block.
  */
-
 static ssize_t phys_index_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
 {
@@ -131,7 +114,10 @@ static ssize_t phys_index_show(struct device *dev,
 }
 
 /*
- * Show whether the section of memory is likely to be hot-removable
+ * Show whether the memory block is likely to be offlineable (or is already
+ * offline). Once offline, the memory block could be removed. The return
+ * value does not, however, indicate that there is a way to remove the
+ * memory block.
  */
 static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
@@ -455,12 +441,12 @@ static DEVICE_ATTR_RO(phys_device);
 static DEVICE_ATTR_RO(removable);
 
 /*
- * Block size attribute stuff
+ * Show the memory block size (shared by all memory blocks).
  */
 static ssize_t block_size_bytes_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
 {
-       return sprintf(buf, "%lx\n", get_memory_block_size());
+       return sprintf(buf, "%lx\n", memory_block_size_bytes());
 }
 
 static DEVICE_ATTR_RO(block_size_bytes);
@@ -554,6 +540,9 @@ static ssize_t soft_offline_page_store(struct device *dev,
        pfn >>= PAGE_SHIFT;
        if (!pfn_valid(pfn))
                return -ENXIO;
+       /* Only online pages can be soft-offlined (in particular, not ZONE_DEVICE). */
+       if (!pfn_to_online_page(pfn))
+               return -EIO;
        ret = soft_offline_page(pfn_to_page(pfn), 0);
        return ret == 0 ? count : ret;
 }
@@ -670,10 +659,10 @@ static int init_memory_block(struct memory_block **memory,
                return -ENOMEM;
 
        mem->start_section_nr = block_id * sections_per_block;
-       mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;
        mem->state = state;
        start_pfn = section_nr_to_pfn(mem->start_section_nr);
        mem->phys_device = arch_get_memory_phys_device(start_pfn);
+       mem->nid = NUMA_NO_NODE;
 
        ret = register_memory(mem);
 
@@ -810,19 +799,22 @@ static const struct attribute_group *memory_root_attr_groups[] = {
 /*
  * Initialize the sysfs support for memory devices...
  */
-int __init memory_dev_init(void)
+void __init memory_dev_init(void)
 {
        int ret;
        int err;
        unsigned long block_sz, nr;
 
+       /* Validate the configured memory block size */
+       block_sz = memory_block_size_bytes();
+       if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
+               panic("Memory block size not suitable: 0x%lx\n", block_sz);
+       sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
        ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
        if (ret)
                goto out;
 
-       block_sz = get_memory_block_size();
-       sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-
        /*
         * Create entries for memory sections that were found
         * during boot and have been initialized
@@ -838,8 +830,7 @@ int __init memory_dev_init(void)
 
 out:
        if (ret)
-               printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
-       return ret;
+               panic("%s() failed: %d\n", __func__, ret);
 }
 
 /**
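
The bit trick in the removed helper and is_power_of_2() are equivalent
for non-zero values; the sketch below spells out the identity that the
one-time check in memory_dev_init() now relies on (the helper name is
illustrative):

        /*
         * x & (x - 1) clears the lowest set bit, so the result is zero
         * exactly when x has at most one bit set; is_power_of_2()
         * additionally rejects x == 0.
         */
        static inline bool block_size_is_pow2(unsigned long x)
        {
                return x != 0 && (x & (x - 1)) == 0;
        }
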
index 75b7e6f..296546f 100644 (file)
@@ -427,6 +427,8 @@ static ssize_t node_read_meminfo(struct device *dev,
                       "Node %d AnonHugePages:  %8lu kB\n"
                       "Node %d ShmemHugePages: %8lu kB\n"
                       "Node %d ShmemPmdMapped: %8lu kB\n"
+                      "Node %d FileHugePages: %8lu kB\n"
+                      "Node %d FilePmdMapped: %8lu kB\n"
 #endif
                        ,
                       nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
@@ -452,6 +454,10 @@ static ssize_t node_read_meminfo(struct device *dev,
                       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
                                       HPAGE_PMD_NR),
                       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
+                                      HPAGE_PMD_NR),
+                      nid, K(node_page_state(pgdat, NR_FILE_THPS) *
+                                      HPAGE_PMD_NR),
+                      nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
                                       HPAGE_PMD_NR)
 #endif
                       );
@@ -756,15 +762,13 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 static int register_mem_sect_under_node(struct memory_block *mem_blk,
                                         void *arg)
 {
+       unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
+       unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
+       unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
        int ret, nid = *(int *)arg;
-       unsigned long pfn, sect_start_pfn, sect_end_pfn;
+       unsigned long pfn;
 
-       mem_blk->nid = nid;
-
-       sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
-       sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
-       sect_end_pfn += PAGES_PER_SECTION - 1;
-       for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+       for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
                int page_nid;
 
                /*
@@ -789,6 +793,13 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
                        if (page_nid != nid)
                                continue;
                }
+
+               /*
+                * If this memory block spans multiple nodes, we only indicate
+                * the last processed node.
+                */
+               mem_blk->nid = nid;
+
                ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
                                        &mem_blk->dev.kobj,
                                        kobject_name(&mem_blk->dev.kobj));
@@ -804,32 +815,18 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
 }
 
 /*
- * Unregister memory block device under all nodes that it spans.
- * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes).
+ * Unregister a memory block device under the node it spans. Memory blocks
+ * with multiple nodes cannot be offlined and therefore also never be removed.
  */
 void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
 {
-       unsigned long pfn, sect_start_pfn, sect_end_pfn;
-       static nodemask_t unlinked_nodes;
-
-       nodes_clear(unlinked_nodes);
-       sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
-       sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
-       for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
-               int nid;
+       if (mem_blk->nid == NUMA_NO_NODE)
+               return;
 
-               nid = get_nid_for_pfn(pfn);
-               if (nid < 0)
-                       continue;
-               if (!node_online(nid))
-                       continue;
-               if (node_test_and_set(nid, unlinked_nodes))
-                       continue;
-               sysfs_remove_link(&node_devices[nid]->dev.kobj,
-                        kobject_name(&mem_blk->dev.kobj));
-               sysfs_remove_link(&mem_blk->dev.kobj,
-                        kobject_name(&node_devices[nid]->dev.kobj));
-       }
+       sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj,
+                         kobject_name(&mem_blk->dev.kobj));
+       sysfs_remove_link(&mem_blk->dev.kobj,
+                         kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
 }
 
 int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
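
Because every memory block has the same size, the pfn span of a block
follows from its first section alone, which is what lets end_section_nr
go away. The closed form, assuming the usual section/pfn helpers:

        unsigned long block_pfns = memory_block_size_bytes() / PAGE_SIZE;
        unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
        unsigned long end_pfn = start_pfn + block_pfns - 1;     /* inclusive */
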
index b6c6c7d..b230beb 100644 (file)
@@ -241,12 +241,8 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
 }
 EXPORT_SYMBOL_GPL(platform_get_resource_byname);
 
-/**
- * platform_get_irq_byname - get an IRQ for a device by name
- * @dev: platform device
- * @name: IRQ name
- */
-int platform_get_irq_byname(struct platform_device *dev, const char *name)
+static int __platform_get_irq_byname(struct platform_device *dev,
+                                    const char *name)
 {
        struct resource *r;
 
@@ -262,11 +258,47 @@ int platform_get_irq_byname(struct platform_device *dev, const char *name)
        if (r)
                return r->start;
 
-       dev_err(&dev->dev, "IRQ %s not found\n", name);
        return -ENXIO;
 }
+
+/**
+ * platform_get_irq_byname - get an IRQ for a device by name
+ * @dev: platform device
+ * @name: IRQ name
+ *
+ * Get an IRQ like platform_get_irq(), but by name rather than by index.
+ *
+ * Return: IRQ number on success, negative error number on failure.
+ */
+int platform_get_irq_byname(struct platform_device *dev, const char *name)
+{
+       int ret;
+
+       ret = __platform_get_irq_byname(dev, name);
+       if (ret < 0 && ret != -EPROBE_DEFER)
+               dev_err(&dev->dev, "IRQ %s not found\n", name);
+
+       return ret;
+}
 EXPORT_SYMBOL_GPL(platform_get_irq_byname);
 
+/**
+ * platform_get_irq_byname_optional - get an optional IRQ for a device by name
+ * @dev: platform device
+ * @name: IRQ name
+ *
+ * Get an optional IRQ by name like platform_get_irq_byname(), except that
+ * it does not print an error message if the IRQ cannot be obtained.
+ *
+ * Return: IRQ number on success, negative error number on failure.
+ */
+int platform_get_irq_byname_optional(struct platform_device *dev,
+                                    const char *name)
+{
+       return __platform_get_irq_byname(dev, name);
+}
+EXPORT_SYMBOL_GPL(platform_get_irq_byname_optional);
+
 /**
 * platform_add_devices - add a number of platform devices
  * @devs: array of platform devices to add
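
Drivers whose interrupts are genuinely optional can now probe for them
without polluting dmesg. A hypothetical probe fragment (the "wakeup" name
is illustrative, not from the patch):

        int irq;

        irq = platform_get_irq_byname_optional(pdev, "wakeup");
        if (irq == -EPROBE_DEFER)
                return irq;             /* still worth deferring for */
        if (irq < 0)
                irq = 0;                /* absent: run without a wakeup IRQ */
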
index 6c90fd7..350dcaf 100644 (file)
@@ -115,20 +115,10 @@ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type)
 
        spin_lock_irqsave(&dev->power.lock, flags);
 
-       switch (type) {
-       case DEV_PM_QOS_RESUME_LATENCY:
+       if (type == DEV_PM_QOS_RESUME_LATENCY) {
                ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT
                        : pm_qos_read_value(&qos->resume_latency);
-               break;
-       case DEV_PM_QOS_MIN_FREQUENCY:
-               ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE
-                       : pm_qos_read_value(&qos->min_frequency);
-               break;
-       case DEV_PM_QOS_MAX_FREQUENCY:
-               ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE
-                       : pm_qos_read_value(&qos->max_frequency);
-               break;
-       default:
+       } else {
                WARN_ON(1);
                ret = 0;
        }
@@ -169,14 +159,6 @@ static int apply_constraint(struct dev_pm_qos_request *req,
                        req->dev->power.set_latency_tolerance(req->dev, value);
                }
                break;
-       case DEV_PM_QOS_MIN_FREQUENCY:
-               ret = pm_qos_update_target(&qos->min_frequency,
-                                          &req->data.pnode, action, value);
-               break;
-       case DEV_PM_QOS_MAX_FREQUENCY:
-               ret = pm_qos_update_target(&qos->max_frequency,
-                                          &req->data.pnode, action, value);
-               break;
        case DEV_PM_QOS_FLAGS:
                ret = pm_qos_update_flags(&qos->flags, &req->data.flr,
                                          action, value);
@@ -227,24 +209,6 @@ static int dev_pm_qos_constraints_allocate(struct device *dev)
        c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
        c->type = PM_QOS_MIN;
 
-       c = &qos->min_frequency;
-       plist_head_init(&c->list);
-       c->target_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
-       c->default_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
-       c->no_constraint_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
-       c->type = PM_QOS_MAX;
-       c->notifiers = ++n;
-       BLOCKING_INIT_NOTIFIER_HEAD(n);
-
-       c = &qos->max_frequency;
-       plist_head_init(&c->list);
-       c->target_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
-       c->default_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
-       c->no_constraint_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
-       c->type = PM_QOS_MIN;
-       c->notifiers = ++n;
-       BLOCKING_INIT_NOTIFIER_HEAD(n);
-
        INIT_LIST_HEAD(&qos->flags.list);
 
        spin_lock_irq(&dev->power.lock);
@@ -305,18 +269,6 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
                memset(req, 0, sizeof(*req));
        }
 
-       c = &qos->min_frequency;
-       plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) {
-               apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE);
-               memset(req, 0, sizeof(*req));
-       }
-
-       c = &qos->max_frequency;
-       plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) {
-               apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
-               memset(req, 0, sizeof(*req));
-       }
-
        f = &qos->flags;
        list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) {
                apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
@@ -428,8 +380,6 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req,
        switch(req->type) {
        case DEV_PM_QOS_RESUME_LATENCY:
        case DEV_PM_QOS_LATENCY_TOLERANCE:
-       case DEV_PM_QOS_MIN_FREQUENCY:
-       case DEV_PM_QOS_MAX_FREQUENCY:
                curr_value = req->data.pnode.prio;
                break;
        case DEV_PM_QOS_FLAGS:
@@ -557,14 +507,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier,
                ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers,
                                                       notifier);
                break;
-       case DEV_PM_QOS_MIN_FREQUENCY:
-               ret = blocking_notifier_chain_register(dev->power.qos->min_frequency.notifiers,
-                                                      notifier);
-               break;
-       case DEV_PM_QOS_MAX_FREQUENCY:
-               ret = blocking_notifier_chain_register(dev->power.qos->max_frequency.notifiers,
-                                                      notifier);
-               break;
        default:
                WARN_ON(1);
                ret = -EINVAL;
@@ -604,14 +546,6 @@ int dev_pm_qos_remove_notifier(struct device *dev,
                ret = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers,
                                                         notifier);
                break;
-       case DEV_PM_QOS_MIN_FREQUENCY:
-               ret = blocking_notifier_chain_unregister(dev->power.qos->min_frequency.notifiers,
-                                                        notifier);
-               break;
-       case DEV_PM_QOS_MAX_FREQUENCY:
-               ret = blocking_notifier_chain_unregister(dev->power.qos->max_frequency.notifiers,
-                                                        notifier);
-               break;
        default:
                WARN_ON(1);
                ret = -EINVAL;
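
The min/max frequency constraints are not deleted from the kernel; the
same series moves them to the dedicated freq_qos_* interface built on
struct freq_constraints. A hedged sketch of the replacement call, assuming
a cpufreq policy and an illustrative max_khz value:

        struct freq_qos_request req;
        int ret;

        ret = freq_qos_add_request(&policy->constraints, &req,
                                   FREQ_QOS_MAX, max_khz);
        if (ret < 0)
                return ret;
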
index c589865..651bd02 100644 (file)
@@ -13,33 +13,10 @@ sector_t interval_end(struct rb_node *node)
        return this->end;
 }
 
-/**
- * compute_subtree_last  -  compute end of @node
- *
- * The end of an interval is the highest (start + (size >> 9)) value of this
- * node and of its children.  Called for @node and its parents whenever the end
- * may have changed.
- */
-static inline sector_t
-compute_subtree_last(struct drbd_interval *node)
-{
-       sector_t max = node->sector + (node->size >> 9);
-
-       if (node->rb.rb_left) {
-               sector_t left = interval_end(node->rb.rb_left);
-               if (left > max)
-                       max = left;
-       }
-       if (node->rb.rb_right) {
-               sector_t right = interval_end(node->rb.rb_right);
-               if (right > max)
-                       max = right;
-       }
-       return max;
-}
+#define NODE_END(node) ((node)->sector + ((node)->size >> 9))
 
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb,
-                    sector_t, end, compute_subtree_last);
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+                        struct drbd_interval, rb, sector_t, end, NODE_END);
 
 /**
  * drbd_insert_interval  -  insert a new interval into a tree
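
RB_DECLARE_CALLBACKS_MAX() generates the augment callbacks from nothing
more than the NODE_END() accessor, keeping ->end equal to the subtree
maximum. What the macro produces is, in spirit, the function that was just
deleted (a sketch, assuming the declarations above):

        static sector_t subtree_end(struct drbd_interval *node)
        {
                sector_t max = NODE_END(node);  /* own interval end */

                if (node->rb.rb_left)
                        max = max_t(sector_t, max, interval_end(node->rb.rb_left));
                if (node->rb.rb_right)
                        max = max_t(sector_t, max, interval_end(node->rb.rb_right));
                return max;
        }
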
index 1410fa8..f6f77ea 100644 (file)
@@ -994,6 +994,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
                blk_queue_write_cache(lo->lo_queue, true, false);
 
+       if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
+               /* In case of direct I/O, match underlying block size */
+               unsigned short bsize = bdev_logical_block_size(
+                       inode->i_sb->s_bdev);
+
+               blk_queue_logical_block_size(lo->lo_queue, bsize);
+               blk_queue_physical_block_size(lo->lo_queue, bsize);
+               blk_queue_io_min(lo->lo_queue, bsize);
+       }
+
        loop_update_rotational(lo);
        loop_update_dio(lo);
        set_capacity(lo->lo_disk, size);
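
Direct I/O must be aligned to the backing device's logical block size, so
a loop device doing O_DIRECT against a 4 KiB-sector disk cannot keep
advertising 512-byte granularity. For a hypothetical 4 KiB backing store:

        /* bdev_logical_block_size() returns 4096 here; the loop queue
         * inherits it for the logical, physical and io_min limits, so
         * misaligned 512-byte requests are rejected up front instead of
         * failing inside the direct I/O path.
         */
        unsigned short bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
        blk_queue_logical_block_size(lo->lo_queue, bsize);
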
index a8e3815..a94ee45 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/ioctl.h>
 #include <linux/mutex.h>
 #include <linux/compiler.h>
+#include <linux/completion.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -71,14 +72,17 @@ struct link_dead_args {
        int index;
 };
 
-#define NBD_TIMEDOUT                   0
+#define NBD_RT_TIMEDOUT                        0
+#define NBD_RT_DISCONNECT_REQUESTED    1
+#define NBD_RT_DISCONNECTED            2
+#define NBD_RT_HAS_PID_FILE            3
+#define NBD_RT_HAS_CONFIG_REF          4
+#define NBD_RT_BOUND                   5
+#define NBD_RT_DESTROY_ON_DISCONNECT   6
+#define NBD_RT_DISCONNECT_ON_CLOSE     7
+
+#define NBD_DESTROY_ON_DISCONNECT      0
 #define NBD_DISCONNECT_REQUESTED       1
-#define NBD_DISCONNECTED               2
-#define NBD_HAS_PID_FILE               3
-#define NBD_HAS_CONFIG_REF             4
-#define NBD_BOUND                      5
-#define NBD_DESTROY_ON_DISCONNECT      6
-#define NBD_DISCONNECT_ON_CLOSE        7
 
 struct nbd_config {
        u32 flags;
@@ -113,6 +117,9 @@ struct nbd_device {
        struct list_head list;
        struct task_struct *task_recv;
        struct task_struct *task_setup;
+
+       struct completion *destroy_complete;
+       unsigned long flags;
 };
 
 #define NBD_CMD_REQUEUED       1
@@ -223,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
                disk->private_data = NULL;
                put_disk(disk);
        }
+
+       /*
+        * Place this at the end, just before the nbd is freed, to
+        * make sure that the disk and the related kobject are also
+        * totally removed, to avoid duplicate creation of the same
+        * one.
+        */
+       if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
+               complete(nbd->destroy_complete);
+
        kfree(nbd);
 }
 
@@ -231,15 +248,15 @@ static void nbd_put(struct nbd_device *nbd)
        if (refcount_dec_and_mutex_lock(&nbd->refs,
                                        &nbd_index_mutex)) {
                idr_remove(&nbd_index_idr, nbd->index);
-               mutex_unlock(&nbd_index_mutex);
                nbd_dev_remove(nbd);
+               mutex_unlock(&nbd_index_mutex);
        }
 }
 
 static int nbd_disconnected(struct nbd_config *config)
 {
-       return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
-               test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+       return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
+               test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
 }
 
 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
@@ -257,9 +274,9 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
        if (!nsock->dead) {
                kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
                if (atomic_dec_return(&nbd->config->live_connections) == 0) {
-                       if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+                       if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
                                               &nbd->config->runtime_flags)) {
-                               set_bit(NBD_DISCONNECTED,
+                               set_bit(NBD_RT_DISCONNECTED,
                                        &nbd->config->runtime_flags);
                                dev_info(nbd_to_dev(nbd),
                                        "Disconnected due to user request.\n");
@@ -333,7 +350,7 @@ static void sock_shutdown(struct nbd_device *nbd)
 
        if (config->num_connections == 0)
                return;
-       if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
+       if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
                return;
 
        for (i = 0; i < config->num_connections; i++) {
@@ -368,17 +385,16 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        struct nbd_device *nbd = cmd->nbd;
        struct nbd_config *config;
 
+       if (!mutex_trylock(&cmd->lock))
+               return BLK_EH_RESET_TIMER;
+
        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                cmd->status = BLK_STS_TIMEOUT;
+               mutex_unlock(&cmd->lock);
                goto done;
        }
        config = nbd->config;
 
-       if (!mutex_trylock(&cmd->lock)) {
-               nbd_config_put(nbd);
-               return BLK_EH_RESET_TIMER;
-       }
-
        if (config->num_connections > 1) {
                dev_err_ratelimited(nbd_to_dev(nbd),
                                    "Connection timed out, retrying (%d/%d alive)\n",
@@ -427,7 +443,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        }
 
        dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
-       set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+       set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
        cmd->status = BLK_STS_IOERR;
        mutex_unlock(&cmd->lock);
        sock_shutdown(nbd);
@@ -694,6 +710,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                ret = -ENOENT;
                goto out;
        }
+       if (cmd->status != BLK_STS_OK) {
+               dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
+                       req);
+               ret = -ENOENT;
+               goto out;
+       }
        if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
                dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
                        req);
@@ -775,7 +797,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
 {
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 
+       mutex_lock(&cmd->lock);
        cmd->status = BLK_STS_IOERR;
+       mutex_unlock(&cmd->lock);
+
        blk_mq_complete_request(req);
        return true;
 }
@@ -795,7 +820,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
        struct nbd_sock *nsock = config->socks[index];
        int fallback = nsock->fallback_index;
 
-       if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+       if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
                return new_index;
 
        if (config->num_connections <= 1) {
@@ -836,7 +861,7 @@ static int wait_for_reconnect(struct nbd_device *nbd)
        struct nbd_config *config = nbd->config;
        if (!config->dead_conn_timeout)
                return 0;
-       if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+       if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
                return 0;
        return wait_event_timeout(config->conn_wait,
                                  atomic_read(&config->live_connections) > 0,
@@ -955,6 +980,25 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
        return ret;
 }
 
+static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
+                                    int *err)
+{
+       struct socket *sock;
+
+       *err = 0;
+       sock = sockfd_lookup(fd, err);
+       if (!sock)
+               return NULL;
+
+       if (sock->ops->shutdown == sock_no_shutdown) {
+               dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
+               *err = -EINVAL;
+               return NULL;
+       }
+
+       return sock;
+}
+
 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
                          bool netlink)
 {
@@ -964,17 +1008,17 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
        struct nbd_sock *nsock;
        int err;
 
-       sock = sockfd_lookup(arg, &err);
+       sock = nbd_get_socket(nbd, arg, &err);
        if (!sock)
                return err;
 
        if (!netlink && !nbd->task_setup &&
-           !test_bit(NBD_BOUND, &config->runtime_flags))
+           !test_bit(NBD_RT_BOUND, &config->runtime_flags))
                nbd->task_setup = current;
 
        if (!netlink &&
            (nbd->task_setup != current ||
-            test_bit(NBD_BOUND, &config->runtime_flags))) {
+            test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
                dev_err(disk_to_dev(nbd->disk),
                        "Device being setup by another task");
                sockfd_put(sock);
@@ -1016,7 +1060,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
        int i;
        int err;
 
-       sock = sockfd_lookup(arg, &err);
+       sock = nbd_get_socket(nbd, arg, &err);
        if (!sock)
                return err;
 
@@ -1053,7 +1097,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
                mutex_unlock(&nsock->tx_lock);
                sockfd_put(old);
 
-               clear_bit(NBD_DISCONNECTED, &config->runtime_flags);
+               clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
 
                /* We take the tx_mutex in an error path in the recv_work, so we
                 * need to queue_work outside of the tx_mutex.
@@ -1124,7 +1168,8 @@ static int nbd_disconnect(struct nbd_device *nbd)
        struct nbd_config *config = nbd->config;
 
        dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-       set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+       set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
+       set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
        send_disconnects(nbd);
        return 0;
 }
@@ -1143,7 +1188,7 @@ static void nbd_config_put(struct nbd_device *nbd)
                struct nbd_config *config = nbd->config;
                nbd_dev_dbg_close(nbd);
                nbd_size_clear(nbd);
-               if (test_and_clear_bit(NBD_HAS_PID_FILE,
+               if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
                                       &config->runtime_flags))
                        device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
                nbd->task_recv = NULL;
@@ -1209,7 +1254,7 @@ static int nbd_start_device(struct nbd_device *nbd)
                dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
                return error;
        }
-       set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
+       set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
 
        nbd_dev_dbg_init(nbd);
        for (i = 0; i < num_connections; i++) {
@@ -1256,9 +1301,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
        mutex_lock(&nbd->config_lock);
        nbd_bdev_reset(bdev);
        /* user requested, ignore socket errors */
-       if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+       if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
                ret = 0;
-       if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+       if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
                ret = -ETIMEDOUT;
        return ret;
 }
@@ -1269,7 +1314,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
        sock_shutdown(nbd);
        __invalidate_device(bdev, true);
        nbd_bdev_reset(bdev);
-       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+       if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
 }
@@ -1364,7 +1409,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
        /* Don't allow ioctl operations on a nbd device that was created with
         * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
         */
-       if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+       if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
            (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
                error = __nbd_ioctl(bdev, nbd, cmd, arg);
        else
@@ -1435,7 +1480,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
        struct nbd_device *nbd = disk->private_data;
        struct block_device *bdev = bdget_disk(disk, 0);
 
-       if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+       if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
                        bdev->bd_openers == 0)
                nbd_disconnect_and_put(nbd);
 
@@ -1636,6 +1681,7 @@ static int nbd_dev_add(int index)
        nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
                BLK_MQ_F_BLOCKING;
        nbd->tag_set.driver_data = nbd;
+       nbd->destroy_complete = NULL;
 
        err = blk_mq_alloc_tag_set(&nbd->tag_set);
        if (err)
@@ -1750,6 +1796,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
 
 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 {
+       DECLARE_COMPLETION_ONSTACK(destroy_complete);
        struct nbd_device *nbd = NULL;
        struct nbd_config *config;
        int index = -1;
@@ -1801,6 +1848,17 @@ again:
                mutex_unlock(&nbd_index_mutex);
                return -EINVAL;
        }
+
+       if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+           test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
+               nbd->destroy_complete = &destroy_complete;
+               mutex_unlock(&nbd_index_mutex);
+
+               /* Wait until the nbd device is totally destroyed */
+               wait_for_completion(&destroy_complete);
+               goto again;
+       }
+
        if (!refcount_inc_not_zero(&nbd->refs)) {
                mutex_unlock(&nbd_index_mutex);
                if (index == -1)
@@ -1833,7 +1891,7 @@ again:
                return -ENOMEM;
        }
        refcount_set(&nbd->config_refs, 1);
-       set_bit(NBD_BOUND, &config->runtime_flags);
+       set_bit(NBD_RT_BOUND, &config->runtime_flags);
 
        ret = nbd_genl_size_set(info, nbd);
        if (ret)
@@ -1853,12 +1911,15 @@ again:
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-                       set_bit(NBD_DESTROY_ON_DISCONNECT,
+                       set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
                                &config->runtime_flags);
+                       set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                        put_dev = true;
+               } else {
+                       clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                }
                if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                       set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
                                &config->runtime_flags);
                }
        }
@@ -1897,7 +1958,7 @@ again:
 out:
        mutex_unlock(&nbd->config_lock);
        if (!ret) {
-               set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+               set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
                refcount_inc(&nbd->config_refs);
                nbd_connect_reply(info, nbd->index);
        }
@@ -1919,7 +1980,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
         * queue.
         */
        flush_workqueue(nbd->recv_workq);
-       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+       if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
 }
@@ -2003,7 +2064,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 
        mutex_lock(&nbd->config_lock);
        config = nbd->config;
-       if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+       if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
            !nbd->task_recv) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
@@ -2026,20 +2087,22 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-                       if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+                       if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
                                              &config->runtime_flags))
                                put_dev = true;
+                       set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                } else {
-                       if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+                       if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
                                               &config->runtime_flags))
                                refcount_inc(&nbd->refs);
+                       clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                }
 
                if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                       set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
                                        &config->runtime_flags);
                } else {
-                       clear_bit(NBD_DISCONNECT_ON_CLOSE,
+                       clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
                                        &config->runtime_flags);
                }
        }
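
The renamed NBD_RT_* bits live in nbd_config and vanish with it when the
last config reference drops, while the new NBD_* bits in nbd_device record
intent (destroy/disconnect) that must outlive a config teardown so the
next nbd_genl_connect() can see it. The pairing looks like this:

        /* Runtime state: per config, rebuilt with every new config. */
        set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
        /* Device intent: per device, survives until nbd_dev_remove(). */
        set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
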
index eabc116..3d7fdea 100644 (file)
@@ -142,8 +142,7 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
                zone->wp = zone->start;
                break;
        default:
-               cmd->error = BLK_STS_NOTSUPP;
-               break;
+               return BLK_STS_NOTSUPP;
        }
        return BLK_STS_OK;
 }
index 0240601..7645700 100644 (file)
@@ -2594,7 +2594,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
        if (ret)
                return ret;
        if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
-               WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
                blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
                return -EINVAL;
        }
index c8fb886..3913667 100644 (file)
@@ -1754,8 +1754,6 @@ static struct rbd_img_request *rbd_img_request_create(
        mutex_init(&img_request->state_mutex);
        kref_init(&img_request->kref);
 
-       dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
-            obj_op_name(op_type), img_request);
        return img_request;
 }
 
@@ -2944,6 +2942,9 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
        __set_bit(IMG_REQ_CHILD, &child_img_req->flags);
        child_img_req->obj_request = obj_req;
 
+       dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req,
+            obj_req);
+
        if (!rbd_img_is_write(img_req)) {
                switch (img_req->data_type) {
                case OBJ_REQUEST_BIO:
@@ -4877,6 +4878,9 @@ static void rbd_queue_workfn(struct work_struct *work)
        img_request->rq = rq;
        snapc = NULL; /* img_request consumes a ref */
 
+       dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
+            img_request, obj_op_name(op_type), offset, length);
+
        if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT)
                result = rbd_img_fill_nodata(img_request, offset, length);
        else
@@ -5669,17 +5673,20 @@ static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
 
 static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
 {
+       size_t size;
        void *reply_buf;
        int ret;
        void *p;
 
-       reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
+       /* Response will be an encoded string, which includes a length */
+       size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX;
+       reply_buf = kzalloc(size, GFP_KERNEL);
        if (!reply_buf)
                return -ENOMEM;
 
        ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
                                  &rbd_dev->header_oloc, "get_object_prefix",
-                                 NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
+                                 NULL, 0, reply_buf, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
@@ -6632,10 +6639,13 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
        queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
        ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
                            ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
-       if (ret > 0)
+       if (ret > 0) {
                ret = rbd_dev->acquire_err;
-       else if (!ret)
-               ret = -ETIMEDOUT;
+       } else {
+               cancel_delayed_work_sync(&rbd_dev->lock_dwork);
+               if (!ret)
+                       ret = -ETIMEDOUT;
+       }
 
        if (ret) {
                rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
@@ -6696,7 +6706,6 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
        dout("rbd id object name is %s\n", oid.name);
 
        /* Response will be an encoded string, which includes a length */
-
        size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
        response = kzalloc(size, GFP_NOIO);
        if (!response) {
@@ -6708,7 +6717,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 
        ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
                                  "get_id", NULL, 0,
-                                 response, RBD_IMAGE_ID_LEN_MAX);
+                                 response, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret == -ENOENT) {
                image_id = kstrdup("", GFP_KERNEL);
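
Both method replies here are ceph-encoded strings: a little-endian 32-bit
length followed by the bytes. The reply buffer therefore has to reserve
the 4-byte prefix on top of the maximum payload, as in this sketch:

        /* Worst-case reply: 4-byte length prefix + longest allowed value. */
        size_t size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX;
        void *reply_buf = kzalloc(size, GFP_KERNEL);

        if (!reply_buf)
                return -ENOMEM;
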
index d58a359..4285e75 100644 (file)
@@ -413,13 +413,14 @@ static void reset_bdev(struct zram *zram)
 static ssize_t backing_dev_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
+       struct file *file;
        struct zram *zram = dev_to_zram(dev);
-       struct file *file = zram->backing_dev;
        char *p;
        ssize_t ret;
 
        down_read(&zram->init_lock);
-       if (!zram->backing_dev) {
+       file = zram->backing_dev;
+       if (!file) {
                memcpy(buf, "none\n", 5);
                up_read(&zram->init_lock);
                return 5;
index 9207ac2..2b6670d 100644 (file)
@@ -74,6 +74,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = {
  * @clk_disable_quirk: module specific clock disable quirk
  * @reset_done_quirk: module specific reset done quirk
  * @module_enable_quirk: module specific enable quirk
+ * @module_disable_quirk: module specific disable quirk
  */
 struct sysc {
        struct device *dev;
@@ -100,6 +101,7 @@ struct sysc {
        void (*clk_disable_quirk)(struct sysc *sysc);
        void (*reset_done_quirk)(struct sysc *sysc);
        void (*module_enable_quirk)(struct sysc *sysc);
+       void (*module_disable_quirk)(struct sysc *sysc);
 };
 
 static void sysc_parse_dts_quirks(struct sysc *ddata, struct device_node *np,
@@ -280,9 +282,6 @@ static int sysc_get_one_clock(struct sysc *ddata, const char *name)
 
        ddata->clocks[index] = devm_clk_get(ddata->dev, name);
        if (IS_ERR(ddata->clocks[index])) {
-               if (PTR_ERR(ddata->clocks[index]) == -ENOENT)
-                       return 0;
-
                dev_err(ddata->dev, "clock get error for %s: %li\n",
                        name, PTR_ERR(ddata->clocks[index]));
 
@@ -357,7 +356,7 @@ static int sysc_get_clocks(struct sysc *ddata)
                        continue;
 
                error = sysc_get_one_clock(ddata, name);
-               if (error && error != -ENOENT)
+               if (error)
                        return error;
        }
 
@@ -962,6 +961,9 @@ static int sysc_disable_module(struct device *dev)
        if (ddata->offsets[SYSC_SYSCONFIG] == -ENODEV)
                return 0;
 
+       if (ddata->module_disable_quirk)
+               ddata->module_disable_quirk(ddata);
+
        regbits = ddata->cap->regbits;
        reg = sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]);
 
@@ -1251,6 +1253,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
                   SYSC_MODULE_QUIRK_SGX),
        SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
                   SYSC_MODULE_QUIRK_WDT),
+       /* Watchdog on am3 and am4 */
+       SYSC_QUIRK("wdt", 0x44e35000, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
+                  SYSC_MODULE_QUIRK_WDT | SYSC_QUIRK_SWSUP_SIDLE),
 
 #ifdef DEBUG
        SYSC_QUIRK("adc", 0, 0, 0x10, -1, 0x47300001, 0xffffffff, 0),
@@ -1443,14 +1448,14 @@ static void sysc_reset_done_quirk_wdt(struct sysc *ddata)
                                   !(val & 0x10), 100,
                                   MAX_MODULE_SOFTRESET_WAIT);
        if (error)
-               dev_warn(ddata->dev, "wdt disable spr failed\n");
+               dev_warn(ddata->dev, "wdt disable step1 failed\n");
 
-       sysc_write(ddata, wps, 0x5555);
+       sysc_write(ddata, spr, 0x5555);
        error = readl_poll_timeout(ddata->module_va + wps, val,
                                   !(val & 0x10), 100,
                                   MAX_MODULE_SOFTRESET_WAIT);
        if (error)
-               dev_warn(ddata->dev, "wdt disable wps failed\n");
+               dev_warn(ddata->dev, "wdt disable step2 failed\n");
 }
 
 static void sysc_init_module_quirks(struct sysc *ddata)
@@ -1474,8 +1479,10 @@ static void sysc_init_module_quirks(struct sysc *ddata)
        if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_SGX)
                ddata->module_enable_quirk = sysc_module_enable_quirk_sgx;
 
-       if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT)
+       if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT) {
                ddata->reset_done_quirk = sysc_reset_done_quirk_wdt;
+               ddata->module_disable_quirk = sysc_reset_done_quirk_wdt;
+       }
 }
 
 static int sysc_clockdomain_init(struct sysc *ddata)
@@ -1632,17 +1639,19 @@ static int sysc_init_module(struct sysc *ddata)
        if (error)
                return error;
 
-       if (manage_clocks) {
-               sysc_clkdm_deny_idle(ddata);
+       sysc_clkdm_deny_idle(ddata);
 
-               error = sysc_enable_opt_clocks(ddata);
-               if (error)
-                       return error;
+       /*
+        * Always enable clocks. The bootloader may or may not have enabled
+        * the related clocks.
+        */
+       error = sysc_enable_opt_clocks(ddata);
+       if (error)
+               return error;
 
-               error = sysc_enable_main_clocks(ddata);
-               if (error)
-                       goto err_opt_clocks;
-       }
+       error = sysc_enable_main_clocks(ddata);
+       if (error)
+               goto err_opt_clocks;
 
        if (!(ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT)) {
                error = sysc_rstctrl_reset_deassert(ddata, true);
@@ -1660,7 +1669,7 @@ static int sysc_init_module(struct sysc *ddata)
                        goto err_main_clocks;
        }
 
-       if (!ddata->legacy_mode && manage_clocks) {
+       if (!ddata->legacy_mode) {
                error = sysc_enable_module(ddata->dev);
                if (error)
                        goto err_main_clocks;
@@ -1677,6 +1686,7 @@ err_main_clocks:
        if (manage_clocks)
                sysc_disable_main_clocks(ddata);
 err_opt_clocks:
+       /* No re-enable of clockdomain autoidle to prevent module autoidle */
        if (manage_clocks) {
                sysc_disable_opt_clocks(ddata);
                sysc_clkdm_allow_idle(ddata);
@@ -2357,6 +2367,27 @@ static void ti_sysc_idle(struct work_struct *work)
 
        ddata = container_of(work, struct sysc, idle_work.work);
 
+       /*
+        * One time decrement of clock usage counts if left on from init.
+        * Note that we disable opt clocks unconditionally in this case
+        * as they are enabled unconditionally during init without
+        * considering sysc_opt_clks_needed() at that point.
+        */
+       if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE |
+                                SYSC_QUIRK_NO_IDLE_ON_INIT)) {
+               sysc_disable_main_clocks(ddata);
+               sysc_disable_opt_clocks(ddata);
+               sysc_clkdm_allow_idle(ddata);
+       }
+
+       /* Keep permanent PM runtime usage count for SYSC_QUIRK_NO_IDLE */
+       if (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE)
+               return;
+
+       /*
+        * Decrement PM runtime usage count for SYSC_QUIRK_NO_IDLE_ON_INIT
+        * and SYSC_QUIRK_NO_RESET_ON_INIT
+        */
        if (pm_runtime_active(ddata->dev))
                pm_runtime_put_sync(ddata->dev);
 }
@@ -2445,7 +2476,8 @@ static int sysc_probe(struct platform_device *pdev)
        INIT_DELAYED_WORK(&ddata->idle_work, ti_sysc_idle);
 
        /* At least earlycon won't survive without deferred idle */
-       if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE_ON_INIT |
+       if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE |
+                                SYSC_QUIRK_NO_IDLE_ON_INIT |
                                 SYSC_QUIRK_NO_RESET_ON_INIT)) {
                schedule_delayed_work(&ddata->idle_work, 3000);
        } else {
index 9eb564c..43dd089 100644 (file)
@@ -29,8 +29,8 @@
 #include <linux/export.h>
 #include <linux/io.h>
 #include <linux/uio.h>
-
 #include <linux/uaccess.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_IA64
 # include <linux/efi.h>
@@ -807,7 +807,10 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 
 static int open_port(struct inode *inode, struct file *filp)
 {
-       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+       if (!capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
+       return security_locked_down(LOCKDOWN_DEV_MEM);
 }
 
 #define zero_lseek     null_lseek
index d3beed0..de434fe 100644 (file)
@@ -1732,6 +1732,56 @@ void get_random_bytes(void *buf, int nbytes)
 }
 EXPORT_SYMBOL(get_random_bytes);
 
+
+/*
+ * Each time the timer fires, we expect that we got an unpredictable
+ * jump in the cycle counter. Even if the timer is running on another
+ * CPU, the timer activity will be touching the stack of the CPU that is
+ * generating entropy..
+ *
+ * Note that we don't re-arm the timer in the timer itself - we are
+ * happy to be scheduled away, since that just makes the load more
+ * complex, but we do not want the timer to keep ticking unless the
+ * entropy loop is running.
+ *
+ * So the re-arming always happens in the entropy loop itself.
+ */
+static void entropy_timer(struct timer_list *t)
+{
+       credit_entropy_bits(&input_pool, 1);
+}
+
+/*
+ * If we have an actual cycle counter, see if we can
+ * generate enough entropy with timing noise
+ */
+static void try_to_generate_entropy(void)
+{
+       struct {
+               unsigned long now;
+               struct timer_list timer;
+       } stack;
+
+       stack.now = random_get_entropy();
+
+       /* Slow counter - or none. Don't even bother */
+       if (stack.now == random_get_entropy())
+               return;
+
+       timer_setup_on_stack(&stack.timer, entropy_timer, 0);
+       while (!crng_ready()) {
+               if (!timer_pending(&stack.timer))
+                       mod_timer(&stack.timer, jiffies+1);
+               mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now));
+               schedule();
+               stack.now = random_get_entropy();
+       }
+
+       del_timer_sync(&stack.timer);
+       destroy_timer_on_stack(&stack.timer);
+       mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now));
+}
+
 /*
  * Wait for the urandom pool to be seeded and thus guaranteed to supply
  * cryptographically secure random numbers. This applies to: the /dev/urandom
@@ -1746,7 +1796,17 @@ int wait_for_random_bytes(void)
 {
        if (likely(crng_ready()))
                return 0;
-       return wait_event_interruptible(crng_init_wait, crng_ready());
+
+       do {
+               int ret;
+               ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
+               if (ret)
+                       return ret > 0 ? 0 : ret;
+
+               try_to_generate_entropy();
+       } while (!crng_ready());
+
+       return 0;
 }
 EXPORT_SYMBOL(wait_for_random_bytes);
 
@@ -2460,4 +2520,4 @@ void add_bootloader_randomness(const void *buf, unsigned int size)
        else
                add_device_randomness(buf, size);
 }
-EXPORT_SYMBOL_GPL(add_bootloader_randomness);
\ No newline at end of file
+EXPORT_SYMBOL_GPL(add_bootloader_randomness);
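
For callers, the visible change is that wait_for_random_bytes() now makes
forward progress on machines with a cycle counter instead of potentially
sleeping forever before the pool is seeded. Typical in-kernel usage is
unchanged (sketch):

        u8 key[32];
        int ret;

        ret = wait_for_random_bytes();  /* may now self-seed via the timer */
        if (ret)
                return ret;             /* interrupted by a signal */
        get_random_bytes(key, sizeof(key));
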
index 1b4f95c..d7a3888 100644 (file)
@@ -320,18 +320,22 @@ int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
        if (!chip)
                return -ENODEV;
 
-       for (i = 0; i < chip->nr_allocated_banks; i++)
-               if (digests[i].alg_id != chip->allocated_banks[i].alg_id)
-                       return -EINVAL;
+       for (i = 0; i < chip->nr_allocated_banks; i++) {
+               if (digests[i].alg_id != chip->allocated_banks[i].alg_id) {
+                       rc = -EINVAL;
+                       goto out;
+               }
+       }
 
        if (chip->flags & TPM_CHIP_FLAG_TPM2) {
                rc = tpm2_pcr_extend(chip, pcr_idx, digests);
-               tpm_put_ops(chip);
-               return rc;
+               goto out;
        }
 
        rc = tpm1_pcr_extend(chip, pcr_idx, digests[0].digest,
                             "attempting extend a PCR value");
+
+out:
        tpm_put_ops(chip);
        return rc;
 }
@@ -354,14 +358,9 @@ int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen)
        if (!chip)
                return -ENODEV;
 
-       rc = tpm_buf_init(&buf, 0, 0);
-       if (rc)
-               goto out;
-
-       memcpy(buf.data, cmd, buflen);
+       buf.data = cmd;
        rc = tpm_transmit_cmd(chip, &buf, 0, "attempting to send a command");
-       tpm_buf_destroy(&buf);
-out:
+
        tpm_put_ops(chip);
        return rc;
 }
index 02c1595..18b0c39 100644 (file)
@@ -9,7 +9,6 @@
 
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include "xillybus.h"
 
index b57fe09..9dd6185 100644 (file)
@@ -683,7 +683,7 @@ static const struct omap_clkctrl_reg_data dra7_l4per2_clkctrl_regs[] __initconst
        { DRA7_L4PER2_MCASP2_CLKCTRL, dra7_mcasp2_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:0154:22" },
        { DRA7_L4PER2_MCASP3_CLKCTRL, dra7_mcasp3_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:015c:22" },
        { DRA7_L4PER2_MCASP5_CLKCTRL, dra7_mcasp5_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:016c:22" },
-       { DRA7_L4PER2_MCASP8_CLKCTRL, dra7_mcasp8_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:0184:24" },
+       { DRA7_L4PER2_MCASP8_CLKCTRL, dra7_mcasp8_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:0184:22" },
        { DRA7_L4PER2_MCASP4_CLKCTRL, dra7_mcasp4_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:018c:22" },
        { DRA7_L4PER2_UART7_CLKCTRL, dra7_uart7_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:01c4:24" },
        { DRA7_L4PER2_UART8_CLKCTRL, dra7_uart8_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:01d4:24" },
@@ -828,8 +828,8 @@ static struct ti_dt_clk dra7xx_clks[] = {
        DT_CLK(NULL, "mcasp6_aux_gfclk_mux", "l4per2-clkctrl:01f8:22"),
        DT_CLK(NULL, "mcasp7_ahclkx_mux", "l4per2-clkctrl:01fc:24"),
        DT_CLK(NULL, "mcasp7_aux_gfclk_mux", "l4per2-clkctrl:01fc:22"),
-       DT_CLK(NULL, "mcasp8_ahclkx_mux", "l4per2-clkctrl:0184:22"),
-       DT_CLK(NULL, "mcasp8_aux_gfclk_mux", "l4per2-clkctrl:0184:24"),
+       DT_CLK(NULL, "mcasp8_ahclkx_mux", "l4per2-clkctrl:0184:24"),
+       DT_CLK(NULL, "mcasp8_aux_gfclk_mux", "l4per2-clkctrl:0184:22"),
        DT_CLK(NULL, "mmc1_clk32k", "l3init-clkctrl:0008:8"),
        DT_CLK(NULL, "mmc1_fclk_div", "l3init-clkctrl:0008:25"),
        DT_CLK(NULL, "mmc1_fclk_mux", "l3init-clkctrl:0008:24"),
index d8c2bd4..11ff701 100644 (file)
@@ -25,7 +25,9 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq)
 
        struct clock_event_device *clkevt = &to->clkevt;
 
-       of_irq->percpu ? free_percpu_irq(of_irq->irq, clkevt) :
+       if (of_irq->percpu)
+               free_percpu_irq(of_irq->irq, clkevt);
+       else
                free_irq(of_irq->irq, clkevt);
 }
 
index c52d6fa..48a224a 100644 (file)
@@ -720,7 +720,7 @@ static ssize_t store_##file_name                                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
-       ret = dev_pm_qos_update_request(policy->object##_freq_req, val);\
+       ret = freq_qos_update_request(policy->object##_freq_req, val);\
        return ret >= 0 ? count : ret;                                  \
 }
 
@@ -1202,19 +1202,21 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
                goto err_free_real_cpus;
        }
 
+       freq_constraints_init(&policy->constraints);
+
        policy->nb_min.notifier_call = cpufreq_notifier_min;
        policy->nb_max.notifier_call = cpufreq_notifier_max;
 
-       ret = dev_pm_qos_add_notifier(dev, &policy->nb_min,
-                                     DEV_PM_QOS_MIN_FREQUENCY);
+       ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN,
+                                   &policy->nb_min);
        if (ret) {
                dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n",
                        ret, cpumask_pr_args(policy->cpus));
                goto err_kobj_remove;
        }
 
-       ret = dev_pm_qos_add_notifier(dev, &policy->nb_max,
-                                     DEV_PM_QOS_MAX_FREQUENCY);
+       ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX,
+                                   &policy->nb_max);
        if (ret) {
                dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n",
                        ret, cpumask_pr_args(policy->cpus));
@@ -1232,8 +1234,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
        return policy;
 
 err_min_qos_notifier:
-       dev_pm_qos_remove_notifier(dev, &policy->nb_min,
-                                  DEV_PM_QOS_MIN_FREQUENCY);
+       freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN,
+                                &policy->nb_min);
 err_kobj_remove:
        cpufreq_policy_put_kobj(policy);
 err_free_real_cpus:
@@ -1250,7 +1252,6 @@ err_free_policy:
 
 static void cpufreq_policy_free(struct cpufreq_policy *policy)
 {
-       struct device *dev = get_cpu_device(policy->cpu);
        unsigned long flags;
        int cpu;
 
@@ -1262,10 +1263,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
                per_cpu(cpufreq_cpu_data, cpu) = NULL;
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-       dev_pm_qos_remove_notifier(dev, &policy->nb_max,
-                                  DEV_PM_QOS_MAX_FREQUENCY);
-       dev_pm_qos_remove_notifier(dev, &policy->nb_min,
-                                  DEV_PM_QOS_MIN_FREQUENCY);
+       freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MAX,
+                                &policy->nb_max);
+       freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN,
+                                &policy->nb_min);
+
+       /* Cancel any pending policy->update work before freeing the policy. */
+       cancel_work_sync(&policy->update);
 
        if (policy->max_freq_req) {
                /*
@@ -1274,10 +1278,10 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
                 */
                blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                             CPUFREQ_REMOVE_POLICY, policy);
-               dev_pm_qos_remove_request(policy->max_freq_req);
+               freq_qos_remove_request(policy->max_freq_req);
        }
 
-       dev_pm_qos_remove_request(policy->min_freq_req);
+       freq_qos_remove_request(policy->min_freq_req);
        kfree(policy->min_freq_req);
 
        cpufreq_policy_put_kobj(policy);
@@ -1357,8 +1361,6 @@ static int cpufreq_online(unsigned int cpu)
        cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
        if (new_policy) {
-               struct device *dev = get_cpu_device(cpu);
-
                for_each_cpu(j, policy->related_cpus) {
                        per_cpu(cpufreq_cpu_data, j) = policy;
                        add_cpu_dev_symlink(policy, j);
@@ -1369,36 +1371,31 @@ static int cpufreq_online(unsigned int cpu)
                if (!policy->min_freq_req)
                        goto out_destroy_policy;
 
-               ret = dev_pm_qos_add_request(dev, policy->min_freq_req,
-                                            DEV_PM_QOS_MIN_FREQUENCY,
-                                            policy->min);
+               ret = freq_qos_add_request(&policy->constraints,
+                                          policy->min_freq_req, FREQ_QOS_MIN,
+                                          policy->min);
                if (ret < 0) {
                        /*
-                        * So we don't call dev_pm_qos_remove_request() for an
+                        * So we don't call freq_qos_remove_request() for an
                         * uninitialized request.
                         */
                        kfree(policy->min_freq_req);
                        policy->min_freq_req = NULL;
-
-                       dev_err(dev, "Failed to add min-freq constraint (%d)\n",
-                               ret);
                        goto out_destroy_policy;
                }
 
                /*
                 * This must be initialized right here to avoid calling
-                * dev_pm_qos_remove_request() on uninitialized request in case
+                * freq_qos_remove_request() on uninitialized request in case
                 * of errors.
                 */
                policy->max_freq_req = policy->min_freq_req + 1;
 
-               ret = dev_pm_qos_add_request(dev, policy->max_freq_req,
-                                            DEV_PM_QOS_MAX_FREQUENCY,
-                                            policy->max);
+               ret = freq_qos_add_request(&policy->constraints,
+                                          policy->max_freq_req, FREQ_QOS_MAX,
+                                          policy->max);
                if (ret < 0) {
                        policy->max_freq_req = NULL;
-                       dev_err(dev, "Failed to add max-freq constraint (%d)\n",
-                               ret);
                        goto out_destroy_policy;
                }
 
@@ -2374,7 +2371,6 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
                       struct cpufreq_policy *new_policy)
 {
        struct cpufreq_governor *old_gov;
-       struct device *cpu_dev = get_cpu_device(policy->cpu);
        int ret;
 
        pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
@@ -2386,8 +2382,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
         * PM QoS framework collects all the requests from users and provides us
         * the final aggregated value here.
         */
-       new_policy->min = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MIN_FREQUENCY);
-       new_policy->max = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MAX_FREQUENCY);
+       new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
+       new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
 
        /* verify the cpu speed can be set within this limit */
        ret = cpufreq_driver->verify(new_policy);
@@ -2518,7 +2514,7 @@ static int cpufreq_boost_set_sw(int state)
                        break;
                }
 
-               ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max);
+               ret = freq_qos_update_request(policy->max_freq_req, policy->max);
                if (ret < 0)
                        break;
        }
@@ -2737,14 +2733,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
 }
 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
 
-/*
- * Stop cpufreq at shutdown to make sure it isn't holding any locks
- * or mutexes when secondary CPUs are halted.
- */
-static struct syscore_ops cpufreq_syscore_ops = {
-       .shutdown = cpufreq_suspend,
-};
-
 struct kobject *cpufreq_global_kobject;
 EXPORT_SYMBOL(cpufreq_global_kobject);
 
@@ -2756,8 +2744,6 @@ static int __init cpufreq_core_init(void)
        cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
        BUG_ON(!cpufreq_global_kobject);
 
-       register_syscore_ops(&cpufreq_syscore_ops);
-
        return 0;
 }
 module_param(off, int, 0444);
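
Note: the hunks above migrate cpufreq from per-device PM QoS (dev_pm_qos_*) to
frequency constraints embedded in the policy itself (freq_qos_*), so requests
aggregate per policy rather than per CPU device. A minimal sketch of the request
lifecycle against the freq_qos API this series moves to (kernel-context sketch;
values in kHz as cpufreq uses, error handling trimmed):

#include <linux/pm_qos.h>
#include <linux/printk.h>

static struct freq_constraints qos;
static struct freq_qos_request req;

static int example(void)
{
	int ret;

	freq_constraints_init(&qos);

	/* Add a request capping the aggregate maximum at 1.2 GHz. */
	ret = freq_qos_add_request(&qos, &req, FREQ_QOS_MAX, 1200000);
	if (ret < 0)
		return ret;

	/* The aggregate value reflects all active requests. */
	pr_info("max: %d kHz\n", freq_qos_read_value(&qos, FREQ_QOS_MAX));

	freq_qos_update_request(&req, 1000000);	/* tighten the cap */
	freq_qos_remove_request(&req);		/* drop it on teardown */
	return 0;
}
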
index 9f02de9..53a51c1 100644 (file)
@@ -1088,10 +1088,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
 
 static struct cpufreq_driver intel_pstate;
 
-static void update_qos_request(enum dev_pm_qos_req_type type)
+static void update_qos_request(enum freq_qos_req_type type)
 {
        int max_state, turbo_max, freq, i, perf_pct;
-       struct dev_pm_qos_request *req;
+       struct freq_qos_request *req;
        struct cpufreq_policy *policy;
 
        for_each_possible_cpu(i) {
@@ -1112,7 +1112,7 @@ static void update_qos_request(enum dev_pm_qos_req_type type)
                else
                        turbo_max = cpu->pstate.turbo_pstate;
 
-               if (type == DEV_PM_QOS_MIN_FREQUENCY) {
+               if (type == FREQ_QOS_MIN) {
                        perf_pct = global.min_perf_pct;
                } else {
                        req++;
@@ -1122,7 +1122,7 @@ static void update_qos_request(enum dev_pm_qos_req_type type)
                freq = DIV_ROUND_UP(turbo_max * perf_pct, 100);
                freq *= cpu->pstate.scaling;
 
-               if (dev_pm_qos_update_request(req, freq) < 0)
+               if (freq_qos_update_request(req, freq) < 0)
                        pr_warn("Failed to update freq constraint: CPU%d\n", i);
        }
 }
@@ -1153,7 +1153,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
        if (intel_pstate_driver == &intel_pstate)
                intel_pstate_update_policies();
        else
-               update_qos_request(DEV_PM_QOS_MAX_FREQUENCY);
+               update_qos_request(FREQ_QOS_MAX);
 
        mutex_unlock(&intel_pstate_driver_lock);
 
@@ -1187,7 +1187,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
        if (intel_pstate_driver == &intel_pstate)
                intel_pstate_update_policies();
        else
-               update_qos_request(DEV_PM_QOS_MIN_FREQUENCY);
+               update_qos_request(FREQ_QOS_MIN);
 
        mutex_unlock(&intel_pstate_driver_lock);
 
@@ -2381,7 +2381,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
        int max_state, turbo_max, min_freq, max_freq, ret;
-       struct dev_pm_qos_request *req;
+       struct freq_qos_request *req;
        struct cpudata *cpu;
        struct device *dev;
 
@@ -2416,15 +2416,15 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
        max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
        max_freq *= cpu->pstate.scaling;
 
-       ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_MIN_FREQUENCY,
-                                    min_freq);
+       ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN,
+                                  min_freq);
        if (ret < 0) {
                dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
                goto free_req;
        }
 
-       ret = dev_pm_qos_add_request(dev, req + 1, DEV_PM_QOS_MAX_FREQUENCY,
-                                    max_freq);
+       ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX,
+                                  max_freq);
        if (ret < 0) {
                dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
                goto remove_min_req;
@@ -2435,7 +2435,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
        return 0;
 
 remove_min_req:
-       dev_pm_qos_remove_request(req);
+       freq_qos_remove_request(req);
 free_req:
        kfree(req);
 pstate_exit:
@@ -2446,12 +2446,12 @@ pstate_exit:
 
 static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
-       struct dev_pm_qos_request *req;
+       struct freq_qos_request *req;
 
        req = policy->driver_data;
 
-       dev_pm_qos_remove_request(req + 1);
-       dev_pm_qos_remove_request(req);
+       freq_qos_remove_request(req + 1);
+       freq_qos_remove_request(req);
        kfree(req);
 
        return intel_pstate_cpu_exit(policy);
index bc9dd30..037fe23 100644 (file)
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
 static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
 {
        struct cpufreq_policy *policy;
-       struct dev_pm_qos_request *req;
+       struct freq_qos_request *req;
        u8 node, slow_mode;
        int cpu, ret;
 
@@ -86,7 +86,7 @@ static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
 
        req = policy->driver_data;
 
-       ret = dev_pm_qos_update_request(req,
+       ret = freq_qos_update_request(req,
                        policy->freq_table[slow_mode].frequency);
        if (ret < 0)
                pr_warn("Failed to update freq constraint: %d\n", ret);
@@ -103,7 +103,7 @@ static struct pmi_handler cbe_pmi_handler = {
 
 void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy)
 {
-       struct dev_pm_qos_request *req;
+       struct freq_qos_request *req;
        int ret;
 
        if (!cbe_cpufreq_has_pmi)
@@ -113,9 +113,8 @@ void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy)
        if (!req)
                return;
 
-       ret = dev_pm_qos_add_request(get_cpu_device(policy->cpu), req,
-                                    DEV_PM_QOS_MAX_FREQUENCY,
-                                    policy->freq_table[0].frequency);
+       ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MAX,
+                                  policy->freq_table[0].frequency);
        if (ret < 0) {
                pr_err("Failed to add freq constraint (%d)\n", ret);
                kfree(req);
@@ -128,10 +127,10 @@ EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_policy_init);
 
 void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy)
 {
-       struct dev_pm_qos_request *req = policy->driver_data;
+       struct freq_qos_request *req = policy->driver_data;
 
        if (cbe_cpufreq_has_pmi) {
-               dev_pm_qos_remove_request(req);
+               freq_qos_remove_request(req);
                kfree(req);
        }
 }
index 932390b..b0ce9bc 100644 (file)
@@ -95,6 +95,10 @@ static int __init haltpoll_init(void)
        int ret;
        struct cpuidle_driver *drv = &haltpoll_driver;
 
+       /* Do not load haltpoll if idle= is passed */
+       if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+               return -ENODEV;
+
        cpuidle_poll_state_init(drv);
 
        if (!kvm_para_available() ||
index 774d991..aca7523 100644 (file)
@@ -1297,7 +1297,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
        tp->write_seq = snd_isn;
        tp->snd_nxt = snd_isn;
        tp->snd_una = snd_isn;
-       inet_sk(sk)->inet_id = tp->write_seq ^ jiffies;
+       inet_sk(sk)->inet_id = prandom_u32();
        assign_rxopt(sk, opt);
 
        if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
index c70cb5f..98bc5a4 100644 (file)
@@ -1078,7 +1078,7 @@ new_buf:
                        bool merge;
 
                        if (page)
-                               pg_size <<= compound_order(page);
+                               pg_size = page_size(page);
                        if (off < pg_size &&
                            skb_can_coalesce(skb, i, page, off)) {
                                merge = 1;
@@ -1105,8 +1105,7 @@ new_buf:
                                                           __GFP_NORETRY,
                                                           order);
                                        if (page)
-                                               pg_size <<=
-                                                       compound_order(page);
+                                               pg_size <<= order;
                                }
                                if (!page) {
                                        page = alloc_page(gfp);
@@ -1703,7 +1702,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                return peekmsg(sk, msg, len, nonblock, flags);
 
        if (sk_can_busy_loop(sk) &&
-           skb_queue_empty(&sk->sk_receive_queue) &&
+           skb_queue_empty_lockless(&sk->sk_receive_queue) &&
            sk->sk_state == TCP_ESTABLISHED)
                sk_busy_loop(sk, nonblock);
 
index 42a8f3f..7090025 100644 (file)
@@ -471,7 +471,7 @@ unlock:
        if (pfence_excl)
                *pfence_excl = fence_excl;
        else if (fence_excl)
-               shared[++shared_count] = fence_excl;
+               shared[shared_count++] = fence_excl;
 
        if (!shared_count) {
                kfree(shared);
index 9ba74ab..c27e206 100644 (file)
@@ -1707,6 +1707,14 @@ static void sdma_add_scripts(struct sdma_engine *sdma,
        if (!sdma->script_number)
                sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1;
 
+       if (sdma->script_number > sizeof(struct sdma_script_start_addrs)
+                                 / sizeof(s32)) {
+               dev_err(sdma->dev,
+                       "SDMA script number %d does not match the firmware.\n",
+                       sdma->script_number);
+               return;
+       }
+
        for (i = 0; i < sdma->script_number; i++)
                if (addr_arr[i] > 0)
                        saddr_arr[i] = addr_arr[i];
index 8e90a40..ef73f65 100644 (file)
@@ -694,6 +694,25 @@ static int bam_dma_terminate_all(struct dma_chan *chan)
 
        /* remove all transactions, including active transaction */
        spin_lock_irqsave(&bchan->vc.lock, flag);
+       /*
+        * If we have transactions queued, then some might be committed to the
+        * hardware in the desc fifo.  The only way to reset the desc fifo is
+        * to do a hardware reset (either by pipe or the entire block).
+        * bam_chan_init_hw() will trigger a pipe reset, and also reinit the
+        * pipe.  If the pipe is left disabled (default state after pipe reset)
+        * and is accessed by a connected hardware engine, a fatal error in
+        * the BAM will occur.  There is a small window where this could happen
+        * with bam_chan_init_hw(), but it is assumed that the caller has
+        * stopped activity on any attached hardware engine.  Make sure to do
+        * this first so that the BAM hardware doesn't cause memory corruption
+        * by accessing freed resources.
+        */
+       if (!list_empty(&bchan->desc_list)) {
+               async_desc = list_first_entry(&bchan->desc_list,
+                                             struct bam_async_desc, desc_node);
+               bam_chan_init_hw(bchan, async_desc->dir);
+       }
+
        list_for_each_entry_safe(async_desc, tmp,
                                 &bchan->desc_list, desc_node) {
                list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
index 525dc73..8546ad0 100644 (file)
 #define SPRD_DMA_SRC_TRSF_STEP_OFFSET  0
 #define SPRD_DMA_TRSF_STEP_MASK                GENMASK(15, 0)
 
+/* SPRD DMA_SRC_BLK_STEP register definition */
+#define SPRD_DMA_LLIST_HIGH_MASK       GENMASK(31, 28)
+#define SPRD_DMA_LLIST_HIGH_SHIFT      28
+
 /* define DMA channel mode & trigger mode mask */
 #define SPRD_DMA_CHN_MODE_MASK         GENMASK(7, 0)
 #define SPRD_DMA_TRG_MODE_MASK         GENMASK(7, 0)
@@ -208,6 +212,7 @@ struct sprd_dma_dev {
        struct sprd_dma_chn     channels[0];
 };
 
+static void sprd_dma_free_desc(struct virt_dma_desc *vd);
 static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param);
 static struct of_dma_filter_info sprd_dma_info = {
        .filter_fn = sprd_dma_filter_fn,
@@ -609,12 +614,19 @@ static int sprd_dma_alloc_chan_resources(struct dma_chan *chan)
 static void sprd_dma_free_chan_resources(struct dma_chan *chan)
 {
        struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+       struct virt_dma_desc *cur_vd = NULL;
        unsigned long flags;
 
        spin_lock_irqsave(&schan->vc.lock, flags);
+       if (schan->cur_desc)
+               cur_vd = &schan->cur_desc->vd;
+
        sprd_dma_stop(schan);
        spin_unlock_irqrestore(&schan->vc.lock, flags);
 
+       if (cur_vd)
+               sprd_dma_free_desc(cur_vd);
+
        vchan_free_chan_resources(&schan->vc);
        pm_runtime_put(chan->device->dev);
 }
@@ -717,6 +729,7 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
        u32 int_mode = flags & SPRD_DMA_INT_MASK;
        int src_datawidth, dst_datawidth, src_step, dst_step;
        u32 temp, fix_mode = 0, fix_en = 0;
+       phys_addr_t llist_ptr;
 
        if (dir == DMA_MEM_TO_DEV) {
                src_step = sprd_dma_get_step(slave_cfg->src_addr_width);
@@ -814,13 +827,16 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
                 * Set the link-list pointer point to next link-list
                 * configuration's physical address.
                 */
-               hw->llist_ptr = schan->linklist.phy_addr + temp;
+               llist_ptr = schan->linklist.phy_addr + temp;
+               hw->llist_ptr = lower_32_bits(llist_ptr);
+               hw->src_blk_step = (upper_32_bits(llist_ptr) << SPRD_DMA_LLIST_HIGH_SHIFT) &
+                       SPRD_DMA_LLIST_HIGH_MASK;
        } else {
                hw->llist_ptr = 0;
+               hw->src_blk_step = 0;
        }
 
        hw->frg_step = 0;
-       hw->src_blk_step = 0;
        hw->des_blk_step = 0;
        return 0;
 }
@@ -1023,15 +1039,22 @@ static int sprd_dma_resume(struct dma_chan *chan)
 static int sprd_dma_terminate_all(struct dma_chan *chan)
 {
        struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+       struct virt_dma_desc *cur_vd = NULL;
        unsigned long flags;
        LIST_HEAD(head);
 
        spin_lock_irqsave(&schan->vc.lock, flags);
+       if (schan->cur_desc)
+               cur_vd = &schan->cur_desc->vd;
+
        sprd_dma_stop(schan);
 
        vchan_get_all_descriptors(&schan->vc, &head);
        spin_unlock_irqrestore(&schan->vc.lock, flags);
 
+       if (cur_vd)
+               sprd_dma_free_desc(cur_vd);
+
        vchan_dma_desc_free_list(&schan->vc, &head);
        return 0;
 }
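
Note: the sprd-dma hunks above extend link-list support to physical addresses
above 4 GB: the low 32 bits of the link-list pointer go into LLIST_PTR, while
bits [35:32] are stashed in bits [31:28] of the otherwise-unused SRC_BLK_STEP
register. A small standalone illustration of that split (address hypothetical):

#include <stdint.h>
#include <stdio.h>

#define LLIST_HIGH_SHIFT 28
#define LLIST_HIGH_MASK  (0xfu << LLIST_HIGH_SHIFT)

int main(void)
{
	uint64_t llist_ptr = 0x345678000ULL;	/* 36-bit example address */
	uint32_t lo = (uint32_t)llist_ptr;
	uint32_t step = ((uint32_t)(llist_ptr >> 32) << LLIST_HIGH_SHIFT)
			& LLIST_HIGH_MASK;

	/* prints LLIST_PTR=0x45678000 SRC_BLK_STEP=0x30000000 */
	printf("LLIST_PTR=0x%08x SRC_BLK_STEP=0x%08x\n", lo, step);
	return 0;
}
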
index 5f8adf5..6e12685 100644 (file)
@@ -40,6 +40,7 @@
 #define ADMA_CH_CONFIG_MAX_BURST_SIZE                   16
 #define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val)             ((val) & 0xf)
 #define ADMA_CH_CONFIG_MAX_BUFS                                8
+#define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) ((reqs) << 4)
 
 #define ADMA_CH_FIFO_CTRL                              0x2c
 #define TEGRA210_ADMA_CH_FIFO_CTRL_TXSIZE(val)         (((val) & 0xf) << 8)
@@ -77,6 +78,7 @@ struct tegra_adma;
  * @ch_req_tx_shift: Register offset for AHUB transmit channel select.
  * @ch_req_rx_shift: Register offset for AHUB receive channel select.
  * @ch_base_offset: Register offset of DMA channel registers.
+ * @has_outstanding_reqs: Whether the DMA channel can have outstanding requests.
  * @ch_fifo_ctrl: Default value for channel FIFO CTRL register.
  * @ch_req_mask: Mask for Tx or Rx channel select.
  * @ch_req_max: Maximum number of Tx or Rx channels available.
@@ -95,6 +97,7 @@ struct tegra_adma_chip_data {
        unsigned int ch_req_max;
        unsigned int ch_reg_size;
        unsigned int nr_channels;
+       bool has_outstanding_reqs;
 };
 
 /*
@@ -594,6 +597,8 @@ static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc,
                         ADMA_CH_CTRL_FLOWCTRL_EN;
        ch_regs->config |= cdata->adma_get_burst_config(burst_size);
        ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1);
+       if (cdata->has_outstanding_reqs)
+               ch_regs->config |= TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(8);
        ch_regs->fifo_ctrl = cdata->ch_fifo_ctrl;
        ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK;
 
@@ -778,6 +783,7 @@ static const struct tegra_adma_chip_data tegra210_chip_data = {
        .ch_req_tx_shift        = 28,
        .ch_req_rx_shift        = 24,
        .ch_base_offset         = 0,
+       .has_outstanding_reqs   = false,
        .ch_fifo_ctrl           = TEGRA210_FIFO_CTRL_DEFAULT,
        .ch_req_mask            = 0xf,
        .ch_req_max             = 10,
@@ -792,6 +798,7 @@ static const struct tegra_adma_chip_data tegra186_chip_data = {
        .ch_req_tx_shift        = 27,
        .ch_req_rx_shift        = 22,
        .ch_base_offset         = 0x10000,
+       .has_outstanding_reqs   = true,
        .ch_fifo_ctrl           = TEGRA186_FIFO_CTRL_DEFAULT,
        .ch_req_mask            = 0x1f,
        .ch_req_max             = 20,
index 2f946f5..8c2f7eb 100644 (file)
@@ -586,9 +586,22 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
        enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
 {
        struct cppi41_channel *c = to_cpp41_chan(chan);
+       struct dma_async_tx_descriptor *txd = NULL;
+       struct cppi41_dd *cdd = c->cdd;
        struct cppi41_desc *d;
        struct scatterlist *sg;
        unsigned int i;
+       int error;
+
+       error = pm_runtime_get(cdd->ddev.dev);
+       if (error < 0) {
+               pm_runtime_put_noidle(cdd->ddev.dev);
+
+               return NULL;
+       }
+
+       if (cdd->is_suspended)
+               goto err_out_not_ready;
 
        d = c->desc;
        for_each_sg(sgl, sg, sg_len, i) {
@@ -611,7 +624,13 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
                d++;
        }
 
-       return &c->txd;
+       txd = &c->txd;
+
+err_out_not_ready:
+       pm_runtime_mark_last_busy(cdd->ddev.dev);
+       pm_runtime_put_autosuspend(cdd->ddev.dev);
+
+       return txd;
 }
 
 static void cppi41_compute_td_desc(struct cppi41_desc *d)
index e7dc3c4..5d56f1e 100644 (file)
@@ -68,6 +68,9 @@
 #define XILINX_DMA_DMACR_CIRC_EN               BIT(1)
 #define XILINX_DMA_DMACR_RUNSTOP               BIT(0)
 #define XILINX_DMA_DMACR_FSYNCSRC_MASK         GENMASK(6, 5)
+#define XILINX_DMA_DMACR_DELAY_MASK            GENMASK(31, 24)
+#define XILINX_DMA_DMACR_FRAME_COUNT_MASK      GENMASK(23, 16)
+#define XILINX_DMA_DMACR_MASTER_MASK           GENMASK(11, 8)
 
 #define XILINX_DMA_REG_DMASR                   0x0004
 #define XILINX_DMA_DMASR_EOL_LATE_ERR          BIT(15)
@@ -1354,7 +1357,8 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
                                           node);
                hw = &segment->hw;
 
-               xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR, hw->buf_addr);
+               xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR,
+                            xilinx_prep_dma_addr_t(hw->buf_addr));
 
                /* Start the transfer */
                dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
@@ -2117,8 +2121,10 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
        chan->config.gen_lock = cfg->gen_lock;
        chan->config.master = cfg->master;
 
+       dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN;
        if (cfg->gen_lock && chan->genlock) {
                dmacr |= XILINX_DMA_DMACR_GENLOCK_EN;
+               dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK;
                dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT;
        }
 
@@ -2134,11 +2140,13 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
        chan->config.delay = cfg->delay;
 
        if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) {
+               dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK;
                dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT;
                chan->config.coalesc = cfg->coalesc;
        }
 
        if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) {
+               dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK;
                dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT;
                chan->config.delay = cfg->delay;
        }
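
Note: the xilinx_dma hunks above fix a classic read-modify-write bug: new
FRAME_COUNT/DELAY/MASTER field values were OR-ed into DMACR without first
clearing the old field bits, so stale bits merged into the new setting. A
standalone illustration of why clear-before-set matters:

#include <stdint.h>
#include <stdio.h>

#define FRAME_COUNT_SHIFT 16
#define FRAME_COUNT_MASK  (0xffu << FRAME_COUNT_SHIFT)

int main(void)
{
	uint32_t dmacr = 0x00ff0000;	/* old frame count: 0xff */
	uint32_t coalesc = 0x01;

	uint32_t buggy = dmacr | (coalesc << FRAME_COUNT_SHIFT);
	uint32_t fixed = (dmacr & ~FRAME_COUNT_MASK)
			 | (coalesc << FRAME_COUNT_SHIFT);

	/* prints buggy=0x00ff0000 fixed=0x00010000 */
	printf("buggy=0x%08x fixed=0x%08x\n", buggy, fixed);
	return 0;
}
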
index d413a0b..0bb6285 100644 (file)
@@ -553,7 +553,11 @@ void ghes_edac_unregister(struct ghes *ghes)
        if (!ghes_pvt)
                return;
 
+       if (atomic_dec_return(&ghes_init))
+               return;
+
        mci = ghes_pvt->mci;
+       ghes_pvt = NULL;
        edac_mc_del_mc(mci->pdev);
        edac_mc_free(mci);
 }
index 64cc819..ab42c21 100644 (file)
@@ -150,7 +150,7 @@ static int scmi_domain_reset(const struct scmi_handle *handle, u32 domain,
        dom = t->tx.buf;
        dom->domain_id = cpu_to_le32(domain);
        dom->flags = cpu_to_le32(flags);
-       dom->domain_id = cpu_to_le32(state);
+       dom->reset_state = cpu_to_le32(state);
 
        if (rdom->async_reset)
                ret = scmi_do_xfer_with_response(handle, t);
index 35ed56b..1e21fc3 100644 (file)
@@ -408,7 +408,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
                bytes = ~0ull;
        else if (size & 0x8000)
                bytes = (u64)(size & 0x7fff) << 10;
-       else if (size != 0x7fff)
+       else if (size != 0x7fff || dm->length < 0x20)
                bytes = (u64)size << 20;
        else
                bytes = (u64)get_unaligned((u32 *)&d[0x1C]) << 20;
index 178ee81..b248870 100644 (file)
@@ -182,6 +182,7 @@ config RESET_ATTACK_MITIGATION
 
 config EFI_RCI2_TABLE
        bool "EFI Runtime Configuration Interface Table Version 2 Support"
+       depends on X86 || COMPILE_TEST
        help
          Displays the content of the Runtime Configuration Interface
          Table version 2 on Dell EMC PowerEdge systems as a binary
index addf074..b1af0de 100644 (file)
@@ -381,7 +381,7 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
                printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
                       pcie->device_id.vendor_id, pcie->device_id.device_id);
                p = pcie->device_id.class_code;
-               printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
+               printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
        }
        if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
                printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
index 8f1ab04..e98bbf8 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/acpi.h>
 #include <linux/ucs2_string.h>
 #include <linux/memblock.h>
+#include <linux/security.h>
 
 #include <asm/early_ioremap.h>
 
@@ -221,6 +222,11 @@ static void generic_ops_unregister(void)
 static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata;
 static int __init efivar_ssdt_setup(char *str)
 {
+       int ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
+
+       if (ret)
+               return ret;
+
        if (strlen(str) < sizeof(efivar_ssdt))
                memcpy(efivar_ssdt, str, strlen(str));
        else
@@ -261,6 +267,9 @@ static __init int efivar_ssdt_load(void)
        void *data;
        int ret;
 
+       if (!efivar_ssdt[0])
+               return 0;
+
        ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries);
 
        list_for_each_entry_safe(entry, aux, &entries, list) {
@@ -545,7 +554,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
                                              sizeof(*seed) + size);
                        if (seed != NULL) {
                                pr_notice("seeding entropy pool\n");
-                               add_device_randomness(seed->bits, seed->size);
+                               add_bootloader_randomness(seed->bits, seed->size);
                                early_memunmap(seed, sizeof(*seed) + size);
                        } else {
                                pr_err("Could not map UEFI random seed!\n");
index 0460c75..ee0661d 100644 (file)
@@ -52,6 +52,7 @@ lib-$(CONFIG_EFI_ARMSTUB)     += arm-stub.o fdt.o string.o random.o \
 
 lib-$(CONFIG_ARM)              += arm32-stub.o
 lib-$(CONFIG_ARM64)            += arm64-stub.o
+CFLAGS_arm32-stub.o            := -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_arm64-stub.o            := -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 #
index e8f7aef..41213bf 100644 (file)
@@ -195,6 +195,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
                                 unsigned long dram_base,
                                 efi_loaded_image_t *image)
 {
+       unsigned long kernel_base;
        efi_status_t status;
 
        /*
@@ -204,9 +205,18 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
         * loaded. These assumptions are made by the decompressor,
         * before any memory map is available.
         */
-       dram_base = round_up(dram_base, SZ_128M);
+       kernel_base = round_up(dram_base, SZ_128M);
 
-       status = reserve_kernel_base(sys_table, dram_base, reserve_addr,
+       /*
+        * Note that some platforms (notably, the Raspberry Pi 2) put
+        * spin-tables and other pieces of firmware at the base of RAM,
+        * abusing the fact that the window of TEXT_OFFSET bytes at the
+        * base of the kernel image is only partially used at the moment.
+        * (Up to 5 pages are used for the swapper page tables)
+        */
+       kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE;
+
+       status = reserve_kernel_base(sys_table, kernel_base, reserve_addr,
                                     reserve_size);
        if (status != EFI_SUCCESS) {
                pr_efi_err(sys_table, "Unable to allocate memory for uncompressed kernel.\n");
@@ -220,7 +230,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
        *image_size = image->image_size;
        status = efi_relocate_kernel(sys_table, image_addr, *image_size,
                                     *image_size,
-                                    dram_base + MAX_UNCOMP_KERNEL_SIZE, 0);
+                                    kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0);
        if (status != EFI_SUCCESS) {
                pr_efi_err(sys_table, "Failed to relocate kernel.\n");
                efi_free(sys_table, *reserve_size, *reserve_addr);
index 3caae7f..35dbc27 100644 (file)
@@ -260,11 +260,11 @@ fail:
 }
 
 /*
- * Allocate at the lowest possible address.
+ * Allocate at the lowest possible address that is not below 'min'.
  */
-efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
-                          unsigned long size, unsigned long align,
-                          unsigned long *addr)
+efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
+                                unsigned long size, unsigned long align,
+                                unsigned long *addr, unsigned long min)
 {
        unsigned long map_size, desc_size, buff_size;
        efi_memory_desc_t *map;
@@ -311,13 +311,8 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
                start = desc->phys_addr;
                end = start + desc->num_pages * EFI_PAGE_SIZE;
 
-               /*
-                * Don't allocate at 0x0. It will confuse code that
-                * checks pointers against NULL. Skip the first 8
-                * bytes so we start at a nice even number.
-                */
-               if (start == 0x0)
-                       start += 8;
+               if (start < min)
+                       start = min;
 
                start = round_up(start, align);
                if ((start + size) > end)
@@ -698,7 +693,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
                                 unsigned long image_size,
                                 unsigned long alloc_size,
                                 unsigned long preferred_addr,
-                                unsigned long alignment)
+                                unsigned long alignment,
+                                unsigned long min_addr)
 {
        unsigned long cur_image_addr;
        unsigned long new_addr = 0;
@@ -731,8 +727,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
         * possible.
         */
        if (status != EFI_SUCCESS) {
-               status = efi_low_alloc(sys_table_arg, alloc_size, alignment,
-                                      &new_addr);
+               status = efi_low_alloc_above(sys_table_arg, alloc_size,
+                                            alignment, &new_addr, min_addr);
        }
        if (status != EFI_SUCCESS) {
                pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n");
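
Note: efi_low_alloc() becomes efi_low_alloc_above(), replacing the hard-coded
"skip address 0" hack with a caller-supplied floor: each candidate region start
is raised to 'min' and then rounded up to the alignment. A standalone sketch of
that candidate computation (region and sizes hypothetical):

#include <stdint.h>
#include <stdio.h>

static uint64_t round_up_u64(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);	/* a must be a power of two */
}

int main(void)
{
	uint64_t start = 0x0, end = 0x200000;	/* one EFI memory region */
	uint64_t min = 0x8000, align = 0x1000, size = 0x10000;

	if (start < min)
		start = min;
	start = round_up_u64(start, align);

	if (start + size > end)
		puts("region too small, try the next one");
	else
		printf("allocate at 0x%llx\n", (unsigned long long)start);
	return 0;
}
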
index 3e290f9..76b0c35 100644 (file)
@@ -76,7 +76,7 @@ static u16 checksum(void)
        return chksum;
 }
 
-int __init efi_rci2_sysfs_init(void)
+static int __init efi_rci2_sysfs_init(void)
 {
        struct kobject *tables_kobj;
        int ret = -ENOMEM;
index 877745c..7baf48c 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
@@ -717,6 +718,13 @@ static long efi_test_ioctl(struct file *file, unsigned int cmd,
 
 static int efi_test_open(struct inode *inode, struct file *file)
 {
+       int ret = security_locked_down(LOCKDOWN_EFI_TEST);
+
+       if (ret)
+               return ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
        /*
         * nothing special to do here
         * We do accept multiple open files at the same time as we
index 1d3f5ca..31f9f0e 100644 (file)
@@ -40,7 +40,7 @@ int __init efi_tpm_eventlog_init(void)
 {
        struct linux_efi_tpm_eventlog *log_tbl;
        struct efi_tcg2_final_events_table *final_tbl;
-       unsigned int tbl_size;
+       int tbl_size;
        int ret = 0;
 
        if (efi.tpm_log == EFI_INVALID_TABLE_ADDR) {
@@ -75,16 +75,29 @@ int __init efi_tpm_eventlog_init(void)
                goto out;
        }
 
-       tbl_size = tpm2_calc_event_log_size((void *)efi.tpm_final_log
-                                           + sizeof(final_tbl->version)
-                                           + sizeof(final_tbl->nr_events),
-                                           final_tbl->nr_events,
-                                           log_tbl->log);
+       tbl_size = 0;
+       if (final_tbl->nr_events != 0) {
+               void *events = (void *)efi.tpm_final_log
+                               + sizeof(final_tbl->version)
+                               + sizeof(final_tbl->nr_events);
+
+               tbl_size = tpm2_calc_event_log_size(events,
+                                                   final_tbl->nr_events,
+                                                   log_tbl->log);
+       }
+
+       if (tbl_size < 0) {
+               pr_err(FW_BUG "Failed to parse event in TPM Final Events Log\n");
+               ret = -EINVAL;
+               goto out_calc;
+       }
+
        memblock_reserve((unsigned long)final_tbl,
                         tbl_size + sizeof(*final_tbl));
-       early_memunmap(final_tbl, sizeof(*final_tbl));
        efi_tpm_final_log_size = tbl_size;
 
+out_calc:
+       early_memunmap(final_tbl, sizeof(*final_tbl));
 out:
        early_memunmap(log_tbl, sizeof(*log_tbl));
        return ret;
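
Note: the tbl_size type change above is the heart of this fix:
tpm2_calc_event_log_size() can return -1 on a malformed event, but with tbl_size
declared unsigned int the subsequent 'tbl_size < 0' test could never fire.
Standalone illustration of the dead check (most compilers warn about it, which
is exactly the point):

#include <stdio.h>

static int calc_size(void)
{
	return -1;	/* stand-in for a failing parse */
}

int main(void)
{
	unsigned int u = calc_size();	/* -1 wraps to UINT_MAX */
	int s = calc_size();

	if (u < 0)	/* always false; compilers flag this comparison */
		puts("unsigned: error caught");
	else
		puts("unsigned: error missed");

	if (s < 0)
		puts("signed: error caught");
	return 0;
}
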
index dda525c..5c6f2a7 100644 (file)
@@ -52,7 +52,7 @@ static int vpd_decode_entry(const u32 max_len, const u8 *input_buf,
        if (max_len - consumed < *entry_len)
                return VPD_FAIL;
 
-       consumed += decoded_len;
+       consumed += *entry_len;
        *_consumed = consumed;
        return VPD_OK;
 }
index fe7a73f..bb287f3 100644 (file)
@@ -530,11 +530,12 @@ static void sprd_eic_handle_one_type(struct gpio_chip *chip)
                }
 
                for_each_set_bit(n, &reg, SPRD_EIC_PER_BANK_NR) {
-                       girq = irq_find_mapping(chip->irq.domain,
-                                       bank * SPRD_EIC_PER_BANK_NR + n);
+                       u32 offset = bank * SPRD_EIC_PER_BANK_NR + n;
+
+                       girq = irq_find_mapping(chip->irq.domain, offset);
 
                        generic_handle_irq(girq);
-                       sprd_eic_toggle_trigger(chip, girq, n);
+                       sprd_eic_toggle_trigger(chip, girq, offset);
                }
        }
 }
index 4d835f9..86a10c8 100644 (file)
@@ -293,8 +293,9 @@ static void intel_mid_irq_handler(struct irq_desc *desc)
        chip->irq_eoi(data);
 }
 
-static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv)
+static int intel_mid_irq_init_hw(struct gpio_chip *chip)
 {
+       struct intel_mid_gpio *priv = gpiochip_get_data(chip);
        void __iomem *reg;
        unsigned base;
 
@@ -309,6 +310,8 @@ static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv)
                reg = gpio_reg(&priv->chip, base, GEDR);
                writel(~0, reg);
        }
+
+       return 0;
 }
 
 static int __maybe_unused intel_gpio_runtime_idle(struct device *dev)
@@ -372,6 +375,7 @@ static int intel_gpio_probe(struct pci_dev *pdev,
 
        girq = &priv->chip.irq;
        girq->chip = &intel_mid_irqchip;
+       girq->init_hw = intel_mid_irq_init_hw;
        girq->parent_handler = intel_mid_irq_handler;
        girq->num_parents = 1;
        girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
@@ -384,9 +388,8 @@ static int intel_gpio_probe(struct pci_dev *pdev,
        girq->default_type = IRQ_TYPE_NONE;
        girq->handler = handle_simple_irq;
 
-       intel_mid_irq_init_hw(priv);
-
        pci_set_drvdata(pdev, priv);
+
        retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
        if (retval) {
                dev_err(&pdev->dev, "gpiochip_add error %d\n", retval);
index 6bb9741..e9e47c0 100644 (file)
@@ -294,8 +294,9 @@ static struct irq_chip lp_irqchip = {
        .flags = IRQCHIP_SKIP_SET_WAKE,
 };
 
-static void lp_gpio_irq_init_hw(struct lp_gpio *lg)
+static int lp_gpio_irq_init_hw(struct gpio_chip *chip)
 {
+       struct lp_gpio *lg = gpiochip_get_data(chip);
        unsigned long reg;
        unsigned base;
 
@@ -307,6 +308,8 @@ static void lp_gpio_irq_init_hw(struct lp_gpio *lg)
                reg = lp_gpio_reg(&lg->chip, base, LP_INT_STAT);
                outl(0xffffffff, reg);
        }
+
+       return 0;
 }
 
 static int lp_gpio_probe(struct platform_device *pdev)
@@ -364,6 +367,7 @@ static int lp_gpio_probe(struct platform_device *pdev)
 
                girq = &gc->irq;
                girq->chip = &lp_irqchip;
+               girq->init_hw = lp_gpio_irq_init_hw;
                girq->parent_handler = lp_gpio_irq_handler;
                girq->num_parents = 1;
                girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
@@ -373,9 +377,7 @@ static int lp_gpio_probe(struct platform_device *pdev)
                        return -ENOMEM;
                girq->parents[0] = (unsigned)irq_rc->start;
                girq->default_type = IRQ_TYPE_NONE;
-               girq->handler = handle_simple_irq;
-
-               lp_gpio_irq_init_hw(lg);
+               girq->handler = handle_bad_irq;
        }
 
        ret = devm_gpiochip_add_data(dev, gc, lg);
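
Note: together with the gpiolib hunk further below that adds
gpiochip_irqchip_init_hw(), these driver changes move hardware IRQ
initialization out of probe() and into the new gpio_irq_chip::init_hw callback,
which the gpiolib core runs from gpiochip_add_data() before the irqchip can
deliver anything. A hedged sketch of the pattern in a hypothetical driver
(struct and register names invented for illustration):

static int mydrv_irq_init_hw(struct gpio_chip *chip)
{
	struct mydrv_gpio *priv = gpiochip_get_data(chip);

	writel(0, priv->base + MYDRV_INT_MASK);		/* mask all lines */
	writel(~0u, priv->base + MYDRV_INT_STAT);	/* ack stale state */
	return 0;
}

/* in probe(), instead of calling the init helper directly: */
girq->init_hw = mydrv_irq_init_hw;
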
index 47d05e3..faf86ea 100644 (file)
@@ -192,13 +192,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio,
        case 0:
                val = MAX77620_CNFG_GPIO_DBNC_None;
                break;
-       case 1 ... 8:
+       case 1000 ... 8000:
                val = MAX77620_CNFG_GPIO_DBNC_8ms;
                break;
-       case 9 ... 16:
+       case 9000 ... 16000:
                val = MAX77620_CNFG_GPIO_DBNC_16ms;
                break;
-       case 17 ... 32:
+       case 17000 ... 32000:
                val = MAX77620_CNFG_GPIO_DBNC_32ms;
                break;
        default:
index 4f27ddf..3302125 100644 (file)
@@ -397,7 +397,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
 {
        const struct mrfld_gpio_pinrange *range;
        const char *pinctrl_dev_name;
-       struct gpio_irq_chip *girq;
        struct mrfld_gpio *priv;
        u32 gpio_base, irq_base;
        void __iomem *base;
@@ -445,21 +444,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
 
        raw_spin_lock_init(&priv->lock);
 
-       girq = &priv->chip.irq;
-       girq->chip = &mrfld_irqchip;
-       girq->parent_handler = mrfld_irq_handler;
-       girq->num_parents = 1;
-       girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
-                                    sizeof(*girq->parents),
-                                    GFP_KERNEL);
-       if (!girq->parents)
-               return -ENOMEM;
-       girq->parents[0] = pdev->irq;
-       girq->default_type = IRQ_TYPE_NONE;
-       girq->handler = handle_bad_irq;
-
-       mrfld_irq_init_hw(priv);
-
        pci_set_drvdata(pdev, priv);
        retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
        if (retval) {
@@ -481,6 +465,18 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
                }
        }
 
+       retval = gpiochip_irqchip_add(&priv->chip, &mrfld_irqchip, irq_base,
+                                     handle_bad_irq, IRQ_TYPE_NONE);
+       if (retval) {
+               dev_err(&pdev->dev, "could not connect irqchip to gpiochip\n");
+               return retval;
+       }
+
+       mrfld_irq_init_hw(priv);
+
+       gpiochip_set_chained_irqchip(&priv->chip, &mrfld_irqchip, pdev->irq,
+                                    mrfld_irq_handler);
+
        return 0;
 }
 
index 869d47f..6c06876 100644 (file)
@@ -694,7 +694,7 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
 }
 
 static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct mvebu_pwm *mvpwm = to_mvebu_pwm(chip);
        struct mvebu_gpio_chip *mvchip = mvpwm->mvchip;
index 1eea2c6..80ea49f 100644 (file)
@@ -317,7 +317,7 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
        transitory = flags & OF_GPIO_TRANSITORY;
 
        ret = gpiod_request(desc, label);
-       if (ret == -EBUSY && (flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
+       if (ret == -EBUSY && (dflags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
                return desc;
        if (ret)
                return ERR_PTR(ret);
index bdbc164..104ed29 100644 (file)
@@ -86,6 +86,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
                                struct lock_class_key *lock_key,
                                struct lock_class_key *request_key);
 static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
+static int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip);
 static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
 static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
 
@@ -1406,6 +1407,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 
        machine_gpiochip_add(chip);
 
+       ret = gpiochip_irqchip_init_hw(chip);
+       if (ret)
+               goto err_remove_acpi_chip;
+
        ret = gpiochip_irqchip_init_valid_mask(chip);
        if (ret)
                goto err_remove_acpi_chip;
@@ -1622,6 +1627,16 @@ static struct gpio_chip *find_chip_by_name(const char *name)
  * The following is irqchip helper code for gpiochips.
  */
 
+static int gpiochip_irqchip_init_hw(struct gpio_chip *gc)
+{
+       struct gpio_irq_chip *girq = &gc->irq;
+
+       if (!girq->init_hw)
+               return 0;
+
+       return girq->init_hw(gc);
+}
+
 static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc)
 {
        struct gpio_irq_chip *girq = &gc->irq;
@@ -2446,8 +2461,13 @@ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
 {
        return 0;
 }
-
 static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) {}
+
+static inline int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip)
+{
+       return 0;
+}
+
 static inline int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip)
 {
        return 0;
@@ -3070,8 +3090,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
                if (!ret)
                        goto set_output_value;
                /* Emulate open drain by not actively driving the line high */
-               if (value)
-                       return gpiod_direction_input(desc);
+               if (value) {
+                       ret = gpiod_direction_input(desc);
+                       goto set_output_flag;
+               }
        }
        else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) {
                ret = gpio_set_config(gc, gpio_chip_hwgpio(desc),
@@ -3079,8 +3101,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
                if (!ret)
                        goto set_output_value;
                /* Emulate open source by not actively driving the line low */
-               if (!value)
-                       return gpiod_direction_input(desc);
+               if (!value) {
+                       ret = gpiod_direction_input(desc);
+                       goto set_output_flag;
+               }
        } else {
                gpio_set_config(gc, gpio_chip_hwgpio(desc),
                                PIN_CONFIG_DRIVE_PUSH_PULL);
@@ -3088,6 +3112,17 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
 
 set_output_value:
        return gpiod_direction_output_raw_commit(desc, value);
+
+set_output_flag:
+       /*
+        * When emulating open-source or open-drain functionalities by not
+        * actively driving the line (setting mode to input) we still need to
+        * set the IS_OUT flag; otherwise we won't be able to set the line
+        * value anymore.
+        */
+       if (ret == 0)
+               set_bit(FLAG_IS_OUT, &desc->flags);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(gpiod_direction_output);
 
@@ -3448,8 +3483,6 @@ static void gpio_set_open_drain_value_commit(struct gpio_desc *desc, bool value)
 
        if (value) {
                ret = chip->direction_input(chip, offset);
-               if (!ret)
-                       clear_bit(FLAG_IS_OUT, &desc->flags);
        } else {
                ret = chip->direction_output(chip, offset, 0);
                if (!ret)
@@ -3479,8 +3512,6 @@ static void gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value
                        set_bit(FLAG_IS_OUT, &desc->flags);
        } else {
                ret = chip->direction_input(chip, offset);
-               if (!ret)
-                       clear_bit(FLAG_IS_OUT, &desc->flags);
        }
        trace_gpio_direction(desc_to_gpio(desc), !value, ret);
        if (ret < 0)
index 42e2c1f..00962a6 100644 (file)
@@ -54,7 +54,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
        amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
        amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
        amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
-       amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o
+       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o
 
 amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
 
index eba42c7..82155ac 100644 (file)
@@ -189,7 +189,7 @@ static int acp_hw_init(void *handle)
        u32 val = 0;
        u32 count = 0;
        struct device *dev;
-       struct i2s_platform_data *i2s_pdata;
+       struct i2s_platform_data *i2s_pdata = NULL;
 
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
@@ -231,20 +231,21 @@ static int acp_hw_init(void *handle)
        adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
                                                        GFP_KERNEL);
 
-       if (adev->acp.acp_cell == NULL)
-               return -ENOMEM;
+       if (adev->acp.acp_cell == NULL) {
+               r = -ENOMEM;
+               goto failure;
+       }
 
        adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
        if (adev->acp.acp_res == NULL) {
-               kfree(adev->acp.acp_cell);
-               return -ENOMEM;
+               r = -ENOMEM;
+               goto failure;
        }
 
        i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
        if (i2s_pdata == NULL) {
-               kfree(adev->acp.acp_res);
-               kfree(adev->acp.acp_cell);
-               return -ENOMEM;
+               r = -ENOMEM;
+               goto failure;
        }
 
        switch (adev->asic_type) {
@@ -341,14 +342,14 @@ static int acp_hw_init(void *handle)
        r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
                                                                ACP_DEVS);
        if (r)
-               return r;
+               goto failure;
 
        for (i = 0; i < ACP_DEVS ; i++) {
                dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
                r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
                if (r) {
                        dev_err(dev, "Failed to add dev to genpd\n");
-                       return r;
+                       goto failure;
                }
        }
 
@@ -367,7 +368,8 @@ static int acp_hw_init(void *handle)
                        break;
                if (--count == 0) {
                        dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
-                       return -ETIMEDOUT;
+                       r = -ETIMEDOUT;
+                       goto failure;
                }
                udelay(100);
        }
@@ -384,7 +386,8 @@ static int acp_hw_init(void *handle)
                        break;
                if (--count == 0) {
                        dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
-                       return -ETIMEDOUT;
+                       r = -ETIMEDOUT;
+                       goto failure;
                }
                udelay(100);
        }
@@ -393,6 +396,13 @@ static int acp_hw_init(void *handle)
        val &= ~ACP_SOFT_RESET__SoftResetAud_MASK;
        cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
        return 0;
+
+failure:
+       kfree(i2s_pdata);
+       kfree(adev->acp.acp_res);
+       kfree(adev->acp.acp_cell);
+       kfree(adev->acp.acp_genpd);
+       return r;
 }
 
 /**
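
The error-path rework above follows the standard kernel unwind idiom. A minimal sketch under illustrative names (kfree(NULL) is a no-op, so a single failure label can free whatever was allocated before the failing step):

	static int example_init(void)
	{
		void *a = NULL, *b = NULL;
		int r;

		a = kzalloc(64, GFP_KERNEL);
		if (!a) {
			r = -ENOMEM;
			goto failure;
		}

		b = kzalloc(64, GFP_KERNEL);
		if (!b) {
			r = -ENOMEM;
			goto failure;
		}

		/* hand a and b off to the caller here (elided) */
		return 0;

	failure:
		kfree(b);	/* NULL-safe */
		kfree(a);
		return r;
	}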
index 42b936b..6d021ec 100644 (file)
@@ -1103,7 +1103,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                alloc_flags = 0;
                if (!offset || !*offset)
                        return -EINVAL;
-               user_addr = *offset;
+               user_addr = untagged_addr(*offset);
        } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
                        ALLOC_MEM_FLAGS_MMIO_REMAP)) {
                domain = AMDGPU_GEM_DOMAIN_GTT;
index 7bcf86c..85b0515 100644 (file)
@@ -140,7 +140,12 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
        return 0;
 
 error_free:
-       while (i--) {
+       for (i = 0; i < last_entry; ++i) {
+               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
+
+               amdgpu_bo_unref(&bo);
+       }
+       for (i = first_userptr; i < num_entries; ++i) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
 
                amdgpu_bo_unref(&bo);
@@ -270,7 +275,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 
        r = amdgpu_bo_create_list_entry_array(&args->in, &info);
        if (r)
-               goto error_free;
+               return r;
 
        switch (args->in.operation) {
        case AMDGPU_BO_LIST_OP_CREATE:
@@ -283,8 +288,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
                r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
                mutex_unlock(&fpriv->bo_list_lock);
                if (r < 0) {
-                       amdgpu_bo_list_put(list);
-                       return r;
+                       goto error_put_list;
                }
 
                handle = r;
@@ -306,9 +310,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
                mutex_unlock(&fpriv->bo_list_lock);
 
                if (IS_ERR(old)) {
-                       amdgpu_bo_list_put(list);
                        r = PTR_ERR(old);
-                       goto error_free;
+                       goto error_put_list;
                }
 
                amdgpu_bo_list_put(old);
@@ -325,8 +328,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 
        return 0;
 
+error_put_list:
+       amdgpu_bo_list_put(list);
+
 error_free:
-       if (info)
-               kvfree(info);
+       kvfree(info);
        return r;
 }
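
The two cleanup loops above mirror how amdgpu_bo_list_create fills the entry array; a sketch of the assumed layout (index names as in the code):

	/*
	 * array[0 .. last_entry)              regular BOs, filled from the front
	 * array[first_userptr .. num_entries) userptr BOs, filled at the back
	 *
	 * On a partial failure both populated ranges must be unreferenced;
	 * walking back from the failure index alone would miss the tail range.
	 */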
index 2e53fee..82823d9 100644 (file)
@@ -536,7 +536,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
        list_for_each_entry(lobj, validated, tv.head) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
-               bool binding_userptr = false;
                struct mm_struct *usermm;
 
                usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
@@ -553,7 +552,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
                        amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
                                                     lobj->user_pages);
-                       binding_userptr = true;
                }
 
                if (p->evictable == lobj)
@@ -563,10 +561,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                if (r)
                        return r;
 
-               if (binding_userptr) {
-                       kvfree(lobj->user_pages);
-                       lobj->user_pages = NULL;
-               }
+               kvfree(lobj->user_pages);
+               lobj->user_pages = NULL;
        }
        return 0;
 }
index 61bd103..5803fcb 100644 (file)
@@ -948,6 +948,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
        case AMD_IP_BLOCK_TYPE_UVD:
        case AMD_IP_BLOCK_TYPE_VCN:
        case AMD_IP_BLOCK_TYPE_VCE:
+       case AMD_IP_BLOCK_TYPE_SDMA:
                if (swsmu)
                        ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
                else
@@ -956,7 +957,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
                break;
        case AMD_IP_BLOCK_TYPE_GMC:
        case AMD_IP_BLOCK_TYPE_ACP:
-       case AMD_IP_BLOCK_TYPE_SDMA:
                ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
                                (adev)->powerplay.pp_handle, block_type, gate));
                break;
index bdf849d..2a00a36 100644 (file)
  * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.
  * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
  * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
+ * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
  */
 #define KMS_DRIVER_MAJOR       3
-#define KMS_DRIVER_MINOR       34
+#define KMS_DRIVER_MINOR       35
 #define KMS_DRIVER_PATCHLEVEL  0
 
 #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
@@ -1012,11 +1013,16 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
        {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
        /* Navi14 */
-       {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+       {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
 
        /* Renoir */
        {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
 
+       /* Navi12 */
+       {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
+
        {0, 0, 0}
 };
 
@@ -1042,6 +1048,41 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
                return -ENODEV;
        }
 
+#ifdef CONFIG_DRM_AMDGPU_SI
+       if (!amdgpu_si_support) {
+               switch (flags & AMD_ASIC_MASK) {
+               case CHIP_TAHITI:
+               case CHIP_PITCAIRN:
+               case CHIP_VERDE:
+               case CHIP_OLAND:
+               case CHIP_HAINAN:
+                       dev_info(&pdev->dev,
+                                "SI support provided by radeon.\n");
+                       dev_info(&pdev->dev,
+                                "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
+                               );
+                       return -ENODEV;
+               }
+       }
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+       if (!amdgpu_cik_support) {
+               switch (flags & AMD_ASIC_MASK) {
+               case CHIP_KAVERI:
+               case CHIP_BONAIRE:
+               case CHIP_HAWAII:
+               case CHIP_KABINI:
+               case CHIP_MULLINS:
+                       dev_info(&pdev->dev,
+                                "CIK support provided by radeon.\n");
+                       dev_info(&pdev->dev,
+                                "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
+                               );
+                       return -ENODEV;
+               }
+       }
+#endif
+
        /* Get rid of things like offb */
        ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
        if (ret)
index b174bd5..8ceb449 100644 (file)
@@ -291,6 +291,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
        uint32_t handle;
        int r;
 
+       args->addr = untagged_addr(args->addr);
+
        if (offset_in_page(args->addr | args->size))
                return -EINVAL;
 
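Both untagged_addr() conversions in this section follow the same pattern; a hedged sketch (assuming an arm64-style tagged user pointer, local name illustrative):

	/*
	 * With the arm64 tagged-address ABI, bits 63:56 of a user pointer
	 * may carry a tag.  Strip it before validation, or legitimately
	 * tagged addresses fail the range/alignment checks.
	 */
	unsigned long uaddr = untagged_addr(args->addr);

	if (offset_in_page(uaddr | args->size))
		return -EINVAL;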
index 554a59b..6ee4021 100644 (file)
@@ -165,6 +165,7 @@ struct amdgpu_gfx_config {
        uint32_t num_sc_per_sh;
        uint32_t num_packer_per_sc;
        uint32_t pa_sc_tile_steering_override;
+       uint64_t tcc_disabled_mask;
 };
 
 struct amdgpu_cu_info {
index 7850084..6065583 100644 (file)
@@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        /* ring tests don't use a job */
        if (job) {
                vm = job->vm;
-               fence_ctx = job->base.s_fence->scheduled.context;
+               fence_ctx = job->base.s_fence ?
+                       job->base.s_fence->scheduled.context : 0;
        } else {
                vm = NULL;
                fence_ctx = 0;
index 9d76e09..96b2a31 100644 (file)
@@ -218,7 +218,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
        struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
        struct dma_fence *fence = NULL, *finished;
        struct amdgpu_job *job;
-       int r;
+       int r = 0;
 
        job = to_amdgpu_job(sched_job);
        finished = &job->base.s_fence->finished;
@@ -243,6 +243,8 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
        job->fence = dma_fence_get(fence);
 
        amdgpu_job_free_resources(job);
+
+       fence = r ? ERR_PTR(r) : fence;
        return fence;
 }
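
With r initialized and folded into the return value above, callers can rely on the usual ERR_PTR convention; a hedged consumer-side sketch (caller shape illustrative):

	struct dma_fence *f = run_job_cb(sched_job);	/* e.g. a run_job() hook */

	if (IS_ERR_OR_NULL(f)) {
		/* error or no fence: nothing to wait on */
	} else {
		dma_fence_wait(f, false);
		dma_fence_put(f);
	}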
 
index 0e2ec60..d55f5ba 100644 (file)
@@ -144,41 +144,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
        struct amdgpu_device *adev;
        int r, acpi_status;
 
-#ifdef CONFIG_DRM_AMDGPU_SI
-       if (!amdgpu_si_support) {
-               switch (flags & AMD_ASIC_MASK) {
-               case CHIP_TAHITI:
-               case CHIP_PITCAIRN:
-               case CHIP_VERDE:
-               case CHIP_OLAND:
-               case CHIP_HAINAN:
-                       dev_info(dev->dev,
-                                "SI support provided by radeon.\n");
-                       dev_info(dev->dev,
-                                "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
-                               );
-                       return -ENODEV;
-               }
-       }
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
-       if (!amdgpu_cik_support) {
-               switch (flags & AMD_ASIC_MASK) {
-               case CHIP_KAVERI:
-               case CHIP_BONAIRE:
-               case CHIP_HAWAII:
-               case CHIP_KABINI:
-               case CHIP_MULLINS:
-                       dev_info(dev->dev,
-                                "CIK support provided by radeon.\n");
-                       dev_info(dev->dev,
-                                "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
-                               );
-                       return -ENODEV;
-               }
-       }
-#endif
-
        adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
        if (adev == NULL) {
                return -ENOMEM;
@@ -677,6 +642,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
                        sh_num = 0xffffffff;
 
+               if (info->read_mmr_reg.count > 128)
+                       return -EINVAL;
+
                regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
                if (!regs)
                        return -ENOMEM;
@@ -784,6 +752,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                        dev_info.pa_sc_tile_steering_override =
                                adev->gfx.config.pa_sc_tile_steering_override;
 
+               dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
+
                return copy_to_user(out, &dev_info,
                                    min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
        }
index 1fead0e..7289e1b 100644 (file)
@@ -453,7 +453,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
                .interruptible = (bp->type != ttm_bo_type_kernel),
                .no_wait_gpu = false,
                .resv = bp->resv,
-               .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
+               .flags = bp->type != ttm_bo_type_kernel ?
+                       TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
        };
        struct amdgpu_bo *bo;
        unsigned long page_align, size = bp->size;
index b70b3c4..65044b1 100644 (file)
@@ -429,13 +429,14 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
  * Open up a stream for HW test
  */
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+                             struct amdgpu_bo *bo,
                              struct dma_fence **fence)
 {
        const unsigned ib_size_dw = 1024;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
-       uint64_t dummy;
+       uint64_t addr;
        int i, r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -444,7 +445,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 
        ib = &job->ibs[0];
 
-       dummy = ib->gpu_addr + 1024;
+       addr = amdgpu_bo_gpu_offset(bo);
 
        /* stitch together a VCE create msg */
        ib->length_dw = 0;
@@ -476,8 +477,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 
        ib->ptr[ib->length_dw++] = 0x00000014; /* len */
        ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
-       ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-       ib->ptr[ib->length_dw++] = dummy;
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = addr;
        ib->ptr[ib->length_dw++] = 0x00000001;
 
        for (i = ib->length_dw; i < ib_size_dw; ++i)
@@ -1110,13 +1111,20 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
        struct dma_fence *fence = NULL;
+       struct amdgpu_bo *bo = NULL;
        long r;
 
        /* skip vce ring1/2 ib test for now, since it's not reliable */
        if (ring != &ring->adev->vce.ring[0])
                return 0;
 
-       r = amdgpu_vce_get_create_msg(ring, 1, NULL);
+       r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     &bo, NULL, NULL);
+       if (r)
+               return r;
+
+       r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
        if (r)
                goto error;
 
@@ -1132,5 +1140,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
        dma_fence_put(fence);
+       amdgpu_bo_unreserve(bo);
+       amdgpu_bo_unref(&bo);
        return r;
 }
index 30ea54d..e802f7d 100644 (file)
@@ -59,6 +59,7 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev);
 int amdgpu_vce_suspend(struct amdgpu_device *adev);
 int amdgpu_vce_resume(struct amdgpu_device *adev);
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+                             struct amdgpu_bo *bo,
                              struct dma_fence **fence);
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
                               bool direct, struct dma_fence **fence);
index 7a6beb2..3199e4a 100644 (file)
@@ -569,13 +569,14 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
 }
 
 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-                             struct dma_fence **fence)
+                                        struct amdgpu_bo *bo,
+                                        struct dma_fence **fence)
 {
        const unsigned ib_size_dw = 16;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
-       uint64_t dummy;
+       uint64_t addr;
        int i, r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -583,14 +584,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
                return r;
 
        ib = &job->ibs[0];
-       dummy = ib->gpu_addr + 1024;
+       addr = amdgpu_bo_gpu_offset(bo);
 
        ib->length_dw = 0;
        ib->ptr[ib->length_dw++] = 0x00000018;
        ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
        ib->ptr[ib->length_dw++] = handle;
-       ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-       ib->ptr[ib->length_dw++] = dummy;
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = addr;
        ib->ptr[ib->length_dw++] = 0x0000000b;
 
        ib->ptr[ib->length_dw++] = 0x00000014;
@@ -621,13 +622,14 @@ err:
 }
 
 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-                               struct dma_fence **fence)
+                                         struct amdgpu_bo *bo,
+                                         struct dma_fence **fence)
 {
        const unsigned ib_size_dw = 16;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
-       uint64_t dummy;
+       uint64_t addr;
        int i, r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -635,14 +637,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
                return r;
 
        ib = &job->ibs[0];
-       dummy = ib->gpu_addr + 1024;
+       addr = amdgpu_bo_gpu_offset(bo);
 
        ib->length_dw = 0;
        ib->ptr[ib->length_dw++] = 0x00000018;
        ib->ptr[ib->length_dw++] = 0x00000001;
        ib->ptr[ib->length_dw++] = handle;
-       ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-       ib->ptr[ib->length_dw++] = dummy;
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = addr;
        ib->ptr[ib->length_dw++] = 0x0000000b;
 
        ib->ptr[ib->length_dw++] = 0x00000014;
@@ -675,13 +677,20 @@ err:
 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
        struct dma_fence *fence = NULL;
+       struct amdgpu_bo *bo = NULL;
        long r;
 
-       r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
+       r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     &bo, NULL, NULL);
+       if (r)
+               return r;
+
+       r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
        if (r)
                goto error;
 
-       r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
+       r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
        if (r)
                goto error;
 
@@ -693,6 +702,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
        dma_fence_put(fence);
+       amdgpu_bo_unreserve(bo);
+       amdgpu_bo_unref(&bo);
        return r;
 }
 
index e2fb141..5251352 100644 (file)
@@ -603,14 +603,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
        struct ttm_bo_global *glob = adev->mman.bdev.glob;
        struct amdgpu_vm_bo_base *bo_base;
 
-#if 0
        if (vm->bulk_moveable) {
                spin_lock(&glob->lru_lock);
                ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
                spin_unlock(&glob->lru_lock);
                return;
        }
-#endif
 
        memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
 
index db28823..8dfc775 100644 (file)
@@ -70,6 +70,11 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
 MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
 
+MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
 MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
 MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
 MODULE_FIRMWARE("amdgpu/navi14_me.bin");
@@ -88,7 +93,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
 {
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -135,7 +140,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -174,7 +179,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -594,7 +599,8 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 {
        const char *chip_name;
-       char fw_name[30];
+       char fw_name[40];
+       char wks[10];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
@@ -607,12 +613,16 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 
        DRM_DEBUG("\n");
 
+       memset(wks, 0, sizeof(wks));
        switch (adev->asic_type) {
        case CHIP_NAVI10:
                chip_name = "navi10";
                break;
        case CHIP_NAVI14:
                chip_name = "navi14";
+               if (!(adev->pdev->device == 0x7340 &&
+                     adev->pdev->revision != 0x00))
+                       snprintf(wks, sizeof(wks), "_wks");
                break;
        case CHIP_NAVI12:
                chip_name = "navi12";
@@ -621,7 +631,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
                BUG();
        }
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -632,7 +642,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -643,7 +653,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -708,7 +718,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        if (adev->gfx.rlc.is_rlc_v2_1)
                gfx_v10_0_init_rlc_ext_microcode(adev);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -719,7 +729,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
        if (!err) {
                err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
@@ -1681,6 +1691,17 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
        }
 }
 
+static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
+{
+       /* TCCs are global (not instanced). */
+       uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) |
+                              RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE);
+
+       adev->gfx.config.tcc_disabled_mask =
+               REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
+               (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
+}
+
 static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
 {
        u32 tmp;
@@ -1692,6 +1713,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
 
        gfx_v10_0_setup_rb(adev);
        gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
+       gfx_v10_0_get_tcc_info(adev);
        adev->gfx.config.pa_sc_tile_steering_override =
                gfx_v10_0_init_pa_sc_tile_steering_override(adev);
 
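The new mask concatenates the two 16-bit CGTS_TCC_DISABLE fields (low half in bits 15:0, HI half in bits 31:16) and is exported to userspace as drm_amdgpu_info_device::tcc_disabled_mask (see the KMS hunk earlier). A hedged sketch of how a consumer might count enabled TCCs (helper name illustrative):

	static unsigned int enabled_tcc_count(uint32_t tcc_disabled_mask,
					      unsigned int num_tccs)
	{
		unsigned int i, n = 0;

		for (i = 0; i < num_tccs; i++)
			if (!(tcc_disabled_mask & (1u << i)))
				n++;
		return n;
	}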
index 83d45f9..dcadc73 100644 (file)
@@ -1650,7 +1650,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
 
        switch (adev->asic_type) {
        case CHIP_RAVEN:
-       case CHIP_RENOIR:
                gfx_v9_0_init_lbpw(adev);
                break;
        case CHIP_VEGA20:
@@ -3026,7 +3025,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 
        switch (adev->asic_type) {
        case CHIP_RAVEN:
-       case CHIP_RENOIR:
                if (amdgpu_lbpw == 0)
                        gfx_v9_0_enable_lbpw(adev, false);
                else
index 8b789f7..db10640 100644 (file)
@@ -151,6 +151,15 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
        WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
 
        tmp = mmGCVM_L2_CNTL3_DEFAULT;
+       if (adev->gmc.translate_further) {
+               tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+               tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+                                   L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+       } else {
+               tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+               tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+                                   L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+       }
        WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
 
        tmp = mmGCVM_L2_CNTL4_DEFAULT;
index 241a4e5..354e620 100644 (file)
@@ -309,6 +309,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
        job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
        job->vm_needs_flush = true;
+       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        r = amdgpu_job_submit(job, &adev->mman.entity,
                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
index 3542c20..b39bea6 100644 (file)
@@ -137,6 +137,15 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
        WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
 
        tmp = mmMMVM_L2_CNTL3_DEFAULT;
+       if (adev->gmc.translate_further) {
+               tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+               tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+                                   L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+       } else {
+               tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+               tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+                                   L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+       }
        WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
 
        tmp = mmMMVM_L2_CNTL4_DEFAULT;
index 85393a9..de9b995 100644 (file)
@@ -317,10 +317,12 @@ static int nv_asic_reset(struct amdgpu_device *adev)
        struct smu_context *smu = &adev->smu;
 
        if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
-               amdgpu_inc_vram_lost(adev);
+               if (!adev->in_suspend)
+                       amdgpu_inc_vram_lost(adev);
                ret = smu_baco_reset(smu);
        } else {
-               amdgpu_inc_vram_lost(adev);
+               if (!adev->in_suspend)
+                       amdgpu_inc_vram_lost(adev);
                ret = nv_asic_mode1_reset(adev);
        }
 
index ff18b3a..4554e72 100644 (file)
@@ -254,6 +254,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
        SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
        SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
        SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+       SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
 };
 
 static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
@@ -1889,8 +1890,9 @@ static int sdma_v4_0_hw_init(void *handle)
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
-                       adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
+                       adev->powerplay.pp_funcs->set_powergating_by_smu) ||
+                       adev->asic_type == CHIP_RENOIR)
                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
 
        if (!amdgpu_sriov_vf(adev))
@@ -1917,8 +1919,9 @@ static int sdma_v4_0_hw_fini(void *handle)
        sdma_v4_0_ctx_switch_enable(adev, false);
        sdma_v4_0_enable(adev, false);
 
-       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
-                       && adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
+                       && adev->powerplay.pp_funcs->set_powergating_by_smu) ||
+                       adev->asic_type == CHIP_RENOIR)
                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
 
        return 0;
index fa2f70c..f6e8168 100644 (file)
@@ -1129,7 +1129,7 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq); /* reference */
-       amdgpu_ring_write(ring, 0xfffffff); /* mask */
+       amdgpu_ring_write(ring, 0xffffffff); /* mask */
        amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
 }
index 4a59510..c44723c 100644 (file)
@@ -493,7 +493,15 @@ static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
        }
 
        /* Restore clock gating */
-       smu_v11_0_i2c_set_clock_gating(control, true);
+
+       /*
+        * TODO: Re-enabling clock gating seems to break subsequent SMU
+        *       operation on the I2C bus, most likely because the SMU does
+        *       not disable clock gating, as we do here, before working with
+        *       the bus. For now, don't restore it; follow up with the SMU
+        *       team to confirm the issue and have them update their code.
+        */
+       /* smu_v11_0_i2c_set_clock_gating(control, true); */
 
 }
 
index f70658a..f8ab80c 100644 (file)
@@ -558,12 +558,14 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 {
        switch (soc15_asic_reset_method(adev)) {
                case AMD_RESET_METHOD_BACO:
-                       amdgpu_inc_vram_lost(adev);
+                       if (!adev->in_suspend)
+                               amdgpu_inc_vram_lost(adev);
                        return soc15_asic_baco_reset(adev);
                case AMD_RESET_METHOD_MODE2:
                        return soc15_mode2_reset(adev);
                default:
-                       amdgpu_inc_vram_lost(adev);
+                       if (!adev->in_suspend)
+                               amdgpu_inc_vram_lost(adev);
                        return soc15_asic_mode1_reset(adev);
        }
 }
@@ -771,8 +773,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 #if defined(CONFIG_DRM_AMD_DC)
                 else if (amdgpu_device_has_dc_support(adev))
                         amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#else
-#       warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15."
 #endif
                amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
                break;
index 670784a..217084d 100644 (file)
@@ -206,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
  * Open up a stream for HW test
  */
 static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+                                      struct amdgpu_bo *bo,
                                       struct dma_fence **fence)
 {
        const unsigned ib_size_dw = 16;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
-       uint64_t dummy;
+       uint64_t addr;
        int i, r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -220,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
                return r;
 
        ib = &job->ibs[0];
-       dummy = ib->gpu_addr + 1024;
+       addr = amdgpu_bo_gpu_offset(bo);
 
        ib->length_dw = 0;
        ib->ptr[ib->length_dw++] = 0x00000018;
        ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
        ib->ptr[ib->length_dw++] = handle;
        ib->ptr[ib->length_dw++] = 0x00010000;
-       ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-       ib->ptr[ib->length_dw++] = dummy;
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = addr;
 
        ib->ptr[ib->length_dw++] = 0x00000014;
        ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -268,13 +269,14 @@ err:
  */
 static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
                                        uint32_t handle,
+                                       struct amdgpu_bo *bo,
                                        struct dma_fence **fence)
 {
        const unsigned ib_size_dw = 16;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
-       uint64_t dummy;
+       uint64_t addr;
        int i, r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -282,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
                return r;
 
        ib = &job->ibs[0];
-       dummy = ib->gpu_addr + 1024;
+       addr = amdgpu_bo_gpu_offset(bo);
 
        ib->length_dw = 0;
        ib->ptr[ib->length_dw++] = 0x00000018;
        ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
        ib->ptr[ib->length_dw++] = handle;
        ib->ptr[ib->length_dw++] = 0x00010000;
-       ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-       ib->ptr[ib->length_dw++] = dummy;
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = addr;
 
        ib->ptr[ib->length_dw++] = 0x00000014;
        ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -327,13 +329,20 @@ err:
 static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
        struct dma_fence *fence = NULL;
+       struct amdgpu_bo *bo = NULL;
        long r;
 
-       r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
+       r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     &bo, NULL, NULL);
+       if (r)
+               return r;
+
+       r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL);
        if (r)
                goto error;
 
-       r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
+       r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence);
        if (r)
                goto error;
 
@@ -345,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
        dma_fence_put(fence);
+       amdgpu_bo_unreserve(bo);
+       amdgpu_bo_unref(&bo);
        return r;
 }
 
index 01f658f..0995378 100644 (file)
@@ -214,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
  * Open up a stream for HW test
  */
 static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+                                      struct amdgpu_bo *bo,
                                       struct dma_fence **fence)
 {
        const unsigned ib_size_dw = 16;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
-       uint64_t dummy;
+       uint64_t addr;
        int i, r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -228,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
                return r;
 
        ib = &job->ibs[0];
-       dummy = ib->gpu_addr + 1024;
+       addr = amdgpu_bo_gpu_offset(bo);
 
        ib->length_dw = 0;
        ib->ptr[ib->length_dw++] = 0x00000018;
        ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
        ib->ptr[ib->length_dw++] = handle;
        ib->ptr[ib->length_dw++] = 0x00000000;
-       ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-       ib->ptr[ib->length_dw++] = dummy;
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = addr;
 
        ib->ptr[ib->length_dw++] = 0x00000014;
        ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -275,13 +276,14 @@ err:
  * Close up a stream for HW test or if userspace failed to do so
  */
 static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-                               struct dma_fence **fence)
+                                       struct amdgpu_bo *bo,
+                                       struct dma_fence **fence)
 {
        const unsigned ib_size_dw = 16;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
-       uint64_t dummy;
+       uint64_t addr;
        int i, r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -289,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
                return r;
 
        ib = &job->ibs[0];
-       dummy = ib->gpu_addr + 1024;
+       addr = amdgpu_bo_gpu_offset(bo);
 
        ib->length_dw = 0;
        ib->ptr[ib->length_dw++] = 0x00000018;
        ib->ptr[ib->length_dw++] = 0x00000001;
        ib->ptr[ib->length_dw++] = handle;
        ib->ptr[ib->length_dw++] = 0x00000000;
-       ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-       ib->ptr[ib->length_dw++] = dummy;
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = addr;
 
        ib->ptr[ib->length_dw++] = 0x00000014;
        ib->ptr[ib->length_dw++] = 0x00000002;
@@ -334,13 +336,20 @@ err:
 static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
        struct dma_fence *fence = NULL;
+       struct amdgpu_bo *bo = NULL;
        long r;
 
-       r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
+       r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     &bo, NULL, NULL);
+       if (r)
+               return r;
+
+       r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL);
        if (r)
                goto error;
 
-       r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
+       r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence);
        if (r)
                goto error;
 
@@ -352,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
        dma_fence_put(fence);
+       amdgpu_bo_unreserve(bo);
+       amdgpu_bo_unref(&bo);
        return r;
 }
 
index a8cf82d..901fe35 100644 (file)
@@ -694,10 +694,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0x003f8000, 0x8f6f896f,
        0x88776f77, 0x8a6eff6e,
        0x023f8000, 0xb9eef807,
-       0xb970f812, 0xb971f813,
-       0x8ff08870, 0xf4051bb8,
+       0xb97af812, 0xb97bf813,
+       0x8ffa887a, 0xf4051bbd,
        0xfa000000, 0xbf8cc07f,
-       0xf4051c38, 0xfa000008,
+       0xf4051ebd, 0xfa000008,
        0xbf8cc07f, 0x87ee6e6e,
        0xbf840001, 0xbe80206e,
        0xb971f803, 0x8771ff71,
index 3598621..cdaa523 100644 (file)
@@ -187,12 +187,12 @@ L_FETCH_2ND_TRAP:
        // Read second-level TBA/TMA from first-level TMA and jump if available.
        // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
        // ttmp12 holds SQ_WAVE_STATUS
-       s_getreg_b32    ttmp4, hwreg(HW_REG_SHADER_TMA_LO)
-       s_getreg_b32    ttmp5, hwreg(HW_REG_SHADER_TMA_HI)
-       s_lshl_b64      [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
-       s_load_dwordx2  [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1               // second-level TBA
+       s_getreg_b32    ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
+       s_getreg_b32    ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
+       s_lshl_b64      [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+       s_load_dwordx2  [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1             // second-level TBA
        s_waitcnt       lgkmcnt(0)
-       s_load_dwordx2  [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1               // second-level TMA
+       s_load_dwordx2  [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1           // second-level TMA
        s_waitcnt       lgkmcnt(0)
        s_and_b64       [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
        s_cbranch_scc0  L_NO_NEXT_TRAP                                          // second-level trap handler has not been set
index e1b09bb..a52f0b1 100644 (file)
@@ -2113,6 +2113,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
 }
 
 static const struct backlight_ops amdgpu_dm_backlight_ops = {
+       .options = BL_CORE_SUSPENDRESUME,
        .get_brightness = amdgpu_dm_backlight_get_brightness,
        .update_status  = amdgpu_dm_backlight_update_status,
 };
@@ -5770,8 +5771,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                 * change FB pitch, DCC state, rotation or mirroring.
                 */
                bundle->flip_addrs[planes_count].flip_immediate =
-                       (crtc->state->pageflip_flags &
-                        DRM_MODE_PAGE_FLIP_ASYNC) != 0 &&
+                       crtc->state->async_flip &&
                        acrtc_state->update_type == UPDATE_TYPE_FAST;
 
                timestamp_ns = ktime_get_ns();
@@ -6017,7 +6017,9 @@ static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev,
        struct drm_crtc *crtc;
        struct drm_crtc_state *old_crtc_state, *new_crtc_state;
        int i;
+#ifdef CONFIG_DEBUG_FS
        enum amdgpu_dm_pipe_crc_source source;
+#endif
 
        for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
                                      new_crtc_state, i) {
@@ -6348,7 +6350,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
        amdgpu_dm_enable_crtc_interrupts(dev, state, true);
 
        for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
-               if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
+               if (new_crtc_state->async_flip)
                        wait_for_vblank = false;
 
        /* update planes when needed per crtc*/
index 985633c..26c6d73 100644 (file)
 # It calculates Bandwidth and Watermarks values for HW programming
 #
 
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
-       cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
-       cc_stack_align := -mstack-alignment=16
-endif
+calcs_ccflags := -mhard-float -msse
 
-calcs_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
 
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+calcs_ccflags += -mpreferred-stack-boundary=4
+else
 calcs_ccflags += -msse2
 endif
 
index 383f4f8..9b2cb57 100644 (file)
@@ -708,6 +708,10 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v,
 
 unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev)
 {
+       /* for dali, the highest voltage level we want is 0 */
+       if (ASICREV_IS_DALI(hw_internal_rev))
+               return 0;
+
        /* we are ok with all levels */
        return 4;
 }
index 5cc3acc..b1e657e 100644 (file)
@@ -98,11 +98,14 @@ uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context)
                struct dc_stream_state *stream = context->streams[j];
                uint32_t vertical_blank_in_pixels = 0;
                uint32_t vertical_blank_time = 0;
+               uint32_t vertical_total_min = stream->timing.v_total;
+               struct dc_crtc_timing_adjust adjust = stream->adjust;
+               if (adjust.v_total_max != adjust.v_total_min)
+                       vertical_total_min = adjust.v_total_min;
 
                vertical_blank_in_pixels = stream->timing.h_total *
-                       (stream->timing.v_total
+                       (vertical_total_min
                         - stream->timing.v_addressable);
-
                vertical_blank_time = vertical_blank_in_pixels
                        * 10000 / stream->timing.pix_clk_100hz;
 
@@ -171,6 +174,10 @@ void dce11_pplib_apply_display_requirements(
        struct dc_state *context)
 {
        struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
+       int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ;
+
+       if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm)
+               memory_type_multiplier = MEMORY_TYPE_HBM;
 
        pp_display_cfg->all_displays_in_sync =
                context->bw_ctx.bw.dce.all_displays_in_sync;
@@ -183,8 +190,20 @@ void dce11_pplib_apply_display_requirements(
        pp_display_cfg->cpu_pstate_separation_time =
                        context->bw_ctx.bw.dce.blackout_recovery_time_us;
 
-       pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz
-               / MEMORY_TYPE_MULTIPLIER_CZ;
+       /*
+        * TODO: determine whether the bandwidth has reached the memory
+        * limit; if so, adjust the minimum memory clock based on the
+        * real-time bandwidth limitation.
+        */
+       if (ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) {
+               pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz,
+                                                          (uint32_t) div64_s64(
+                                                                  div64_s64(dc->bw_vbios->high_yclk.value,
+                                                                            memory_type_multiplier), 10000));
+       } else {
+               pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz
+                       / memory_type_multiplier;
+       }
 
        pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box(
                        dc,
index 5d1aded..4b8819c 100644 (file)
@@ -580,6 +580,10 @@ static bool construct(struct dc *dc,
 #ifdef CONFIG_DRM_AMD_DC_DCN2_0
        // Allocate memory for the vm_helper
        dc->vm_helper = kzalloc(sizeof(struct vm_helper), GFP_KERNEL);
+       if (!dc->vm_helper) {
+               dm_error("%s: failed to create dc->vm_helper\n", __func__);
+               goto fail;
+       }
 
 #endif
        memcpy(&dc->bb_overrides, &init_params->bb_overrides, sizeof(dc->bb_overrides));
index ca20b15..9c58670 100644 (file)
@@ -2767,6 +2767,15 @@ void core_link_enable_stream(
                                        CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
                                        COLOR_DEPTH_UNDEFINED);
 
+               /* This second call is needed to reconfigure the DIG
+                * as a workaround for the incorrect value being applied
+                * from transmitter control.
+                */
+               if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
+                       stream->link->link_enc->funcs->setup(
+                               stream->link->link_enc,
+                               pipe_ctx->stream->signal);
+
 #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
                if (pipe_ctx->stream->timing.flags.DSC) {
                        if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
index 505967b..51991bf 100644 (file)
@@ -374,6 +374,7 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
        enum display_dongle_type *dongle = &sink_cap->dongle_type;
        uint8_t type2_dongle_buf[DP_ADAPTOR_TYPE2_SIZE];
        bool is_type2_dongle = false;
+       int retry_count = 2;
        struct dp_hdmi_dongle_signature_data *dongle_signature;
 
        /* Assume we have no valid DP passive dongle connected */
@@ -386,13 +387,24 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
                DP_HDMI_DONGLE_ADDRESS,
                type2_dongle_buf,
                sizeof(type2_dongle_buf))) {
-               *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
-               sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK;
+               /* Passive HDMI dongles can sometimes fail here without retrying */
+               while (retry_count > 0) {
+                       if (i2c_read(ddc,
+                               DP_HDMI_DONGLE_ADDRESS,
+                               type2_dongle_buf,
+                               sizeof(type2_dongle_buf)))
+                               break;
+                       retry_count--;
+               }
+               if (retry_count == 0) {
+                       *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
+                       sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK;
 
-               CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf),
-                               "DP-DVI passive dongle %dMhz: ",
-                               DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
-               return;
+                       CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf),
+                                       "DP-DVI passive dongle %dMhz: ",
+                                       DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
+                       return;
+               }
        }
 
        /* Check if Type 2 dongle.*/
index 8f70295..f25ac17 100644 (file)
@@ -404,6 +404,9 @@ bool resource_are_streams_timing_synchronizable(
        if (stream1->view_format != stream2->view_format)
                return false;
 
+       if (stream1->ignore_msa_timing_param || stream2->ignore_msa_timing_param)
+               return false;
+
        return true;
 }
 static bool is_dp_and_hdmi_sharable(
@@ -1540,6 +1543,9 @@ bool dc_is_stream_unchanged(
        if (!are_stream_backends_same(old_stream, stream))
                return false;
 
+       if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param)
+               return false;
+
        return true;
 }
 
index 1488ffd..31b698b 100644 (file)
@@ -148,7 +148,7 @@ static void dce_mi_program_pte_vm(
                        pte->min_pte_before_flip_horiz_scan;
 
        REG_UPDATE(GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT,
-                       GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0xff);
+                       GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0x7f);
 
        REG_UPDATE_3(DVMM_PTE_CONTROL,
                        DVMM_PAGE_WIDTH, page_width,
@@ -157,7 +157,7 @@ static void dce_mi_program_pte_vm(
 
        REG_UPDATE_2(DVMM_PTE_ARB_CONTROL,
                        DVMM_PTE_REQ_PER_CHUNK, pte->pte_req_per_chunk,
-                       DVMM_MAX_PTE_REQ_OUTSTANDING, 0xff);
+                       DVMM_MAX_PTE_REQ_OUTSTANDING, 0x7f);
 }
 
 static void program_urgency_watermark(
index afc6105..76d5488 100644 (file)
@@ -668,6 +668,7 @@ struct clock_source *dce100_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
@@ -1091,6 +1092,7 @@ struct resource_pool *dce100_create_resource_pool(
        if (construct(num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index c66fe17..89620ad 100644 (file)
@@ -714,6 +714,7 @@ struct clock_source *dce110_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
@@ -1462,6 +1463,7 @@ struct resource_pool *dce110_create_resource_pool(
        if (construct(num_virtual_links, dc, pool, asic_id))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 3ac4c7e..21a657e 100644 (file)
@@ -687,6 +687,7 @@ struct clock_source *dce112_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
@@ -987,6 +988,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
        struct dm_pp_clock_levels_with_latency mem_clks = {0};
        struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0};
        struct dm_pp_clock_levels clks = {0};
+       int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ;
+
+       if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm)
+               memory_type_multiplier = MEMORY_TYPE_HBM;
 
        /* do system clock.  TODO PPLIB: after PPLIB is implemented,
         * then remove the old way
@@ -1026,12 +1031,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
                                &clks);
 
                dc->bw_vbios->low_yclk = bw_frc_to_fixed(
-                       clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000);
+                       clks.clocks_in_khz[0] * memory_type_multiplier, 1000);
                dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
-                       clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ,
+                       clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier,
                        1000);
                dc->bw_vbios->high_yclk = bw_frc_to_fixed(
-                       clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ,
+                       clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier,
                        1000);
 
                return;
@@ -1067,12 +1072,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
         * YCLK = UMACLK*m_memoryTypeMultiplier
         */
        dc->bw_vbios->low_yclk = bw_frc_to_fixed(
-               mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000);
+               mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
        dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
                1000);
        dc->bw_vbios->high_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
                1000);
 
        /* Now notify PPLib/SMU about which Watermarks sets they should select
@@ -1338,6 +1343,7 @@ struct resource_pool *dce112_create_resource_pool(
        if (construct(num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
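
The bw_calcs hunks above replace the hard-coded MEMORY_TYPE_MULTIPLIER_CZ with a multiplier chosen per memory type, so HBM boards convert UMACLK to YCLK with the right factor. A minimal sketch of the selection, assuming only the two multipliers visible in the patch (the helper name and signature are illustrative):

    #define MEMORY_TYPE_MULTIPLIER_CZ 4
    #define MEMORY_TYPE_HBM 2

    /* YCLK = UMACLK * m_memoryTypeMultiplier (HW formula cited above) */
    static int yclk_from_umaclk_khz(int umaclk_khz, int is_hbm)
    {
            int multiplier = is_hbm ? MEMORY_TYPE_HBM : MEMORY_TYPE_MULTIPLIER_CZ;

            return umaclk_khz * multiplier;
    }
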
index 7d08154..7c52f7f 100644 (file)
@@ -500,6 +500,7 @@ static struct clock_source *dce120_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
@@ -847,6 +848,8 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
        int i;
        unsigned int clk;
        unsigned int latency;
+       /* original logic in dal3 */
+       int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ;
 
        /*do system clock*/
        if (!dm_pp_get_clock_levels_by_type_with_latency(
@@ -905,13 +908,16 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
         * ALSO always convert UMA clock (from PPLIB)  to YCLK (HW formula):
         * YCLK = UMACLK*m_memoryTypeMultiplier
         */
+       if (dc->bw_vbios->memory_type == bw_def_hbm)
+               memory_type_multiplier = MEMORY_TYPE_HBM;
+
        dc->bw_vbios->low_yclk = bw_frc_to_fixed(
-               mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000);
+               mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
        dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
                1000);
        dc->bw_vbios->high_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
                1000);
 
        /* Now notify PPLib/SMU about which Watermarks sets they should select
@@ -1203,6 +1209,7 @@ struct resource_pool *dce120_create_resource_pool(
        if (construct(num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 4625df9..643ccb0 100644 (file)
@@ -701,6 +701,7 @@ struct clock_source *dce80_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 01c7e30..bbd6e01 100644 (file)
@@ -393,6 +393,10 @@ bool cm_helper_translate_curve_to_hw_format(
        rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
        rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
 
+       rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
+       rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
+       rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+
        // All 3 color channels have same x
        corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
                                             dc_fixpt_from_int(region_start));
@@ -464,13 +468,6 @@ bool cm_helper_translate_curve_to_hw_format(
 
        i = 1;
        while (i != hw_points + 1) {
-               if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
-                       rgb_plus_1->red = rgb->red;
-               if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
-                       rgb_plus_1->green = rgb->green;
-               if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
-                       rgb_plus_1->blue = rgb->blue;
-
                rgb->delta_red   = dc_fixpt_sub(rgb_plus_1->red,   rgb->red);
                rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
                rgb->delta_blue  = dc_fixpt_sub(rgb_plus_1->blue,  rgb->blue);
@@ -562,6 +559,10 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
        rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
        rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
 
+       rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
+       rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
+       rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+
        corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
                                             dc_fixpt_from_int(region_start));
        corner_points[0].green.x = corner_points[0].red.x;
@@ -624,13 +625,6 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
 
        i = 1;
        while (i != hw_points + 1) {
-               if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
-                       rgb_plus_1->red = rgb->red;
-               if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
-                       rgb_plus_1->green = rgb->green;
-               if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
-                       rgb_plus_1->blue = rgb->blue;
-
                rgb->delta_red   = dc_fixpt_sub(rgb_plus_1->red,   rgb->red);
                rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
                rgb->delta_blue  = dc_fixpt_sub(rgb_plus_1->blue,  rgb->blue);
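
Both hunks above trade a per-iteration clamp for a one-time duplication of the last curve point: once rgb_resulted[hw_points] equals rgb_resulted[hw_points - 1], the final delta is exactly zero and the loop can no longer produce a negative step. A simplified sketch of the delta pass over one channel, with plain doubles standing in for the driver's fixed-point type:

    /* pts must have room for hw_points + 1 entries */
    static void fill_deltas(double pts[], double deltas[], int hw_points)
    {
            int i;

            /* duplicate the end point so deltas[hw_points - 1] == 0 */
            pts[hw_points] = pts[hw_points - 1];

            for (i = 0; i < hw_points; i++)
                    deltas[i] = pts[i + 1] - pts[i];
    }
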
index 5a89e46..1599bb9 100644 (file)
@@ -786,6 +786,7 @@ struct clock_source *dcn10_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
@@ -1570,6 +1571,7 @@ struct resource_pool *dcn10_create_resource_pool(
        if (construct(init_data->num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index ddb8d56..63f3bdd 100644 (file)
@@ -10,15 +10,20 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 DCN20 += dcn20_dsc.o
 endif
 
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
-       cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
-       cc_stack_align := -mstack-alignment=16
-endif
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse
 
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
 
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4
+else
 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2
 endif
 
index b4e3ce2..dfb2082 100644 (file)
@@ -814,7 +814,7 @@ static const struct resource_caps res_cap_nv14 = {
                .num_audio = 6,
                .num_stream_encoder = 5,
                .num_pll = 5,
-               .num_dwb = 0,
+               .num_dwb = 1,
                .num_ddc = 5,
 };
 
@@ -1077,6 +1077,7 @@ struct clock_source *dcn20_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 8cd9de8..ff50ae7 100644 (file)
@@ -3,7 +3,22 @@
 
 DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o
 
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -mpreferred-stack-boundary=4
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse
+
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
+
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4
+else
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2
+endif
 
 AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
 
index 5b2a65b..8df2516 100644 (file)
 # It provides the general basic services required by other DAL
 # subcomponents.
 
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
-       cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
-       cc_stack_align := -mstack-alignment=16
-endif
+dml_ccflags := -mhard-float -msse
 
-dml_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
 
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dml_ccflags += -mpreferred-stack-boundary=4
+else
 dml_ccflags += -msse2
 endif
 
index 6498837..6c6c486 100644 (file)
@@ -2577,7 +2577,8 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer
                        mode_lib->vba.MinActiveDRAMClockChangeMargin
                                        + mode_lib->vba.DRAMClockChangeLatency;
 
-       if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
+       if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 50) {
+               mode_lib->vba.DRAMClockChangeWatermark += 25;
                mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
        } else {
                if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
index 456cd0e..3b6ed60 100644 (file)
@@ -39,9 +39,6 @@
  * ways. Unless there is something clearly wrong with it, the code should
  * remain as-is, as it provides us with a guarantee from HW that it is correct.
  */
-
-typedef unsigned int uint;
-
 typedef struct {
        double DPPCLK;
        double DISPCLK;
@@ -4774,7 +4771,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
                                mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
                                for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-                                       uint m;
+                                       unsigned int m;
 
                                        locals->cursor_bw[k] = 0;
                                        locals->cursor_bw_pre[k] = 0;
@@ -5285,7 +5282,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
        double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
        double FullDETBufferingTimeYStutterCriticalPlane = 0;
        double TimeToFinishSwathTransferStutterCriticalPlane = 0;
-       uint k, j;
+       unsigned int k, j;
 
        mode_lib->vba.TotalActiveDPP = 0;
        mode_lib->vba.TotalDCCActiveDPP = 0;
@@ -5507,7 +5504,7 @@ static void CalculateDCFCLKDeepSleep(
                double DPPCLK[],
                double *DCFCLKDeepSleep)
 {
-       uint k;
+       unsigned int k;
        double DisplayPipeLineDeliveryTimeLuma;
        double DisplayPipeLineDeliveryTimeChroma;
        //double   DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
@@ -5727,7 +5724,7 @@ static void CalculatePixelDeliveryTimes(
                double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
 {
        double req_per_swath_ub;
-       uint k;
+       unsigned int k;
 
        for (k = 0; k < NumberOfActivePlanes; ++k) {
                if (VRatio[k] <= 1) {
@@ -5869,7 +5866,7 @@ static void CalculateMetaAndPTETimes(
        unsigned int dpte_groups_per_row_chroma_ub;
        unsigned int num_group_per_lower_vm_stage;
        unsigned int num_req_per_lower_vm_stage;
-       uint k;
+       unsigned int k;
 
        for (k = 0; k < NumberOfActivePlanes; ++k) {
                if (GPUVMEnable == true) {
index b456cd2..9707372 100644 (file)
@@ -1,15 +1,20 @@
 #
 # Makefile for the 'dsc' sub-component of DAL.
 
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
-       cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
-       cc_stack_align := -mstack-alignment=16
-endif
+dsc_ccflags := -mhard-float -msse
 
-dsc_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
 
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dsc_ccflags += -mpreferred-stack-boundary=4
+else
 dsc_ccflags += -msse2
 endif
 
index 34485d9..8572678 100644 (file)
 
 #include "hw_factory_dcn21.h"
 
-
 #include "dcn/dcn_2_1_0_offset.h"
 #include "dcn/dcn_2_1_0_sh_mask.h"
 #include "renoir_ip_offset.h"
 
-
 #include "reg_helper.h"
 #include "../hpd_regs.h"
 /* begin *********************
@@ -136,6 +134,39 @@ static const struct ddc_sh_mask ddc_mask[] = {
        DDC_MASK_SH_LIST_DCN2(_MASK, 6)
 };
 
+#include "../generic_regs.h"
+
+/* set field name */
+#define SF_GENERIC(reg_name, field_name, post_fix)\
+       .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define generic_regs(id) \
+{\
+       GENERIC_REG_LIST(id)\
+}
+
+static const struct generic_registers generic_regs[] = {
+       generic_regs(A),
+};
+
+static const struct generic_sh_mask generic_shift[] = {
+       GENERIC_MASK_SH_LIST(__SHIFT, A),
+};
+
+static const struct generic_sh_mask generic_mask[] = {
+       GENERIC_MASK_SH_LIST(_MASK, A),
+};
+
+static void define_generic_registers(struct hw_gpio_pin *pin, uint32_t en)
+{
+       struct hw_generic *generic = HW_GENERIC_FROM_BASE(pin);
+
+       generic->regs = &generic_regs[en];
+       generic->shifts = &generic_shift[en];
+       generic->masks = &generic_mask[en];
+       generic->base.regs = &generic_regs[en].gpio;
+}
+
 static void define_ddc_registers(
                struct hw_gpio_pin *pin,
                uint32_t en)
@@ -181,7 +212,8 @@ static const struct hw_factory_funcs funcs = {
        .get_hpd_pin = dal_hw_hpd_get_pin,
        .get_generic_pin = dal_hw_generic_get_pin,
        .define_hpd_registers = define_hpd_registers,
-       .define_ddc_registers = define_ddc_registers
+       .define_ddc_registers = define_ddc_registers,
+       .define_generic_registers = define_generic_registers
 };
 /*
  * dal_hw_factory_dcn21_init
index ad7c437..fbb58fb 100644 (file)
@@ -58,7 +58,6 @@
 #define SF_HPD(reg_name, field_name, post_fix)\
        .field_name = reg_name ## __ ## field_name ## post_fix
 
-
 /* macros to expend register list macro defined in HW object header file
  * end *********************/
 
@@ -71,7 +70,7 @@ static bool offset_to_id(
 {
        switch (offset) {
        /* GENERIC */
-       case REG(DC_GENERICA):
+       case REG(DC_GPIO_GENERIC_A):
                *id = GPIO_ID_GENERIC;
                switch (mask) {
                case DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK:
index 1cc1c8c..bef224b 100644 (file)
@@ -31,6 +31,8 @@
 #include "dm_pp_smu.h"
 
 #define MEMORY_TYPE_MULTIPLIER_CZ 4
+#define MEMORY_TYPE_HBM 2
+
 
 enum dce_version resource_parse_asic_id(
                struct hw_asic_id asic_id);
index 1f16892..1be6c44 100644 (file)
 #define RAVEN1_F0 0xF0
 #define RAVEN_UNKNOWN 0xFF
 
+#define PICASSO_15D8_REV_E3 0xE3
+#define PICASSO_15D8_REV_E4 0xE4
+
 #define ASICREV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)
 #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))
-#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0))
-
+#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < PICASSO_15D8_REV_E3))
+#define ASICREV_IS_DALI(eChipRev) ((eChipRev >= PICASSO_15D8_REV_E3) && (eChipRev < RAVEN1_F0))
 
 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
 
index 554714c..094648c 100644 (file)
@@ -155,7 +155,7 @@ static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0x0243FC00, 0x00DC0000
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } } } };
-static const struct IP_BASE MP1_BASE ={ { { { 0x00016200, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } },
+static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
index fa636cb..fa8ad7d 100644 (file)
@@ -1531,6 +1531,7 @@ static int pp_asic_reset_mode_2(void *handle)
 static int pp_smu_i2c_bus_access(void *handle, bool acquire)
 {
        struct pp_hwmgr *hwmgr = handle;
+       int ret = 0;
 
        if (!hwmgr || !hwmgr->pm_en)
                return -EINVAL;
@@ -1540,7 +1541,11 @@ static int pp_smu_i2c_bus_access(void *handle, bool acquire)
                return -EINVAL;
        }
 
-       return hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire);
+       mutex_lock(&hwmgr->smu_lock);
+       ret = hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire);
+       mutex_unlock(&hwmgr->smu_lock);
+
+       return ret;
 }
 
 static const struct amd_pm_funcs pp_dpm_funcs = {
index 22f3c60..4acf139 100644 (file)
@@ -354,6 +354,9 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
        case AMD_IP_BLOCK_TYPE_GFX:
                ret = smu_gfx_off_control(smu, gate);
                break;
+       case AMD_IP_BLOCK_TYPE_SDMA:
+               ret = smu_powergate_sdma(smu, gate);
+               break;
        default:
                break;
        }
@@ -840,6 +843,8 @@ static int smu_sw_init(void *handle)
        smu->smu_baco.state = SMU_BACO_STATE_EXIT;
        smu->smu_baco.platform_support = false;
 
+       mutex_init(&smu->sensor_lock);
+
        smu->watermarks_bitmap = 0;
        smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
        smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
index f1f0720..d493a3f 100644 (file)
@@ -1018,6 +1018,7 @@ static int arcturus_read_sensor(struct smu_context *smu,
        if (!data || !size)
                return -EINVAL;
 
+       mutex_lock(&smu->sensor_lock);
        switch (sensor) {
        case AMDGPU_PP_SENSOR_MAX_FAN_RPM:
                *(uint32_t *)data = pptable->FanMaximumRpm;
@@ -1044,6 +1045,7 @@ static int arcturus_read_sensor(struct smu_context *smu,
        default:
                ret = smu_smc_read_sensor(smu, sensor, data, size);
        }
+       mutex_unlock(&smu->sensor_lock);
 
        return ret;
 }
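
The hunk above (and the matching navi10 and vega20 hunks later in this series) serializes the whole sensor-read switch behind the new sensor_lock so concurrent readers cannot interleave SMU accesses. A minimal sketch of the pattern, with every name except sensor_lock invented for illustration:

    #include <linux/mutex.h>

    struct demo_smu {
            struct mutex sensor_lock;
            int last_sensor_value;          /* stands in for the SMU tables */
    };

    static int demo_read_sensor(struct demo_smu *smu, int *out)
    {
            int ret = 0;

            mutex_lock(&smu->sensor_lock);
            *out = smu->last_sensor_value;  /* the real switch goes here */
            mutex_unlock(&smu->sensor_lock);

            return ret;
    }
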
index d08493b..beacfff 100644 (file)
@@ -5098,9 +5098,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
 
        if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
                podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
-               for (i = 0; i < podn_vdd_dep->count - 1; i++)
-                       od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
-               if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc)
+               for (i = 0; i < podn_vdd_dep->count; i++)
                        od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
        } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
                podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
index 6109815..23171a4 100644 (file)
@@ -344,6 +344,7 @@ struct smu_context
        const struct smu_funcs          *funcs;
        const struct pptable_funcs      *ppt_funcs;
        struct mutex                    mutex;
+       struct mutex                    sensor_lock;
        uint64_t pool_size;
 
        struct smu_table_context        smu_table;
index 12c0e46..0b46140 100644 (file)
@@ -547,7 +547,7 @@ static int navi10_get_metrics_table(struct smu_context *smu,
        struct smu_table_context *smu_table= &smu->smu_table;
        int ret = 0;
 
-       if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
+       if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) {
                ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
                                (void *)smu_table->metrics_table, false);
                if (ret) {
@@ -1386,6 +1386,7 @@ static int navi10_read_sensor(struct smu_context *smu,
        if(!data || !size)
                return -EINVAL;
 
+       mutex_lock(&smu->sensor_lock);
        switch (sensor) {
        case AMDGPU_PP_SENSOR_MAX_FAN_RPM:
                *(uint32_t *)data = pptable->FanMaximumRpm;
@@ -1409,6 +1410,7 @@ static int navi10_read_sensor(struct smu_context *smu,
        default:
                ret = smu_smc_read_sensor(smu, sensor, data, size);
        }
+       mutex_unlock(&smu->sensor_lock);
 
        return ret;
 }
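
The metrics-table hunk above is worth a second look: HZ / 1000 is a single jiffy on HZ=1000 kernels and truncates to zero for smaller HZ values, so the cache expired on essentially every call; msecs_to_jiffies(100) gives the table a real 100 ms lifetime. A sketch of the expiry test, assuming a jiffies timestamp of 0 means never fetched:

    #include <linux/jiffies.h>

    static bool metrics_cache_expired(unsigned long fetched_at)
    {
            return !fetched_at ||
                   time_after(jiffies, fetched_at + msecs_to_jiffies(100));
    }
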
index 2a6da54..e62bfba 100644 (file)
@@ -177,12 +177,82 @@ static int renoir_get_dpm_uclk_limited(struct smu_context *smu, uint32_t *clock,
 
 }
 
+static int renoir_print_clk_levels(struct smu_context *smu,
+                       enum smu_clk_type clk_type, char *buf)
+{
+       int i, size = 0, ret = 0;
+       uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0;
+       DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+       SmuMetrics_t metrics = {0};
+
+       if (!clk_table || clk_type >= SMU_CLK_COUNT)
+               return -EINVAL;
+
+       ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
+                              (void *)&metrics, false);
+       if (ret)
+               return ret;
+
+       switch (clk_type) {
+       case SMU_GFXCLK:
+       case SMU_SCLK:
+               /* the clock values retrieved from the metrics table are in MHz */
+               cur_value = metrics.ClockFrequency[CLOCK_GFXCLK];
+               ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max);
+               if (!ret) {
+                       /* the driver only knows min/max gfx_clk; add level 1 for all other gfx clks */
+                       if (cur_value  == max)
+                               i = 2;
+                       else if (cur_value == min)
+                               i = 0;
+                       else
+                               i = 1;
+
+                       size += sprintf(buf + size, "0: %uMhz %s\n", min,
+                                       i == 0 ? "*" : "");
+                       size += sprintf(buf + size, "1: %uMhz %s\n",
+                                       i == 1 ? cur_value : RENOIR_UMD_PSTATE_GFXCLK,
+                                       i == 1 ? "*" : "");
+                       size += sprintf(buf + size, "2: %uMhz %s\n", max,
+                                       i == 2 ? "*" : "");
+               }
+               return size;
+       case SMU_SOCCLK:
+               count = NUM_SOCCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_SOCCLK];
+               break;
+       case SMU_MCLK:
+               count = NUM_MEMCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_UMCCLK];
+               break;
+       case SMU_DCEFCLK:
+               count = NUM_DCFCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_DCFCLK];
+               break;
+       case SMU_FCLK:
+               count = NUM_FCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_FCLK];
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       for (i = 0; i < count; i++) {
+               GET_DPM_CUR_FREQ(clk_table, clk_type, i, value);
+               size += sprintf(buf + size, "%d: %uMhz %s\n", i, value,
+                               cur_value == value ? "*" : "");
+       }
+
+       return size;
+}
+
 static const struct pptable_funcs renoir_ppt_funcs = {
        .get_smu_msg_index = renoir_get_smu_msg_index,
        .get_smu_table_index = renoir_get_smu_table_index,
        .tables_init = renoir_tables_init,
        .set_power_state = NULL,
        .get_dpm_uclk_limited = renoir_get_dpm_uclk_limited,
+       .print_clk_levels = renoir_print_clk_levels,
 };
 
 void renoir_set_ppt_funcs(struct smu_context *smu)
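
To see what renoir_print_clk_levels() produces, note that each level is printed on its own line with a star marking the current one; for SMU_SCLK, where only the min and max are known, the middle level falls back to RENOIR_UMD_PSTATE_GFXCLK unless the current clock sits between the bounds. With illustrative frequencies the buffer might read:

    0: 200Mhz
    1: 700Mhz *
    2: 1400Mhz
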
index e9b7237..2a390dd 100644 (file)
 
 extern void renoir_set_ppt_funcs(struct smu_context *smu);
 
+/* UMD PState Renoir Msg Parameters in MHz */
+#define RENOIR_UMD_PSTATE_GFXCLK       700
+#define RENOIR_UMD_PSTATE_SOCCLK       678
+#define RENOIR_UMD_PSTATE_FCLK         800
+
+#define GET_DPM_CUR_FREQ(table, clk_type, dpm_level, freq)             \
+       do {                                                            \
+               switch (clk_type) {                                     \
+               case SMU_SOCCLK:                                        \
+                       freq = table->SocClocks[dpm_level].Freq;        \
+                       break;                                          \
+               case SMU_MCLK:                                          \
+                       freq = table->MemClocks[dpm_level].Freq;        \
+                       break;                                          \
+               case SMU_DCEFCLK:                                       \
+                       freq = table->DcfClocks[dpm_level].Freq;        \
+                       break;                                          \
+               case SMU_FCLK:                                          \
+                       freq = table->FClocks[dpm_level].Freq;          \
+                       break;                                          \
+               default:                                                \
+                       break;                                          \
+               }                                                       \
+       } while (0)
+
 #endif
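
A quick illustration of the GET_DPM_CUR_FREQ() helper above; clk_table and level stand for hypothetical locals in the caller:

    uint32_t freq = 0;

    /* expands to a switch on clk_type; unhandled types leave freq untouched */
    GET_DPM_CUR_FREQ(clk_table, SMU_SOCCLK, level, freq);
    /* freq now holds clk_table->SocClocks[level].Freq */
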
index dc75444..23c1201 100644 (file)
@@ -655,7 +655,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
                        count = SMU_MAX_SMIO_LEVELS;
                for (level = 0; level < count; level++) {
                        table->SmioTable2.Pattern[level].Voltage =
-                               PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE);
+                               PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE);
                        /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
                        table->SmioTable2.Pattern[level].Smio =
                                (uint8_t) level;
index 7c960b0..ae18fbc 100644 (file)
@@ -456,7 +456,7 @@ static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
                        count = SMU_MAX_SMIO_LEVELS;
                for (level = 0; level < count; level++) {
                        table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US(
-                                       data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE);
+                                       data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE);
                        /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
                        table->SmioTable2.Pattern[level].Smio =
                                (uint8_t) level;
index 64386ee..bbd8ebd 100644 (file)
@@ -3023,6 +3023,7 @@ static int vega20_read_sensor(struct smu_context *smu,
        if(!data || !size)
                return -EINVAL;
 
+       mutex_lock(&smu->sensor_lock);
        switch (sensor) {
        case AMDGPU_PP_SENSOR_MAX_FAN_RPM:
                *(uint32_t *)data = pptable->FanMaximumRpm;
@@ -3048,6 +3049,7 @@ static int vega20_read_sensor(struct smu_context *smu,
        default:
                ret = smu_smc_read_sensor(smu, sensor, data, size);
        }
+       mutex_unlock(&smu->sensor_lock);
 
        return ret;
 }
index 8820ce1..ae27490 100644 (file)
@@ -82,7 +82,8 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state)
 
        drm_atomic_helper_commit_modeset_disables(dev, old_state);
 
-       drm_atomic_helper_commit_planes(dev, old_state, 0);
+       drm_atomic_helper_commit_planes(dev, old_state,
+                                       DRM_PLANE_COMMIT_ACTIVE_ONLY);
 
        drm_atomic_helper_commit_modeset_enables(dev, old_state);
 
index ea26bc9..b848270 100644 (file)
@@ -564,8 +564,8 @@ komeda_splitter_validate(struct komeda_splitter *splitter,
        }
 
        if (!in_range(&splitter->vsize, dflow->in_h)) {
-               DRM_DEBUG_ATOMIC("split in_in: %d exceed the acceptable range.\n",
-                                dflow->in_w);
+               DRM_DEBUG_ATOMIC("split in_h: %d exceeds the acceptable range.\n",
+                                dflow->in_h);
                return -EINVAL;
        }
 
index 2851cac..b72840c 100644 (file)
@@ -43,9 +43,8 @@ komeda_wb_encoder_atomic_check(struct drm_encoder *encoder,
        struct komeda_data_flow_cfg dflow;
        int err;
 
-       if (!writeback_job || !writeback_job->fb) {
+       if (!writeback_job)
                return 0;
-       }
 
        if (!crtc_st->active) {
                DRM_DEBUG_ATOMIC("Cannot write the composition result out on a inactive CRTC.\n");
@@ -166,8 +165,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms,
                                           &komeda_wb_encoder_helper_funcs,
                                           formats, n_formats);
        komeda_put_fourcc_list(formats);
-       if (err)
+       if (err) {
+               kfree(kwb_conn);
                return err;
+       }
 
        drm_connector_helper_add(&wb_conn->base, &komeda_wb_conn_helper_funcs);
 
index 22c0847..875a3a9 100644 (file)
@@ -131,7 +131,7 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder,
        struct drm_framebuffer *fb;
        int i, n_planes;
 
-       if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+       if (!conn_state->writeback_job)
                return 0;
 
        fb = conn_state->writeback_job->fb;
@@ -248,7 +248,7 @@ void malidp_mw_atomic_commit(struct drm_device *drm,
 
        mw_state = to_mw_state(conn_state);
 
-       if (conn_state->writeback_job && conn_state->writeback_job->fb) {
+       if (conn_state->writeback_job) {
                struct drm_framebuffer *fb = conn_state->writeback_job->fb;
 
                DRM_DEV_DEBUG_DRIVER(drm->dev,
index 98bccac..9e13e46 100644 (file)
@@ -874,6 +874,9 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge)
                                 &adv7511_connector_helper_funcs);
        drm_connector_attach_encoder(&adv->connector, bridge->encoder);
 
+       if (adv->type == ADV7533)
+               ret = adv7533_attach_dsi(adv);
+
        if (adv->i2c_main->irq)
                regmap_write(adv->regmap, ADV7511_REG_INT_ENABLE(0),
                             ADV7511_INT0_HPD);
@@ -978,10 +981,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv)
 {
        int ret;
 
-       adv->i2c_cec = i2c_new_secondary_device(adv->i2c_main, "cec",
+       adv->i2c_cec = i2c_new_ancillary_device(adv->i2c_main, "cec",
                                                ADV7511_CEC_I2C_ADDR_DEFAULT);
-       if (!adv->i2c_cec)
-               return -EINVAL;
+       if (IS_ERR(adv->i2c_cec))
+               return PTR_ERR(adv->i2c_cec);
        i2c_set_clientdata(adv->i2c_cec, adv);
 
        adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
@@ -1162,20 +1165,20 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 
        adv7511_packet_disable(adv7511, 0xffff);
 
-       adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid",
+       adv7511->i2c_edid = i2c_new_ancillary_device(i2c, "edid",
                                        ADV7511_EDID_I2C_ADDR_DEFAULT);
-       if (!adv7511->i2c_edid) {
-               ret = -EINVAL;
+       if (IS_ERR(adv7511->i2c_edid)) {
+               ret = PTR_ERR(adv7511->i2c_edid);
                goto uninit_regulators;
        }
 
        regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR,
                     adv7511->i2c_edid->addr << 1);
 
-       adv7511->i2c_packet = i2c_new_secondary_device(i2c, "packet",
+       adv7511->i2c_packet = i2c_new_ancillary_device(i2c, "packet",
                                        ADV7511_PACKET_I2C_ADDR_DEFAULT);
-       if (!adv7511->i2c_packet) {
-               ret = -EINVAL;
+       if (IS_ERR(adv7511->i2c_packet)) {
+               ret = PTR_ERR(adv7511->i2c_packet);
                goto err_i2c_unregister_edid;
        }
 
@@ -1219,17 +1222,8 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
        drm_bridge_add(&adv7511->bridge);
 
        adv7511_audio_init(dev, adv7511);
-
-       if (adv7511->type == ADV7533) {
-               ret = adv7533_attach_dsi(adv7511);
-               if (ret)
-                       goto err_remove_bridge;
-       }
-
        return 0;
 
-err_remove_bridge:
-       drm_bridge_remove(&adv7511->bridge);
 err_unregister_cec:
        i2c_unregister_device(adv7511->i2c_cec);
        if (adv7511->cec_clk)
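
The conversions above follow the i2c core change: i2c_new_ancillary_device() reports failure through ERR_PTR() codes rather than NULL, so callers can propagate the real error instead of substituting -EINVAL. A minimal sketch of the new calling convention (main_client and the 0x3c address are made up for the example):

    struct i2c_client *cec;

    cec = i2c_new_ancillary_device(main_client, "cec", 0x3c);
    if (IS_ERR(cec))
            return PTR_ERR(cec);
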
index cebc8e6..8a8d605 100644 (file)
@@ -728,6 +728,8 @@ static int tc_set_video_mode(struct tc_data *tc,
        int lower_margin = mode->vsync_start - mode->vdisplay;
        int vsync_len = mode->vsync_end - mode->vsync_start;
        u32 dp0_syncval;
+       u32 bits_per_pixel = 24;
+       u32 in_bw, out_bw;
 
        /*
         * Recommended maximum number of symbols transferred in a transfer unit:
@@ -735,7 +737,10 @@ static int tc_set_video_mode(struct tc_data *tc,
         *              (output active video bandwidth in bytes))
         * Must be less than tu_size.
         */
-       max_tu_symbol = TU_SIZE_RECOMMENDED - 1;
+
+       in_bw = mode->clock * bits_per_pixel / 8;
+       out_bw = tc->link.base.num_lanes * tc->link.base.rate;
+       max_tu_symbol = DIV_ROUND_UP(in_bw * TU_SIZE_RECOMMENDED, out_bw);
 
        dev_dbg(tc->dev, "set mode %dx%d\n",
                mode->hdisplay, mode->vdisplay);
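
Rather than pinning max_tu_symbol at tu_size - 1, the hunk above derives it from the input/output bandwidth ratio. A worked example with illustrative numbers (1080p60 at a 148500 kHz pixel clock, 24 bpp, two HBR lanes at 270000 kHz each, and tu_size taken as 64 for round figures):

    in_bw  = 148500 * 24 / 8 = 445500
    out_bw = 2 * 270000      = 540000
    max_tu_symbol = DIV_ROUND_UP(445500 * 64, 540000) = 53

so roughly 53 of the 64 time slots in each transfer unit carry pixel data, instead of the fixed tu_size - 1 used before.
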
index 419381a..14aeaf7 100644 (file)
@@ -430,10 +430,15 @@ static int drm_atomic_connector_check(struct drm_connector *connector,
                return -EINVAL;
        }
 
-       if (writeback_job->out_fence && !writeback_job->fb) {
-               DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n",
-                                connector->base.id, connector->name);
-               return -EINVAL;
+       if (!writeback_job->fb) {
+               if (writeback_job->out_fence) {
+                       DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n",
+                                        connector->base.id, connector->name);
+                       return -EINVAL;
+               }
+
+               drm_writeback_cleanup_job(writeback_job);
+               state->writeback_job = NULL;
        }
 
        return 0;
index aa16ea1..3ef2ac5 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include <linux/dma-fence.h>
+#include <linux/ktime.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
@@ -1580,9 +1581,23 @@ static void commit_tail(struct drm_atomic_state *old_state)
 {
        struct drm_device *dev = old_state->dev;
        const struct drm_mode_config_helper_funcs *funcs;
+       ktime_t start;
+       s64 commit_time_ms;
 
        funcs = dev->mode_config.helper_private;
 
+       /*
+        * We're measuring the _entire_ commit, so the time will vary depending
+        * on how many fences and objects are involved. For the purposes of self
+        * refresh, this is desirable since it'll give us an idea of how
+        * congested things are. This will inform our decision on how often we
+        * should enter self refresh after idle.
+        *
+        * These times will be averaged out in the self refresh helpers to avoid
+        * overreacting to one outlier frame.
+        */
+       start = ktime_get();
+
        drm_atomic_helper_wait_for_fences(dev, old_state, false);
 
        drm_atomic_helper_wait_for_dependencies(old_state);
@@ -1592,6 +1607,11 @@ static void commit_tail(struct drm_atomic_state *old_state)
        else
                drm_atomic_helper_commit_tail(old_state);
 
+       commit_time_ms = ktime_ms_delta(ktime_get(), start);
+       if (commit_time_ms > 0)
+               drm_self_refresh_helper_update_avg_times(old_state,
+                                                (unsigned long)commit_time_ms);
+
        drm_atomic_helper_commit_cleanup_done(old_state);
 
        drm_atomic_state_put(old_state);
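
A compact sketch of the timing pattern commit_tail() gains above: snapshot a ktime before the work, convert the delta to milliseconds afterwards, and hand positive values to the averaging helper. Everything but the two ktime calls is illustrative:

    #include <linux/ktime.h>

    static s64 timed_work_ms(void (*work)(void))
    {
            ktime_t start = ktime_get();

            work();
            return ktime_ms_delta(ktime_get(), start);
    }
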
@@ -3275,7 +3295,7 @@ static int page_flip_common(struct drm_atomic_state *state,
                return PTR_ERR(crtc_state);
 
        crtc_state->event = event;
-       crtc_state->pageflip_flags = flags;
+       crtc_state->async_flip = flags & DRM_MODE_PAGE_FLIP_ASYNC;
 
        plane_state = drm_atomic_get_plane_state(state, plane);
        if (IS_ERR(plane_state))
index 46dc264..d0a937f 100644 (file)
@@ -128,7 +128,7 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc,
        state->zpos_changed = false;
        state->commit = NULL;
        state->event = NULL;
-       state->pageflip_flags = 0;
+       state->async_flip = false;
 
        /* Self refresh should be canceled when a new update is available */
        state->active = drm_atomic_crtc_effectively_active(state);
index 5a5b42d..7a26bfb 100644 (file)
@@ -1305,8 +1305,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
        if (arg->reserved)
                return -EINVAL;
 
-       if ((arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) &&
-                       !dev->mode_config.async_page_flip)
+       if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC)
                return -EINVAL;
 
        /* can't test and expect an event at the same time. */
index c456c3d..769feef 100644 (file)
@@ -976,14 +976,14 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
        if (ret)
                goto err_minors;
 
+       dev->registered = true;
+
        if (dev->driver->load) {
                ret = dev->driver->load(dev, flags);
                if (ret)
                        goto err_minors;
        }
 
-       dev->registered = true;
-
        if (drm_core_check_feature(dev, DRIVER_MODESET))
                drm_modeset_register_all(dev);
 
index 82a4cee..6b01771 100644 (file)
@@ -159,6 +159,9 @@ static const struct edid_quirk {
        /* Medion MD 30217 PG */
        { "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 },
 
+       /* Lenovo G50 */
+       { "SDC", 18514, EDID_QUIRK_FORCE_6BPC },
+
        /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */
        { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC },
 
index f675a3b..fcd728d 100644 (file)
@@ -336,7 +336,12 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
        case DRM_CLIENT_CAP_ATOMIC:
                if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
                        return -EOPNOTSUPP;
-               if (req->value > 1)
+               /* The modesetting DDX has a totally broken idea of atomic. */
+               if (current->comm[0] == 'X' && req->value == 1) {
+                       pr_info("broken atomic modeset userspace detected, disabling atomic\n");
+                       return -EOPNOTSUPP;
+               }
+               if (req->value > 2)
                        return -EINVAL;
                file_priv->atomic = req->value;
                file_priv->universal_planes = req->value;
index c355ba8..6a23e36 100644 (file)
@@ -42,7 +42,7 @@ int __drm_mode_object_add(struct drm_device *dev, struct drm_mode_object *obj,
 {
        int ret;
 
-       WARN_ON(dev->registered && !obj_free_cb);
+       WARN_ON(!dev->driver->load && dev->registered && !obj_free_cb);
 
        mutex_lock(&dev->mode_config.idr_mutex);
        ret = idr_alloc(&dev->mode_config.object_idr, register_obj ? obj : NULL,
@@ -104,7 +104,7 @@ void drm_mode_object_register(struct drm_device *dev,
 void drm_mode_object_unregister(struct drm_device *dev,
                                struct drm_mode_object *object)
 {
-       WARN_ON(dev->registered && !object->free_cb);
+       WARN_ON(!dev->driver->load && dev->registered && !object->free_cb);
 
        mutex_lock(&dev->mode_config.idr_mutex);
        if (object->id) {
index 4b9424a..68f4765 100644 (file)
@@ -5,6 +5,7 @@
  * Authors:
  * Sean Paul <seanpaul@chromium.org>
  */
+#include <linux/average.h>
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
  * atomic_check when &drm_crtc_state.self_refresh_active is true.
  */
 
+#define SELF_REFRESH_AVG_SEED_MS 200
+
+DECLARE_EWMA(psr_time, 4, 4)
+
 struct drm_self_refresh_data {
        struct drm_crtc *crtc;
        struct delayed_work entry_work;
-       struct drm_atomic_state *save_state;
-       unsigned int entry_delay_ms;
+
+       struct mutex avg_mutex;
+       struct ewma_psr_time entry_avg_ms;
+       struct ewma_psr_time exit_avg_ms;
 };
 
 static void drm_self_refresh_helper_entry_work(struct work_struct *work)
@@ -122,6 +129,44 @@ out_drop_locks:
        drm_modeset_acquire_fini(&ctx);
 }
 
+/**
+ * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages
+ * @state: the state which has just been applied to hardware
+ * @commit_time_ms: the amount of time in ms that this commit took to complete
+ *
+ * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will
+ * update the average entry/exit self refresh times on self refresh transitions.
+ * These averages will be used when calculating how long to delay before
+ * entering self refresh mode after activity.
+ */
+void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+                                             unsigned int commit_time_ms)
+{
+       struct drm_crtc *crtc;
+       struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+       int i;
+
+       for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+                                     new_crtc_state, i) {
+               struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
+               struct ewma_psr_time *time;
+
+               if (old_crtc_state->self_refresh_active ==
+                   new_crtc_state->self_refresh_active)
+                       continue;
+
+               if (new_crtc_state->self_refresh_active)
+                       time = &sr_data->entry_avg_ms;
+               else
+                       time = &sr_data->exit_avg_ms;
+
+               mutex_lock(&sr_data->avg_mutex);
+               ewma_psr_time_add(time, commit_time_ms);
+               mutex_unlock(&sr_data->avg_mutex);
+       }
+}
+EXPORT_SYMBOL(drm_self_refresh_helper_update_avg_times);
+
 /**
  * drm_self_refresh_helper_alter_state - Alters the atomic state for SR exit
  * @state: the state currently being checked
@@ -153,6 +198,7 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
 
        for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
                struct drm_self_refresh_data *sr_data;
+               unsigned int delay;
 
                /* Don't trigger the entry timer when we're already in SR */
                if (crtc_state->self_refresh_active)
@@ -162,8 +208,13 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
                if (!sr_data)
                        continue;
 
+               mutex_lock(&sr_data->avg_mutex);
+               delay = (ewma_psr_time_read(&sr_data->entry_avg_ms) +
+                        ewma_psr_time_read(&sr_data->exit_avg_ms)) * 2;
+               mutex_unlock(&sr_data->avg_mutex);
+
                mod_delayed_work(system_wq, &sr_data->entry_work,
-                                msecs_to_jiffies(sr_data->entry_delay_ms));
+                                msecs_to_jiffies(delay));
        }
 }
 EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
@@ -171,12 +222,10 @@ EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
 /**
  * drm_self_refresh_helper_init - Initializes self refresh helpers for a crtc
  * @crtc: the crtc which supports self refresh supported displays
- * @entry_delay_ms: amount of inactivity to wait before entering self refresh
  *
  * Returns zero if successful or -errno on failure
  */
-int drm_self_refresh_helper_init(struct drm_crtc *crtc,
-                                unsigned int entry_delay_ms)
+int drm_self_refresh_helper_init(struct drm_crtc *crtc)
 {
        struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
 
@@ -190,8 +239,18 @@ int drm_self_refresh_helper_init(struct drm_crtc *crtc,
 
        INIT_DELAYED_WORK(&sr_data->entry_work,
                          drm_self_refresh_helper_entry_work);
-       sr_data->entry_delay_ms = entry_delay_ms;
        sr_data->crtc = crtc;
+       mutex_init(&sr_data->avg_mutex);
+       ewma_psr_time_init(&sr_data->entry_avg_ms);
+       ewma_psr_time_init(&sr_data->exit_avg_ms);
+
+       /*
+        * Seed the averages so they're non-zero (and sufficiently large
+        * for even poorly performing panels). As time goes on, this will be
+        * averaged out and the values will trend to their true value.
+        */
+       ewma_psr_time_add(&sr_data->entry_avg_ms, SELF_REFRESH_AVG_SEED_MS);
+       ewma_psr_time_add(&sr_data->exit_avg_ms, SELF_REFRESH_AVG_SEED_MS);
 
        crtc->self_refresh_data = sr_data;
        return 0;
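
The delay heuristic above leans on the kernel's fixed-point EWMA helpers: DECLARE_EWMA(psr_time, 4, 4) generates ewma_psr_time_init/add/read with 4 fractional bits and a 1/16 weight per sample, so the 200 ms seed decays gradually toward the measured commit times. A self-contained sketch of that decay (demo_time is an illustrative name):

    #include <linux/average.h>

    DECLARE_EWMA(demo_time, 4, 4)

    static unsigned long seeded_avg(void)
    {
            struct ewma_demo_time avg;

            ewma_demo_time_init(&avg);
            ewma_demo_time_add(&avg, 200);  /* seed, as the helper does */
            ewma_demo_time_add(&avg, 16);   /* one fast measured sample */

            return ewma_demo_time_read(&avg);   /* about 188, not 16 */
    }
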
index ff138b6..43d9e3b 100644 (file)
@@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
        if (job->fb)
                drm_framebuffer_put(job->fb);
 
+       if (job->out_fence)
+               dma_fence_put(job->out_fence);
+
        kfree(job);
 }
 EXPORT_SYMBOL(drm_writeback_cleanup_job);
@@ -366,25 +369,29 @@ drm_writeback_signal_completion(struct drm_writeback_connector *wb_connector,
 {
        unsigned long flags;
        struct drm_writeback_job *job;
+       struct dma_fence *out_fence;
 
        spin_lock_irqsave(&wb_connector->job_lock, flags);
        job = list_first_entry_or_null(&wb_connector->job_queue,
                                       struct drm_writeback_job,
                                       list_entry);
-       if (job) {
+       if (job)
                list_del(&job->list_entry);
-               if (job->out_fence) {
-                       if (status)
-                               dma_fence_set_error(job->out_fence, status);
-                       dma_fence_signal(job->out_fence);
-                       dma_fence_put(job->out_fence);
-               }
-       }
+
        spin_unlock_irqrestore(&wb_connector->job_lock, flags);
 
        if (WARN_ON(!job))
                return;
 
+       out_fence = job->out_fence;
+       if (out_fence) {
+               if (status)
+                       dma_fence_set_error(out_fence, status);
+               dma_fence_signal(out_fence);
+               dma_fence_put(out_fence);
+               job->out_fence = NULL;
+       }
+
        INIT_WORK(&job->cleanup_work, cleanup_work);
        queue_work(system_long_wq, &job->cleanup_work);
 }
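
The reshuffle above applies a standard fence rule: detach the job while holding the spinlock, but signal and drop the fence only after releasing it, since dma_fence_signal() may run callbacks that take their own locks. A minimal sketch of the pattern (struct wb_job and the queue layout are invented for the example):

    #include <linux/dma-fence.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct wb_job {
            struct list_head link;
            struct dma_fence *fence;
    };

    static void complete_first_job(spinlock_t *lock, struct list_head *queue)
    {
            struct wb_job *job;
            unsigned long flags;

            spin_lock_irqsave(lock, flags);
            job = list_first_entry_or_null(queue, struct wb_job, link);
            if (job)
                    list_del(&job->link);
            spin_unlock_irqrestore(lock, flags);

            if (job && job->fence) {
                    dma_fence_signal(job->fence);   /* may invoke callbacks */
                    dma_fence_put(job->fence);
                    job->fence = NULL;
            }
    }
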
index 698db54..648cf02 100644 (file)
@@ -180,6 +180,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
                              etnaviv_cmdbuf_get_va(&submit->cmdbuf,
                                        &gpu->mmu_context->cmdbuf_mapping));
 
+       mutex_unlock(&gpu->mmu_context->lock);
+
        /* Reserve space for the bomap */
        if (n_bomap_pages) {
                bomap_start = bomap = iter.data;
@@ -221,8 +223,6 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
                                         obj->base.size);
        }
 
-       mutex_unlock(&gpu->mmu_context->lock);
-
        etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data);
 
        dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
index 043111a..f8bf488 100644 (file)
@@ -155,9 +155,11 @@ static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *bu
 
        memcpy(buf, v2_context->mtlb_cpu, SZ_4K);
        buf += SZ_4K;
-       for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K)
-               if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
+       for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++)
+               if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) {
                        memcpy(buf, v2_context->stlb_cpu[i], SZ_4K);
+                       buf += SZ_4K;
+               }
 }
 
 static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu,
index 35ebae6..3607d34 100644 (file)
@@ -328,12 +328,23 @@ etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
 
        ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping,
                                          global->memory_base);
-       if (ret) {
-               global->ops->free(ctx);
-               return NULL;
+       if (ret)
+               goto out_free;
+
+       if (global->version == ETNAVIV_IOMMU_V1 &&
+           ctx->cmdbuf_mapping.iova > 0x80000000) {
+               dev_err(global->dev,
+                       "command buffer outside valid memory window\n");
+               goto out_unmap;
        }
 
        return ctx;
+
+out_unmap:
+       etnaviv_cmdbuf_suballoc_unmap(ctx, &ctx->cmdbuf_mapping);
+out_free:
+       global->ops->free(ctx);
+       return NULL;
 }
 
 void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
index efb39f3..3250c1b 100644 (file)
@@ -1270,7 +1270,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
                DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, "
                              "disabling port %c DVI/HDMI support\n",
                              port_name(port), info->alternate_ddc_pin,
-                             port_name(p), port_name(port));
+                             port_name(p), port_name(p));
 
                /*
                 * If we have multiple ports supposedly sharing the
@@ -1278,9 +1278,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
                 * port. Otherwise they share the same ddc pin and
                 * system couldn't communicate with them separately.
                 *
-                * Give child device order the priority, first come first
-                * served.
+                * Give inverse child device order the priority,
+                * last one wins. Yes, there are real machines
+                * (eg. Asrock B250M-HDV) where VBT has both
+                * port A and port E with the same AUX ch and
+                * we must pick port E :(
                 */
+               info = &dev_priv->vbt.ddi_port_info[p];
+
                info->supports_dvi = false;
                info->supports_hdmi = false;
                info->alternate_ddc_pin = 0;
@@ -1316,7 +1321,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
                DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, "
                              "disabling port %c DP support\n",
                              port_name(port), info->alternate_aux_channel,
-                             port_name(p), port_name(port));
+                             port_name(p), port_name(p));
 
                /*
                 * If we have multiple ports supposedly sharing the
@@ -1324,9 +1329,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
                 * port. Otherwise they share the same aux channel
                 * and system couldn't communicate with them separately.
                 *
-                * Give child device order the priority, first come first
-                * served.
+                * Give inverse child device order the priority,
+                * last one wins. Yes, there are real machines
+                * (eg. Asrock B250M-HDV) where VBT has both
+                * port A and port E with the same AUX ch and
+                * we must pick port E :(
                 */
+               info = &dev_priv->vbt.ddi_port_info[p];
+
                info->supports_dp = false;
                info->alternate_aux_channel = 0;
        }
index b51d1ce..dfff6f4 100644 (file)
@@ -3280,7 +3280,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
        switch (fb->modifier) {
        case DRM_FORMAT_MOD_LINEAR:
        case I915_FORMAT_MOD_X_TILED:
-               return 4096;
+               /*
+                * Validated limit is 4k, but 5k should
+                * work apart from the following features:
+                * - Ytile (already limited to 4k)
+                * - FP16 (already limited to 4k)
+                * - render compression (already limited to 4k)
+                * - KVMR sprite and cursor (don't care)
+                * - horizontal panning (TODO verify this)
+                * - pipe and plane scaling (TODO verify this)
+                */
+               if (cpp == 8)
+                       return 4096;
+               else
+                       return 5120;
        case I915_FORMAT_MOD_Y_TILED_CCS:
        case I915_FORMAT_MOD_Yf_TILED_CCS:
                /* FIXME AUX plane? */
@@ -7261,7 +7274,7 @@ retry:
        pipe_config->fdi_lanes = lane;
 
        intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
-                              link_bw, &pipe_config->fdi_m_n, false);
+                              link_bw, &pipe_config->fdi_m_n, false, false);
 
        ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config);
        if (ret == -EDEADLK)
@@ -7508,11 +7521,15 @@ void
 intel_link_compute_m_n(u16 bits_per_pixel, int nlanes,
                       int pixel_clock, int link_clock,
                       struct intel_link_m_n *m_n,
-                      bool constant_n)
+                      bool constant_n, bool fec_enable)
 {
-       m_n->tu = 64;
+       u32 data_clock = bits_per_pixel * pixel_clock;
+
+       if (fec_enable)
+               data_clock = intel_dp_mode_to_fec_clock(data_clock);
 
-       compute_m_n(bits_per_pixel * pixel_clock,
+       m_n->tu = 64;
+       compute_m_n(data_clock,
                    link_clock * nlanes * 8,
                    &m_n->gmch_m, &m_n->gmch_n,
                    constant_n);
@@ -9298,7 +9315,6 @@ static bool wrpll_uses_pch_ssc(struct drm_i915_private *dev_priv,
 static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv)
 {
        struct intel_encoder *encoder;
-       bool pch_ssc_in_use = false;
        bool has_fdi = false;
 
        for_each_intel_encoder(&dev_priv->drm, encoder) {
@@ -9326,22 +9342,24 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv)
         * clock hierarchy. That would also allow us to do
         * clock bending finally.
         */
+       dev_priv->pch_ssc_use = 0;
+
        if (spll_uses_pch_ssc(dev_priv)) {
                DRM_DEBUG_KMS("SPLL using PCH SSC\n");
-               pch_ssc_in_use = true;
+               dev_priv->pch_ssc_use |= BIT(DPLL_ID_SPLL);
        }
 
        if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL1)) {
                DRM_DEBUG_KMS("WRPLL1 using PCH SSC\n");
-               pch_ssc_in_use = true;
+               dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL1);
        }
 
        if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL2)) {
                DRM_DEBUG_KMS("WRPLL2 using PCH SSC\n");
-               pch_ssc_in_use = true;
+               dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL2);
        }
 
-       if (pch_ssc_in_use)
+       if (dev_priv->pch_ssc_use)
                return;
 
        if (has_fdi) {
index e57e696..01fa87a 100644 (file)
@@ -414,7 +414,7 @@ enum phy_fia {
 void intel_link_compute_m_n(u16 bpp, int nlanes,
                            int pixel_clock, int link_clock,
                            struct intel_link_m_n *m_n,
-                           bool constant_n);
+                           bool constant_n, bool fec_enable);
 bool is_ccs_modifier(u64 modifier);
 void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv);
 u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
index 921ad0a..57e9f0b 100644 (file)
@@ -78,8 +78,8 @@
 #define DP_DSC_MAX_ENC_THROUGHPUT_0            340000
 #define DP_DSC_MAX_ENC_THROUGHPUT_1            400000
 
-/* DP DSC FEC Overhead factor = (100 - 2.4)/100 */
-#define DP_DSC_FEC_OVERHEAD_FACTOR             976
+/* DP DSC FEC Overhead factor = 1/(0.972261) */
+#define DP_DSC_FEC_OVERHEAD_FACTOR             972261
 
 /* Compliance test status bits  */
 #define INTEL_DP_RESOLUTION_SHIFT_MASK 0
@@ -494,6 +494,97 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
        return 0;
 }
 
+u32 intel_dp_mode_to_fec_clock(u32 mode_clock)
+{
+       return div_u64(mul_u32_u32(mode_clock, 1000000U),
+                      DP_DSC_FEC_OVERHEAD_FACTOR);
+}
+
+static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count,
+                                      u32 mode_clock, u32 mode_hdisplay)
+{
+       u32 bits_per_pixel, max_bpp_small_joiner_ram;
+       int i;
+
+       /*
+        * Available Link Bandwidth (Kbits/sec) = NumberOfLanes *
+        * LinkSymbolClock * 8 * TimeSlotsPerMTP;
+        * for SST -> TimeSlotsPerMTP is 1,
+        * for MST -> TimeSlotsPerMTP has to be calculated
+        */
+       bits_per_pixel = (link_clock * lane_count * 8) /
+                        intel_dp_mode_to_fec_clock(mode_clock);
+       DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel);
+
+       /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */
+       max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / mode_hdisplay;
+       DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram);
+
+       /*
+        * Greatest allowed DSC BPP = MIN (output BPP from available Link BW
+        * check, output bpp from small joiner RAM check)
+        */
+       bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram);
+
+       /* Error out if the max bpp is less than smallest allowed valid bpp */
+       if (bits_per_pixel < valid_dsc_bpp[0]) {
+               DRM_DEBUG_KMS("Unsupported BPP %u, min %u\n",
+                             bits_per_pixel, valid_dsc_bpp[0]);
+               return 0;
+       }
+
+       /* Find the nearest match in the array of known BPPs from VESA */
+       for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) {
+               if (bits_per_pixel < valid_dsc_bpp[i + 1])
+                       break;
+       }
+       bits_per_pixel = valid_dsc_bpp[i];
+
+       /*
+        * Compressed BPP in U6.4 format so multiply by 16, for Gen 11,
+        * fractional part is 0
+        */
+       return bits_per_pixel << 4;
+}
+
+static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
+                                      int mode_clock, int mode_hdisplay)
+{
+       u8 min_slice_count, i;
+       int max_slice_width;
+
+       if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE)
+               min_slice_count = DIV_ROUND_UP(mode_clock,
+                                              DP_DSC_MAX_ENC_THROUGHPUT_0);
+       else
+               min_slice_count = DIV_ROUND_UP(mode_clock,
+                                              DP_DSC_MAX_ENC_THROUGHPUT_1);
+
+       max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd);
+       if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) {
+               DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n",
+                             max_slice_width);
+               return 0;
+       }
+       /* Also take into account max slice width */
+       min_slice_count = min_t(u8, min_slice_count,
+                               DIV_ROUND_UP(mode_hdisplay,
+                                            max_slice_width));
+
+       /* Find the closest match to the valid slice count values */
+       for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) {
+               if (valid_dsc_slicecount[i] >
+                   drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd,
+                                                   false))
+                       break;
+               if (min_slice_count  <= valid_dsc_slicecount[i])
+                       return valid_dsc_slicecount[i];
+       }
+
+       DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count);
+       return 0;
+}
+
 static enum drm_mode_status
 intel_dp_mode_valid(struct drm_connector *connector,
                    struct drm_display_mode *mode)
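
Worked numbers may help here. The new factor stores 1/0.972261 scaled by 10^6, so intel_dp_mode_to_fec_clock() grows the effective clock by ~2.85%, slightly more than the nominal 2.4% FEC parity overhead the old factor encoded. For a hypothetical HBR3 x4 link (link_clock = 810000 kHz, 4 lanes) driving a ~4k60 mode (~533250 kHz pixel clock), the figures below are illustrative only:

        fec_clock = 533250 * 1000000 / 972261          /* ~548463 kHz */
        max_bpp   = (810000 * 4 * 8) / 548463          /* ~47 bpp     */

The result is then clamped by the small-joiner RAM check and snapped down to the nearest entry in valid_dsc_bpp[].
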
@@ -2226,7 +2317,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
                               adjusted_mode->crtc_clock,
                               pipe_config->port_clock,
                               &pipe_config->dp_m_n,
-                              constant_n);
+                              constant_n, pipe_config->fec_enable);
 
        if (intel_connector->panel.downclock_mode != NULL &&
                dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) {
@@ -2236,7 +2327,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
                                               intel_connector->panel.downclock_mode->clock,
                                               pipe_config->port_clock,
                                               &pipe_config->dp_m2_n2,
-                                              constant_n);
+                                              constant_n, pipe_config->fec_enable);
        }
 
        if (!HAS_DDI(dev_priv))
@@ -4323,91 +4414,6 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector)
                DP_DPRX_ESI_LEN;
 }
 
-u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count,
-                               int mode_clock, int mode_hdisplay)
-{
-       u16 bits_per_pixel, max_bpp_small_joiner_ram;
-       int i;
-
-       /*
-        * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)*
-        * (LinkSymbolClock)* 8 * ((100-FECOverhead)/100)*(TimeSlotsPerMTP)
-        * FECOverhead = 2.4%, for SST -> TimeSlotsPerMTP is 1,
-        * for MST -> TimeSlotsPerMTP has to be calculated
-        */
-       bits_per_pixel = (link_clock * lane_count * 8 *
-                         DP_DSC_FEC_OVERHEAD_FACTOR) /
-               mode_clock;
-
-       /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */
-       max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER /
-               mode_hdisplay;
-
-       /*
-        * Greatest allowed DSC BPP = MIN (output BPP from avaialble Link BW
-        * check, output bpp from small joiner RAM check)
-        */
-       bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram);
-
-       /* Error out if the max bpp is less than smallest allowed valid bpp */
-       if (bits_per_pixel < valid_dsc_bpp[0]) {
-               DRM_DEBUG_KMS("Unsupported BPP %d\n", bits_per_pixel);
-               return 0;
-       }
-
-       /* Find the nearest match in the array of known BPPs from VESA */
-       for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) {
-               if (bits_per_pixel < valid_dsc_bpp[i + 1])
-                       break;
-       }
-       bits_per_pixel = valid_dsc_bpp[i];
-
-       /*
-        * Compressed BPP in U6.4 format so multiply by 16, for Gen 11,
-        * fractional part is 0
-        */
-       return bits_per_pixel << 4;
-}
-
-u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
-                               int mode_clock,
-                               int mode_hdisplay)
-{
-       u8 min_slice_count, i;
-       int max_slice_width;
-
-       if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE)
-               min_slice_count = DIV_ROUND_UP(mode_clock,
-                                              DP_DSC_MAX_ENC_THROUGHPUT_0);
-       else
-               min_slice_count = DIV_ROUND_UP(mode_clock,
-                                              DP_DSC_MAX_ENC_THROUGHPUT_1);
-
-       max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd);
-       if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) {
-               DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n",
-                             max_slice_width);
-               return 0;
-       }
-       /* Also take into account max slice width */
-       min_slice_count = min_t(u8, min_slice_count,
-                               DIV_ROUND_UP(mode_hdisplay,
-                                            max_slice_width));
-
-       /* Find the closest match to the valid slice count values */
-       for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) {
-               if (valid_dsc_slicecount[i] >
-                   drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd,
-                                                   false))
-                       break;
-               if (min_slice_count  <= valid_dsc_slicecount[i])
-                       return valid_dsc_slicecount[i];
-       }
-
-       DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count);
-       return 0;
-}
-
 static void
 intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
                               const struct intel_crtc_state *crtc_state)
index 657bbb1..00981fb 100644 (file)
@@ -102,10 +102,6 @@ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp);
 bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp);
 bool
 intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status);
-u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count,
-                               int mode_clock, int mode_hdisplay);
-u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock,
-                               int mode_hdisplay);
 
 bool intel_dp_read_dpcd(struct intel_dp *intel_dp);
 bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp);
@@ -118,4 +114,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
        return ~((1 << lane_count) - 1) & 0xf;
 }
 
+u32 intel_dp_mode_to_fec_clock(u32 mode_clock);
+
 #endif /* __INTEL_DP_H__ */
index 6df240a..600873c 100644 (file)
@@ -81,7 +81,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
                               adjusted_mode->crtc_clock,
                               crtc_state->port_clock,
                               &crtc_state->dp_m_n,
-                              constant_n);
+                              constant_n, crtc_state->fec_enable);
        crtc_state->dp_m_n.tu = slots;
 
        return 0;
@@ -615,7 +615,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum
        intel_encoder->type = INTEL_OUTPUT_DP_MST;
        intel_encoder->power_domain = intel_dig_port->base.power_domain;
        intel_encoder->port = intel_dig_port->base.port;
-       intel_encoder->crtc_mask = BIT(pipe);
+       intel_encoder->crtc_mask = 0x7;
        intel_encoder->cloneable = 0;
 
        intel_encoder->compute_config = intel_dp_mst_compute_config;
index b8148f8..d5a298c 100644 (file)
@@ -525,16 +525,31 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv,
        val = I915_READ(WRPLL_CTL(id));
        I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE);
        POSTING_READ(WRPLL_CTL(id));
+
+       /*
+        * Try to set up the PCH reference clock once all DPLLs
+        * that depend on it have been shut down.
+        */
+       if (dev_priv->pch_ssc_use & BIT(id))
+               intel_init_pch_refclk(dev_priv);
 }
 
 static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv,
                                 struct intel_shared_dpll *pll)
 {
+       enum intel_dpll_id id = pll->info->id;
        u32 val;
 
        val = I915_READ(SPLL_CTL);
        I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE);
        POSTING_READ(SPLL_CTL);
+
+       /*
+        * Try to set up the PCH reference clock once all DPLLs
+        * that depend on it have been shut down.
+        */
+       if (dev_priv->pch_ssc_use & BIT(id))
+               intel_init_pch_refclk(dev_priv);
 }
 
 static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
index e758879..104cf6d 100644 (file)
@@ -147,11 +147,11 @@ enum intel_dpll_id {
         */
        DPLL_ID_ICL_MGPLL4 = 6,
        /**
-        * @DPLL_ID_TGL_TCPLL5: TGL TC PLL port 5 (TC5)
+        * @DPLL_ID_TGL_MGPLL5: TGL TC PLL port 5 (TC5)
         */
        DPLL_ID_TGL_MGPLL5 = 7,
        /**
-        * @DPLL_ID_TGL_TCPLL6: TGL TC PLL port 6 (TC6)
+        * @DPLL_ID_TGL_MGPLL6: TGL TC PLL port 6 (TC6)
         */
        DPLL_ID_TGL_MGPLL6 = 8,
 };
index dea63be..cae25e4 100644 (file)
@@ -1528,6 +1528,7 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state,
        int src_x, src_w, src_h, crtc_w, crtc_h;
        const struct drm_display_mode *adjusted_mode =
                &crtc_state->base.adjusted_mode;
+       unsigned int stride = plane_state->color_plane[0].stride;
        unsigned int cpp = fb->format->cpp[0];
        unsigned int width_bytes;
        int min_width, min_height;
@@ -1569,9 +1570,9 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state,
                return -EINVAL;
        }
 
-       if (width_bytes > 4096 || fb->pitches[0] > 4096) {
+       if (stride > 4096) {
                DRM_DEBUG_KMS("Stride (%u) exceeds hardware max with scaling (%u)\n",
-                             fb->pitches[0], 4096);
+                             stride, 4096);
                return -EINVAL;
        }
 
index 261c9bd..05289ed 100644 (file)
@@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 
        wakeref = intel_runtime_pm_get(rpm);
 
-       srcu = intel_gt_reset_trylock(ggtt->vm.gt);
-       if (srcu < 0) {
-               ret = srcu;
+       ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+       if (ret)
                goto err_rpm;
-       }
 
        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
@@ -318,7 +316,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
                intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
                                   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
-       i915_vma_set_ggtt_write(vma);
+       if (write) {
+               GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+               i915_vma_set_ggtt_write(vma);
+               obj->mm.dirty = true;
+       }
 
 err_fence:
        i915_vma_unpin_fence(vma);
@@ -362,6 +364,7 @@ err:
                return VM_FAULT_OOM;
        case -ENOSPC:
        case -EFAULT:
+       case -ENODEV: /* bad object, how did you get here! */
                return VM_FAULT_SIGBUS;
        default:
                WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
@@ -473,10 +476,16 @@ i915_gem_mmap_gtt(struct drm_file *file,
        if (!obj)
                return -ENOENT;
 
+       if (i915_gem_object_never_bind_ggtt(obj)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
        ret = create_mmap_offset(obj);
        if (ret == 0)
                *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
 
+out:
        i915_gem_object_put(obj);
        return ret;
 }
index 5efb993..ddf3605 100644 (file)
@@ -152,6 +152,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
        return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
 }
 
+static inline bool
+i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj)
+{
+       return obj->ops->flags & I915_GEM_OBJECT_NO_GGTT;
+}
+
 static inline bool
 i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
 {
index ede0eb4..646859f 100644 (file)
@@ -32,7 +32,8 @@ struct drm_i915_gem_object_ops {
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE        BIT(0)
 #define I915_GEM_OBJECT_IS_SHRINKABLE  BIT(1)
 #define I915_GEM_OBJECT_IS_PROXY       BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL   BIT(3)
+#define I915_GEM_OBJECT_NO_GGTT                BIT(3)
+#define I915_GEM_OBJECT_ASYNC_CANCEL   BIT(4)
 
        /* Interface between the GEM object and its backing storage.
         * get_pages() is called once prior to the use of the associated set
index 92e53c2..ad2a63d 100644 (file)
@@ -241,9 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
        mutex_lock(&i915->drm.struct_mutex);
        intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
 
-       i915_gem_restore_gtt_mappings(i915);
-       i915_gem_restore_fences(i915);
-
        if (i915_gem_init_hw(i915))
                goto err_wedged;
 
index 11b231c..6b3b50f 100644 (file)
@@ -702,6 +702,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE |
+                I915_GEM_OBJECT_NO_GGTT |
                 I915_GEM_OBJECT_ASYNC_CANCEL,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
index d3c6993..22aab85 100644 (file)
@@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists)
        return READ_ONCE(*execlists->active);
 }
 
+static inline void
+execlists_active_lock_bh(struct intel_engine_execlists *execlists)
+{
+       local_bh_disable(); /* prevent local softirq and lock recursion */
+       tasklet_lock(&execlists->tasklet);
+}
+
+static inline void
+execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
+{
+       tasklet_unlock(&execlists->tasklet);
+       local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
+}
+
 struct i915_request *
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
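
A hedged usage sketch for the pair above, mirroring the engine-dump hunk later in this series: the submission tasklet runs from softirq context, so bottom halves must be disabled before taking the tasklet lock, or the trylock spin could live-lock against a softirq on the same CPU:

        execlists_active_lock_bh(execlists);
        for (port = execlists->active; (rq = *port); port++)
                print_request(m, rq, "rq: ");   /* illustrative consumer */
        execlists_active_unlock_bh(execlists);
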
 
index 82630db..4ce8626 100644 (file)
@@ -1197,9 +1197,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
                                         struct drm_printer *m)
 {
        struct drm_i915_private *dev_priv = engine->i915;
-       const struct intel_engine_execlists * const execlists =
-               &engine->execlists;
-       unsigned long flags;
+       struct intel_engine_execlists * const execlists = &engine->execlists;
        u64 addr;
 
        if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
@@ -1281,7 +1279,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
                                   idx, hws[idx * 2], hws[idx * 2 + 1]);
                }
 
-               spin_lock_irqsave(&engine->active.lock, flags);
+               execlists_active_lock_bh(execlists);
                for (port = execlists->active; (rq = *port); port++) {
                        char hdr[80];
                        int len;
@@ -1309,7 +1307,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
                                 hwsp_seqno(rq));
                        print_request(m, rq, hdr);
                }
-               spin_unlock_irqrestore(&engine->active.lock, flags);
+               execlists_active_unlock_bh(execlists);
        } else if (INTEL_GEN(dev_priv) > 6) {
                drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
                           ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1440,8 +1438,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
        if (!intel_engine_supports_stats(engine))
                return -ENODEV;
 
-       spin_lock_irqsave(&engine->active.lock, flags);
-       write_seqlock(&engine->stats.lock);
+       execlists_active_lock_bh(execlists);
+       write_seqlock_irqsave(&engine->stats.lock, flags);
 
        if (unlikely(engine->stats.enabled == ~0)) {
                err = -EBUSY;
@@ -1469,8 +1467,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
        }
 
 unlock:
-       write_sequnlock(&engine->stats.lock);
-       spin_unlock_irqrestore(&engine->active.lock, flags);
+       write_sequnlock_irqrestore(&engine->stats.lock, flags);
+       execlists_active_unlock_bh(execlists);
 
        return err;
 }
index d425844..06a506c 100644 (file)
@@ -234,6 +234,13 @@ static void execlists_init_reg_state(u32 *reg_state,
                                     struct intel_engine_cs *engine,
                                     struct intel_ring *ring);
 
+static void mark_eio(struct i915_request *rq)
+{
+       if (!i915_request_signaled(rq))
+               dma_fence_set_error(&rq->fence, -EIO);
+       i915_request_mark_complete(rq);
+}
+
 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 {
        return (i915_ggtt_offset(engine->status_page.vma) +
@@ -631,7 +638,6 @@ execlists_schedule_out(struct i915_request *rq)
        struct intel_engine_cs *cur, *old;
 
        trace_i915_request_out(rq);
-       GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 
        old = READ_ONCE(ce->inflight);
        do
@@ -797,6 +803,17 @@ static bool can_merge_rq(const struct i915_request *prev,
        GEM_BUG_ON(prev == next);
        GEM_BUG_ON(!assert_priority_queue(prev, next));
 
+       /*
+        * We do not submit known completed requests. Therefore if the next
+        * request is already completed, we can pretend to merge it in
+        * with the previous context (and we will skip updating the ELSP
+        * and tracking). Thus hopefully keeping the ELSP full with active
+        * contexts, despite the best efforts of preempt-to-busy to confuse
+        * us.
+        */
+       if (i915_request_completed(next))
+               return true;
+
        if (!can_merge_ctx(prev->hw_context, next->hw_context))
                return false;
 
@@ -893,7 +910,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
 static struct i915_request *
 last_active(const struct intel_engine_execlists *execlists)
 {
-       struct i915_request * const *last = execlists->active;
+       struct i915_request * const *last = READ_ONCE(execlists->active);
 
        while (*last && i915_request_completed(*last))
                last++;
@@ -1172,21 +1189,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                continue;
                        }
 
-                       if (i915_request_completed(rq)) {
-                               ve->request = NULL;
-                               ve->base.execlists.queue_priority_hint = INT_MIN;
-                               rb_erase_cached(rb, &execlists->virtual);
-                               RB_CLEAR_NODE(rb);
-
-                               rq->engine = engine;
-                               __i915_request_submit(rq);
-
-                               spin_unlock(&ve->base.active.lock);
-
-                               rb = rb_first_cached(&execlists->virtual);
-                               continue;
-                       }
-
                        if (last && !can_merge_rq(last, rq)) {
                                spin_unlock(&ve->base.active.lock);
                                return; /* leave this for another */
@@ -1237,11 +1239,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                GEM_BUG_ON(ve->siblings[0] != engine);
                        }
 
-                       __i915_request_submit(rq);
-                       if (!i915_request_completed(rq)) {
+                       if (__i915_request_submit(rq)) {
                                submit = true;
                                last = rq;
                        }
+                       i915_request_put(rq);
+
+                       /*
+                        * Hmm, we have a bunch of virtual engine requests,
+                        * but the first one was already completed (thanks
+                        * preempt-to-busy!). Keep looking at the veng queue
+                        * until we have no more relevant requests (i.e.
+                        * the normal submit queue has higher priority).
+                        */
+                       if (!submit) {
+                               spin_unlock(&ve->base.active.lock);
+                               rb = rb_first_cached(&execlists->virtual);
+                               continue;
+                       }
                }
 
                spin_unlock(&ve->base.active.lock);
@@ -1254,8 +1269,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                int i;
 
                priolist_for_each_request_consume(rq, rn, p, i) {
-                       if (i915_request_completed(rq))
-                               goto skip;
+                       bool merge = true;
 
                        /*
                         * Can we combine this request with the current port?
@@ -1296,14 +1310,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                    ctx_single_port_submission(rq->hw_context))
                                        goto done;
 
-                               *port = execlists_schedule_in(last, port - execlists->pending);
-                               port++;
+                               merge = false;
                        }
 
-                       last = rq;
-                       submit = true;
-skip:
-                       __i915_request_submit(rq);
+                       if (__i915_request_submit(rq)) {
+                               if (!merge) {
+                                       *port = execlists_schedule_in(last, port - execlists->pending);
+                                       port++;
+                                       last = NULL;
+                               }
+
+                               GEM_BUG_ON(last &&
+                                          !can_merge_ctx(last->hw_context,
+                                                         rq->hw_context));
+
+                               submit = true;
+                               last = rq;
+                       }
                }
 
                rb_erase_cached(&p->node, &execlists->queue);
@@ -1593,8 +1616,11 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
        lockdep_assert_held(&engine->active.lock);
-       if (!engine->execlists.pending[0])
+       if (!engine->execlists.pending[0]) {
+               rcu_read_lock(); /* protect peeking at execlists->active */
                execlists_dequeue(engine);
+               rcu_read_unlock();
+       }
 }
 
 /*
@@ -2399,10 +2425,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 
 static struct i915_request *active_request(struct i915_request *rq)
 {
-       const struct list_head * const list = &rq->timeline->requests;
        const struct intel_context * const ce = rq->hw_context;
        struct i915_request *active = NULL;
+       struct list_head *list;
 
+       if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
+               return rq;
+
+       list = &rq->timeline->requests;
        list_for_each_entry_from_reverse(rq, list, link) {
                if (i915_request_completed(rq))
                        break;
@@ -2552,12 +2582,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        __execlists_reset(engine, true);
 
        /* Mark all executing requests as skipped. */
-       list_for_each_entry(rq, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(rq))
-                       dma_fence_set_error(&rq->fence, -EIO);
-
-               i915_request_mark_complete(rq);
-       }
+       list_for_each_entry(rq, &engine->active.requests, sched.link)
+               mark_eio(rq);
 
        /* Flush the queued requests to the timeline list (for retiring). */
        while ((rb = rb_first_cached(&execlists->queue))) {
@@ -2565,10 +2591,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
                int i;
 
                priolist_for_each_request_consume(rq, rn, p, i) {
-                       list_del_init(&rq->sched.link);
+                       mark_eio(rq);
                        __i915_request_submit(rq);
-                       dma_fence_set_error(&rq->fence, -EIO);
-                       i915_request_mark_complete(rq);
                }
 
                rb_erase_cached(&p->node, &execlists->queue);
@@ -2584,13 +2608,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
                RB_CLEAR_NODE(rb);
 
                spin_lock(&ve->base.active.lock);
-               if (ve->request) {
-                       ve->request->engine = engine;
-                       __i915_request_submit(ve->request);
-                       dma_fence_set_error(&ve->request->fence, -EIO);
-                       i915_request_mark_complete(ve->request);
+               rq = fetch_and_zero(&ve->request);
+               if (rq) {
+                       mark_eio(rq);
+
+                       rq->engine = engine;
+                       __i915_request_submit(rq);
+                       i915_request_put(rq);
+
                        ve->base.execlists.queue_priority_hint = INT_MIN;
-                       ve->request = NULL;
                }
                spin_unlock(&ve->base.active.lock);
        }
@@ -3594,6 +3620,8 @@ submit_engine:
 static void virtual_submit_request(struct i915_request *rq)
 {
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
+       struct i915_request *old;
+       unsigned long flags;
 
        GEM_TRACE("%s: rq=%llx:%lld\n",
                  ve->base.name,
@@ -3602,15 +3630,31 @@ static void virtual_submit_request(struct i915_request *rq)
 
        GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
 
-       GEM_BUG_ON(ve->request);
-       GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+       spin_lock_irqsave(&ve->base.active.lock, flags);
+
+       old = ve->request;
+       if (old) { /* background completion event from preempt-to-busy */
+               GEM_BUG_ON(!i915_request_completed(old));
+               __i915_request_submit(old);
+               i915_request_put(old);
+       }
+
+       if (i915_request_completed(rq)) {
+               __i915_request_submit(rq);
+
+               ve->base.execlists.queue_priority_hint = INT_MIN;
+               ve->request = NULL;
+       } else {
+               ve->base.execlists.queue_priority_hint = rq_prio(rq);
+               ve->request = i915_request_get(rq);
 
-       ve->base.execlists.queue_priority_hint = rq_prio(rq);
-       WRITE_ONCE(ve->request, rq);
+               GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+               list_move_tail(&rq->sched.link, virtual_queue(ve));
 
-       list_move_tail(&rq->sched.link, virtual_queue(ve));
+               tasklet_schedule(&ve->base.execlists.tasklet);
+       }
 
-       tasklet_schedule(&ve->base.execlists.tasklet);
+       spin_unlock_irqrestore(&ve->base.active.lock, flags);
 }
 
 static struct ve_bond *
@@ -3631,18 +3675,22 @@ static void
 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 {
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
+       intel_engine_mask_t allowed, exec;
        struct ve_bond *bond;
 
+       allowed = ~to_request(signal)->engine->mask;
+
        bond = virtual_find_bond(ve, to_request(signal)->engine);
-       if (bond) {
-               intel_engine_mask_t old, new, cmp;
+       if (bond)
+               allowed &= bond->sibling_mask;
 
-               cmp = READ_ONCE(rq->execution_mask);
-               do {
-                       old = cmp;
-                       new = cmp & bond->sibling_mask;
-               } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
-       }
+       /* Restrict the bonded request to run on only the available engines */
+       exec = READ_ONCE(rq->execution_mask);
+       while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
+               ;
+
+       /* Prevent the master from being re-run on the bonded engines */
+       to_request(signal)->execution_mask &= ~allowed;
 }
 
 struct intel_context *
index b9d84d5..8cea423 100644 (file)
@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq)
        struct intel_engine_cs *engine = rq->engine;
        struct i915_gem_context *hung_ctx = rq->gem_context;
 
-       lockdep_assert_held(&engine->active.lock);
-
        if (!i915_request_is_active(rq))
                return;
 
+       lockdep_assert_held(&engine->active.lock);
        list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
                if (rq->gem_context == hung_ctx)
                        i915_request_skip(rq, -EIO);
@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
                  rq->fence.seqno,
                  yesno(guilty));
 
-       lockdep_assert_held(&rq->engine->active.lock);
        GEM_BUG_ON(i915_request_completed(rq));
 
        if (guilty) {
@@ -1214,10 +1212,8 @@ out:
        intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 }
 
-int intel_gt_reset_trylock(struct intel_gt *gt)
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
 {
-       int srcu;
-
        might_lock(&gt->reset.backoff_srcu);
        might_sleep();
 
@@ -1232,10 +1228,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt)
 
                rcu_read_lock();
        }
-       srcu = srcu_read_lock(&gt->reset.backoff_srcu);
+       *srcu = srcu_read_lock(&gt->reset.backoff_srcu);
        rcu_read_unlock();
 
-       return srcu;
+       return 0;
 }
 
 void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
index 37a987b..52c0019 100644 (file)
@@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine,
 
 void __i915_request_reset(struct i915_request *rq, bool guilty);
 
-int __must_check intel_gt_reset_trylock(struct intel_gt *gt);
+int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
 void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
 
 void intel_gt_set_wedged(struct intel_gt *gt);
index 601c162..bacaa7b 100644 (file)
@@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
        struct intel_engine_cs *engine = rq->engine;
        enum intel_engine_id id;
        const int num_engines =
-               IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+               IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
        bool force_restore = false;
        int len;
        u32 *cs;
index 45481eb..5f6ec2f 100644 (file)
@@ -1063,6 +1063,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
 
        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
+
+       /* WaSendPushConstantsFromMMIO:skl,bxt */
+       whitelist_reg(w, COMMON_SLICE_CHICKEN2);
 }
 
 static void skl_whitelist_build(struct intel_engine_cs *engine)
index 0206967..bb6f86c 100644 (file)
@@ -1924,6 +1924,11 @@ static int i915_drm_resume(struct drm_device *dev)
        if (ret)
                DRM_ERROR("failed to re-enable GGTT\n");
 
+       mutex_lock(&dev_priv->drm.struct_mutex);
+       i915_gem_restore_gtt_mappings(dev_priv);
+       i915_gem_restore_fences(dev_priv);
+       mutex_unlock(&dev_priv->drm.struct_mutex);
+
        intel_csr_ucode_resume(dev_priv);
 
        i915_restore_state(dev_priv);
index 772154e..953e1d1 100644 (file)
@@ -1723,6 +1723,8 @@ struct drm_i915_private {
                struct work_struct idle_work;
        } gem;
 
+       u8 pch_ssc_use;
+
        /* For i945gm vblank irq vs. C3 workaround */
        struct {
                struct work_struct work;
index 95e7c52..d0f94f2 100644 (file)
@@ -969,6 +969,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
        lockdep_assert_held(&obj->base.dev->struct_mutex);
 
+       if (i915_gem_object_never_bind_ggtt(obj))
+               return ERR_PTR(-ENODEV);
+
        if (flags & PIN_MAPPABLE &&
            (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
                /* If the required space is larger than the available
index 167a7b5..6795f1d 100644 (file)
@@ -77,6 +77,12 @@ struct drm_i915_private;
 
 #define I915_GEM_IDLE_TIMEOUT (HZ / 5)
 
+static inline void tasklet_lock(struct tasklet_struct *t)
+{
+       while (!tasklet_trylock(t))
+               cpu_relax();
+}
+
 static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 {
        if (!atomic_fetch_inc(&t->count))
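
For reference on the tasklet_lock() helper above: tasklet_trylock() fails while the tasklet body is running on another CPU, so the helper simply spins until the current run (if any) finishes. A hedged pairing with the stock tasklet_unlock():

        tasklet_lock(t);        /* excludes a concurrently running body */
        /* ... inspect or mutate state the tasklet also touches ... */
        tasklet_unlock(t);
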
index a53777d..1c55068 100644 (file)
@@ -194,6 +194,27 @@ static void free_capture_list(struct i915_request *request)
        }
 }
 
+static void remove_from_engine(struct i915_request *rq)
+{
+       struct intel_engine_cs *engine, *locked;
+
+       /*
+        * Virtual engines complicate acquiring the engine timeline lock,
+        * as their rq->engine pointer is not stable until under that
+        * engine lock. The simple ploy we use is to take the lock then
+        * check that the rq still belongs to the newly locked engine.
+        */
+       locked = READ_ONCE(rq->engine);
+       spin_lock(&locked->active.lock);
+       while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
+               spin_unlock(&locked->active.lock);
+               spin_lock(&engine->active.lock);
+               locked = engine;
+       }
+       list_del(&rq->sched.link);
+       spin_unlock(&locked->active.lock);
+}
+
 static bool i915_request_retire(struct i915_request *rq)
 {
        struct i915_active_request *active, *next;
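
One step worth spelling out for remove_from_engine() above: the loop can only exit once the engine re-read from rq->engine matches the engine whose active.lock we hold, and since rq->engine may only be rewritten under that same lock, the pointer is stable from that moment on. A hedged way to state the post-condition:

        /* Hypothetical assertion of the invariant the loop establishes. */
        GEM_BUG_ON(READ_ONCE(rq->engine) != locked);
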
@@ -259,9 +280,7 @@ static bool i915_request_retire(struct i915_request *rq)
         * request that we have removed from the HW and put back on a run
         * queue.
         */
-       spin_lock(&rq->engine->active.lock);
-       list_del(&rq->sched.link);
-       spin_unlock(&rq->engine->active.lock);
+       remove_from_engine(rq);
 
        spin_lock(&rq->lock);
        i915_request_mark_complete(rq);
@@ -358,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
        return 0;
 }
 
-void __i915_request_submit(struct i915_request *request)
+bool __i915_request_submit(struct i915_request *request)
 {
        struct intel_engine_cs *engine = request->engine;
+       bool result = false;
 
        GEM_TRACE("%s fence %llx:%lld, current %d\n",
                  engine->name,
@@ -370,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
        GEM_BUG_ON(!irqs_disabled());
        lockdep_assert_held(&engine->active.lock);
 
+       /*
+        * With the advent of preempt-to-busy, we frequently encounter
+        * requests that we have unsubmitted from HW, but left running
+        * until the next ack and so have completed in the meantime. On
+        * resubmission of that completed request, we can skip
+        * updating the payload, and execlists can even skip submitting
+        * the request.
+        *
+        * We must remove the request from the caller's priority queue,
+        * and the caller must only call us when the request is in their
+        * priority queue, under the active.lock. This ensures that the
+        * request has *not* yet been retired and we can safely move
+        * the request into the engine->active.list where it will be
+        * dropped upon retiring. (Otherwise, resubmitting a *retired*
+        * request would be a horrible use-after-free.)
+        */
+       if (i915_request_completed(request))
+               goto xfer;
+
        if (i915_gem_context_is_banned(request->gem_context))
                i915_request_skip(request, -EIO);
 
@@ -393,13 +432,18 @@ void __i915_request_submit(struct i915_request *request)
            i915_sw_fence_signaled(&request->semaphore))
                engine->saturated |= request->sched.semaphores;
 
-       /* We may be recursing from the signal callback of another i915 fence */
-       spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+       engine->emit_fini_breadcrumb(request,
+                                    request->ring->vaddr + request->postfix);
 
-       list_move_tail(&request->sched.link, &engine->active.requests);
+       trace_i915_request_execute(request);
+       engine->serial++;
+       result = true;
+
+xfer:  /* We may be recursing from the signal callback of another i915 fence */
+       spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-       GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
-       set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+       if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+               list_move_tail(&request->sched.link, &engine->active.requests);
 
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
            !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
@@ -410,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
 
        spin_unlock(&request->lock);
 
-       engine->emit_fini_breadcrumb(request,
-                                    request->ring->vaddr + request->postfix);
-
-       engine->serial++;
-
-       trace_i915_request_execute(request);
+       return result;
 }
 
 void i915_request_submit(struct i915_request *request)
index 8ac6e12..e4dd013 100644 (file)
@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
 
 void i915_request_add(struct i915_request *rq);
 
-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
 void i915_request_skip(struct i915_request *request, int error);
index fa864d8..15f8bff 100644 (file)
@@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
                WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
                return PCH_CNP;
        case INTEL_PCH_CMP_DEVICE_ID_TYPE:
+       case INTEL_PCH_CMP2_DEVICE_ID_TYPE:
                DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n");
                WARN_ON(!IS_COFFEELAKE(dev_priv));
                /* CometPoint is CNP Compatible */
index e6a2d65..c29c81e 100644 (file)
@@ -41,6 +41,7 @@ enum intel_pch {
 #define INTEL_PCH_CNP_DEVICE_ID_TYPE           0xA300
 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE                0x9D80
 #define INTEL_PCH_CMP_DEVICE_ID_TYPE           0x0280
+#define INTEL_PCH_CMP2_DEVICE_ID_TYPE          0x0680
 #define INTEL_PCH_ICP_DEVICE_ID_TYPE           0x3480
 #define INTEL_PCH_MCC_DEVICE_ID_TYPE           0x4B00
 #define INTEL_PCH_MCC2_DEVICE_ID_TYPE          0x3880
index bb6dd54..3759383 100644 (file)
@@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915)
        with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
                intel_gt_sanitize(&i915->gt, false);
                i915_gem_sanitize(i915);
+
+               mutex_lock(&i915->drm.struct_mutex);
+               i915_gem_restore_gtt_mappings(i915);
+               i915_gem_restore_fences(i915);
+               mutex_unlock(&i915->drm.struct_mutex);
+
                i915_gem_resume(i915);
        }
 }
index 663ff9f..1e7b1be 100644 (file)
@@ -26,6 +26,8 @@
 #include "dsi_cfg.h"
 #include "msm_kms.h"
 
+#define DSI_RESET_TOGGLE_DELAY_MS 20
+
 static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor)
 {
        u32 ver;
@@ -986,7 +988,7 @@ static void dsi_sw_reset(struct msm_dsi_host *msm_host)
        wmb(); /* clocks need to be enabled before reset */
 
        dsi_write(msm_host, REG_DSI_RESET, 1);
-       wmb(); /* make sure reset happen */
+       msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happens */
        dsi_write(msm_host, REG_DSI_RESET, 0);
 }
 
@@ -1396,7 +1398,7 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host)
 
        /* dsi controller can only be reset while clocks are running */
        dsi_write(msm_host, REG_DSI_RESET, 1);
-       wmb();  /* make sure reset happen */
+       msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happens */
        dsi_write(msm_host, REG_DSI_RESET, 0);
        wmb();  /* controller out of reset */
        dsi_write(msm_host, REG_DSI_CTRL, data0);
index 2db0293..5193b62 100644 (file)
@@ -267,7 +267,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset,
                        asyw->image.pitch[0] = fb->base.pitches[0];
                }
 
-               if (!(asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC))
+               if (!asyh->state.async_flip)
                        asyw->image.interval = 1;
                else
                        asyw->image.interval = 0;
@@ -383,7 +383,7 @@ nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw,
        }
 
        /* Can't do an immediate flip while changing the LUT. */
-       asyh->state.pageflip_flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
+       asyh->state.async_flip = false;
 }
 
 static int
index e226324..4bdd63b 100644 (file)
@@ -1083,7 +1083,7 @@ static const struct dss_features omap34xx_dss_feats = {
 
 static const struct dss_features omap3630_dss_feats = {
        .model                  =       DSS_MODEL_OMAP3,
-       .fck_div_max            =       32,
+       .fck_div_max            =       31,
        .fck_freq_max           =       173000000,
        .dss_fck_multiplier     =       1,
        .parent_clk_name        =       "dpll4_ck",
index fc82a52..ee43797 100644 (file)
@@ -220,9 +220,17 @@ static const struct of_device_id lb035q02_of_match[] = {
 
 MODULE_DEVICE_TABLE(of, lb035q02_of_match);
 
+static const struct spi_device_id lb035q02_ids[] = {
+       { "lb035q02", 0 },
+       { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, lb035q02_ids);
+
 static struct spi_driver lb035q02_driver = {
        .probe          = lb035q02_probe,
        .remove         = lb035q02_remove,
+       .id_table       = lb035q02_ids,
        .driver         = {
                .name   = "panel-lg-lb035q02",
                .of_match_table = lb035q02_of_match,
@@ -231,7 +239,6 @@ static struct spi_driver lb035q02_driver = {
 
 module_spi_driver(lb035q02_driver);
 
-MODULE_ALIAS("spi:lgphilips,lb035q02");
 MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@ti.com>");
 MODULE_DESCRIPTION("LG.Philips LB035Q02 LCD Panel driver");
 MODULE_LICENSE("GPL");
index 299b217..20f17e4 100644 (file)
@@ -230,9 +230,17 @@ static const struct of_device_id nl8048_of_match[] = {
 
 MODULE_DEVICE_TABLE(of, nl8048_of_match);
 
+static const struct spi_device_id nl8048_ids[] = {
+       { "nl8048hl11", 0 },
+       { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, nl8048_ids);
+
 static struct spi_driver nl8048_driver = {
        .probe          = nl8048_probe,
        .remove         = nl8048_remove,
+       .id_table       = nl8048_ids,
        .driver         = {
                .name   = "panel-nec-nl8048hl11",
                .pm     = &nl8048_pm_ops,
@@ -242,7 +250,6 @@ static struct spi_driver nl8048_driver = {
 
 module_spi_driver(nl8048_driver);
 
-MODULE_ALIAS("spi:nec,nl8048hl11");
 MODULE_AUTHOR("Erik Gilling <konkers@android.com>");
 MODULE_DESCRIPTION("NEC-NL8048HL11 Driver");
 MODULE_LICENSE("GPL");
index 305259b..3d5b9c4 100644 (file)
@@ -684,9 +684,17 @@ static const struct of_device_id acx565akm_of_match[] = {
 
 MODULE_DEVICE_TABLE(of, acx565akm_of_match);
 
+static const struct spi_device_id acx565akm_ids[] = {
+       { "acx565akm", 0 },
+       { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, acx565akm_ids);
+
 static struct spi_driver acx565akm_driver = {
        .probe          = acx565akm_probe,
        .remove         = acx565akm_remove,
+       .id_table       = acx565akm_ids,
        .driver         = {
                .name   = "panel-sony-acx565akm",
                .of_match_table = acx565akm_of_match,
@@ -695,7 +703,6 @@ static struct spi_driver acx565akm_driver = {
 
 module_spi_driver(acx565akm_driver);
 
-MODULE_ALIAS("spi:sony,acx565akm");
 MODULE_AUTHOR("Nokia Corporation");
 MODULE_DESCRIPTION("Sony ACX565AKM LCD Panel Driver");
 MODULE_LICENSE("GPL");
index d7b2e34..f2baff8 100644 (file)
@@ -375,8 +375,7 @@ static const struct of_device_id td028ttec1_of_match[] = {
 MODULE_DEVICE_TABLE(of, td028ttec1_of_match);
 
 static const struct spi_device_id td028ttec1_ids[] = {
-       { "tpo,td028ttec1", 0},
-       { "toppoly,td028ttec1", 0 },
+       { "td028ttec1", 0 },
        { /* sentinel */ }
 };
 
index 8437056..ba163c7 100644 (file)
@@ -491,9 +491,17 @@ static const struct of_device_id td043mtea1_of_match[] = {
 
 MODULE_DEVICE_TABLE(of, td043mtea1_of_match);
 
+static const struct spi_device_id td043mtea1_ids[] = {
+       { "td043mtea1", 0 },
+       { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, td043mtea1_ids);
+
 static struct spi_driver td043mtea1_driver = {
        .probe          = td043mtea1_probe,
        .remove         = td043mtea1_remove,
+       .id_table       = td043mtea1_ids,
        .driver         = {
                .name   = "panel-tpo-td043mtea1",
                .pm     = &td043mtea1_pm_ops,
@@ -503,7 +511,6 @@ static struct spi_driver td043mtea1_driver = {
 
 module_spi_driver(td043mtea1_driver);
 
-MODULE_ALIAS("spi:tpo,td043mtea1");
 MODULE_AUTHOR("Gražvydas Ignotas <notasas@gmail.com>");
 MODULE_DESCRIPTION("TPO TD043MTEA1 Panel Driver");
 MODULE_LICENSE("GPL");
index a1f5fa6..12ff77d 100644 (file)
@@ -39,7 +39,7 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
         * If frequency scaling from low to high, adjust voltage first.
         * If frequency scaling from high to low, adjust frequency first.
         */
-       if (old_clk_rate < target_rate && pfdev->regulator) {
+       if (old_clk_rate < target_rate) {
                err = regulator_set_voltage(pfdev->regulator, target_volt,
                                            target_volt);
                if (err) {
@@ -53,14 +53,12 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
        if (err) {
                dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate,
                        err);
-               if (pfdev->regulator)
-                       regulator_set_voltage(pfdev->regulator,
-                                             pfdev->devfreq.cur_volt,
-                                             pfdev->devfreq.cur_volt);
+               regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt,
+                                     pfdev->devfreq.cur_volt);
                return err;
        }
 
-       if (old_clk_rate > target_rate && pfdev->regulator) {
+       if (old_clk_rate > target_rate) {
                err = regulator_set_voltage(pfdev->regulator, target_volt,
                                            target_volt);
                if (err)
index 46b0b02..238fb6d 100644 (file)
@@ -89,12 +89,9 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev)
 {
        int ret;
 
-       pfdev->regulator = devm_regulator_get_optional(pfdev->dev, "mali");
+       pfdev->regulator = devm_regulator_get(pfdev->dev, "mali");
        if (IS_ERR(pfdev->regulator)) {
                ret = PTR_ERR(pfdev->regulator);
-               pfdev->regulator = NULL;
-               if (ret == -ENODEV)
-                       return 0;
                dev_err(pfdev->dev, "failed to get regulator: %d\n", ret);
                return ret;
        }
@@ -110,8 +107,7 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev)
 
 static void panfrost_regulator_fini(struct panfrost_device *pfdev)
 {
-       if (pfdev->regulator)
-               regulator_disable(pfdev->regulator);
+       regulator_disable(pfdev->regulator);
 }
 
 int panfrost_device_init(struct panfrost_device *pfdev)
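
A hedged rationale for dropping the NULL checks across these panfrost hunks: unlike devm_regulator_get_optional(), plain devm_regulator_get() hands back a dummy supply when the DT specifies none, so the pointer is either a usable regulator or an ERR_PTR, never NULL:

        struct regulator *r = devm_regulator_get(dev, "mali");

        if (IS_ERR(r))                  /* probe defer or a real error */
                return PTR_ERR(r);

        return regulator_enable(r);     /* works on the dummy supply too */
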
index bc2ddeb..f21bc8a 100644 (file)
@@ -556,11 +556,11 @@ static int panfrost_probe(struct platform_device *pdev)
        return 0;
 
 err_out2:
+       pm_runtime_disable(pfdev->dev);
        panfrost_devfreq_fini(pfdev);
 err_out1:
        panfrost_device_fini(pfdev);
 err_out0:
-       pm_runtime_disable(pfdev->dev);
        drm_dev_put(ddev);
        return err;
 }
index f67ed92..8822ec1 100644 (file)
@@ -208,6 +208,9 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
        pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES);
        pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES);
        pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES);
+       pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
+       pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
+       pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
        pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
        for (i = 0; i < 4; i++)
                pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i));
index a585516..21f34d4 100644 (file)
@@ -381,13 +381,19 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
                job_read(pfdev, JS_TAIL_LO(js)),
                sched_job);
 
-       mutex_lock(&pfdev->reset_lock);
+       if (!mutex_trylock(&pfdev->reset_lock))
+               return;
 
-       for (i = 0; i < NUM_JOB_SLOTS; i++)
-               drm_sched_stop(&pfdev->js->queue[i].sched, sched_job);
+       for (i = 0; i < NUM_JOB_SLOTS; i++) {
+               struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
+
+               drm_sched_stop(sched, sched_job);
+               if (js != i)
+                       /* Ensure any timeouts on other slots have finished */
+                       cancel_delayed_work_sync(&sched->work_tdr);
+       }
 
-       if (sched_job)
-               drm_sched_increase_karma(sched_job);
+       drm_sched_increase_karma(sched_job);
 
        spin_lock_irqsave(&pfdev->js->job_lock, flags);
        for (i = 0; i < NUM_JOB_SLOTS; i++) {
index 6010f9e..a3ed64a 100644 (file)
@@ -224,9 +224,9 @@ static size_t get_pgsize(u64 addr, size_t size)
        return SZ_2M;
 }
 
-void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
-                             struct panfrost_mmu *mmu,
-                             u64 iova, size_t size)
+static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+                                    struct panfrost_mmu *mmu,
+                                    u64 iova, size_t size)
 {
        if (mmu->as < 0)
                return;
@@ -394,58 +394,70 @@ void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv)
        free_io_pgtable_ops(mmu->pgtbl_ops);
 }
 
-static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
+static struct panfrost_gem_object *
+addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
 {
-       struct drm_mm_node *node = NULL;
+       struct panfrost_gem_object *bo = NULL;
+       struct panfrost_file_priv *priv;
+       struct drm_mm_node *node;
        u64 offset = addr >> PAGE_SHIFT;
        struct panfrost_mmu *mmu;
 
        spin_lock(&pfdev->as_lock);
        list_for_each_entry(mmu, &pfdev->as_lru_list, list) {
-               struct panfrost_file_priv *priv;
-               if (as != mmu->as)
-                       continue;
+               if (as == mmu->as)
+                       goto found_mmu;
+       }
+       goto out;
+
+found_mmu:
+       priv = container_of(mmu, struct panfrost_file_priv, mmu);
+
+       spin_lock(&priv->mm_lock);
 
-               priv = container_of(mmu, struct panfrost_file_priv, mmu);
-               drm_mm_for_each_node(node, &priv->mm) {
-                       if (offset >= node->start && offset < (node->start + node->size))
-                               goto out;
+       drm_mm_for_each_node(node, &priv->mm) {
+               if (offset >= node->start &&
+                   offset < (node->start + node->size)) {
+                       bo = drm_mm_node_to_panfrost_bo(node);
+                       drm_gem_object_get(&bo->base.base);
+                       break;
                }
        }
 
+       spin_unlock(&priv->mm_lock);
 out:
        spin_unlock(&pfdev->as_lock);
-       return node;
+       return bo;
 }
 
 #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
 
-int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
+static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
+                                      u64 addr)
 {
        int ret, i;
-       struct drm_mm_node *node;
        struct panfrost_gem_object *bo;
        struct address_space *mapping;
        pgoff_t page_offset;
        struct sg_table *sgt;
        struct page **pages;
 
-       node = addr_to_drm_mm_node(pfdev, as, addr);
-       if (!node)
+       bo = addr_to_drm_mm_node(pfdev, as, addr);
+       if (!bo)
                return -ENOENT;
 
-       bo = drm_mm_node_to_panfrost_bo(node);
        if (!bo->is_heap) {
                dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)",
-                        node->start << PAGE_SHIFT);
-               return -EINVAL;
+                        bo->node.start << PAGE_SHIFT);
+               ret = -EINVAL;
+               goto err_bo;
        }
        WARN_ON(bo->mmu->as != as);
 
        /* Assume 2MB alignment and size multiple */
        addr &= ~((u64)SZ_2M - 1);
        page_offset = addr >> PAGE_SHIFT;
-       page_offset -= node->start;
+       page_offset -= bo->node.start;
 
        mutex_lock(&bo->base.pages_lock);
 
@@ -454,7 +466,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
                                     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
                if (!bo->sgts) {
                        mutex_unlock(&bo->base.pages_lock);
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto err_bo;
                }
 
                pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
@@ -463,7 +476,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
                        kfree(bo->sgts);
                        bo->sgts = NULL;
                        mutex_unlock(&bo->base.pages_lock);
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto err_bo;
                }
                bo->base.pages = pages;
                bo->base.pages_use_count = 1;
@@ -501,12 +515,16 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
 
        dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
 
+       drm_gem_object_put_unlocked(&bo->base.base);
+
        return 0;
 
 err_map:
        sg_free_table(sgt);
 err_pages:
        drm_gem_shmem_put_pages(&bo->base);
+err_bo:
+       drm_gem_object_put_unlocked(&bo->base.base);
        return ret;
 }
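
The rework above closes an object-lifetime race in the fault handler: the old
code returned a raw drm_mm_node found under as_lock, with nothing keeping the
backing object alive once the lock was dropped. The lookup now resolves the
node to its GEM object and takes a reference while still holding
priv->mm_lock, and panfrost_mmu_map_fault_addr() drops that reference on
every exit path (hence the new err_bo label). The idiom, sketched with
illustrative names (lookup_obj() and use_obj() are stand-ins):

        spin_lock(&table_lock);
        obj = lookup_obj(table, addr);          /* hypothetical lookup */
        if (obj)
                drm_gem_object_get(obj);        /* pin before unlocking */
        spin_unlock(&table_lock);

        if (obj) {
                use_obj(obj);                   /* safe: we hold a reference */
                drm_gem_object_put_unlocked(obj);
        }
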
 
index 83c57d3..2dba192 100644 (file)
@@ -16,6 +16,7 @@
 #include "panfrost_issues.h"
 #include "panfrost_job.h"
 #include "panfrost_mmu.h"
+#include "panfrost_perfcnt.h"
 #include "panfrost_regs.h"
 
 #define COUNTERS_PER_BLOCK             64
index 431e6b6..4528f4d 100644 (file)
@@ -324,8 +324,39 @@ bool radeon_device_is_virtual(void);
 static int radeon_pci_probe(struct pci_dev *pdev,
                            const struct pci_device_id *ent)
 {
+       unsigned long flags = 0;
        int ret;
 
+       if (!ent)
+               return -ENODEV; /* Avoid NULL-ptr deref in drm_get_pci_dev */
+
+       flags = ent->driver_data;
+
+       if (!radeon_si_support) {
+               switch (flags & RADEON_FAMILY_MASK) {
+               case CHIP_TAHITI:
+               case CHIP_PITCAIRN:
+               case CHIP_VERDE:
+               case CHIP_OLAND:
+               case CHIP_HAINAN:
+                       dev_info(&pdev->dev,
+                                "SI support disabled by module param\n");
+                       return -ENODEV;
+               }
+       }
+       if (!radeon_cik_support) {
+               switch (flags & RADEON_FAMILY_MASK) {
+               case CHIP_KAVERI:
+               case CHIP_BONAIRE:
+               case CHIP_HAWAII:
+               case CHIP_KABINI:
+               case CHIP_MULLINS:
+                       dev_info(&pdev->dev,
+                                "CIK support disabled by module param\n");
+                       return -ENODEV;
+               }
+       }
+
        if (vga_switcheroo_client_probe_defer(pdev))
                return -EPROBE_DEFER;
 
@@ -348,7 +379,9 @@ radeon_pci_remove(struct pci_dev *pdev)
 static void
 radeon_pci_shutdown(struct pci_dev *pdev)
 {
+#ifdef CONFIG_PPC64
        struct drm_device *ddev = pci_get_drvdata(pdev);
+#endif
 
        /* if we are running in a VM, make sure the device
         * is torn down properly on reboot/shutdown
@@ -356,11 +389,15 @@ radeon_pci_shutdown(struct pci_dev *pdev)
        if (radeon_device_is_virtual())
                radeon_pci_remove(pdev);
 
+#ifdef CONFIG_PPC64
        /* Some adapters need to be suspended before a
-       * shutdown occurs in order to prevent an error
-       * during kexec.
-       */
+        * shutdown occurs in order to prevent an error
+        * during kexec.
+        * Make this power-specific because it breaks
+        * some non-power boards.
+        */
        radeon_suspend_kms(ddev, true, true, false);
+#endif
 }
 
 static int radeon_pmops_suspend(struct device *dev)
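
Moving the si_support/cik_support checks from radeon_driver_load_kms()
(removed further below) into radeon_pci_probe() makes an ASIC disabled by
module parameter fail with -ENODEV before the PCI device is claimed, so
amdgpu can bind to it instead of radeon holding the device after a partial
load. The shape of such a probe-time gate, with a hypothetical
family_disabled() helper:

        static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        {
                /* ent->driver_data carries the family flags, as in the hunk */
                if (family_disabled(ent->driver_data & RADEON_FAMILY_MASK))
                        return -ENODEV; /* leave the device to another driver */

                /* ... normal probe ... */
                return 0;
        }
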
index 4cf58db..b2b0766 100644 (file)
@@ -296,6 +296,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
        uint32_t handle;
        int r;
 
+       args->addr = untagged_addr(args->addr);
+
        if (offset_in_page(args->addr | args->size))
                return -EINVAL;
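
The untagged_addr() call matters on architectures with pointer tagging
(arm64 top-byte-ignore, SPARC ADI): a userspace pointer may carry tag bits in
its high byte, which would defeat the offset_in_page() alignment check and
later range lookups. Stripping the tag first keeps the checks canonical:

        u64 addr = args->addr;          /* may carry tag bits in the top byte */

        addr = untagged_addr(addr);     /* canonical address for all checks */
        if (offset_in_page(addr | size))
                return -EINVAL;         /* must be page aligned */
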
 
index 07f7ace..e85c554 100644 (file)
@@ -100,31 +100,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
        struct radeon_device *rdev;
        int r, acpi_status;
 
-       if (!radeon_si_support) {
-               switch (flags & RADEON_FAMILY_MASK) {
-               case CHIP_TAHITI:
-               case CHIP_PITCAIRN:
-               case CHIP_VERDE:
-               case CHIP_OLAND:
-               case CHIP_HAINAN:
-                       dev_info(dev->dev,
-                                "SI support disabled by module param\n");
-                       return -ENODEV;
-               }
-       }
-       if (!radeon_cik_support) {
-               switch (flags & RADEON_FAMILY_MASK) {
-               case CHIP_KAVERI:
-               case CHIP_BONAIRE:
-               case CHIP_HAWAII:
-               case CHIP_KABINI:
-               case CHIP_MULLINS:
-                       dev_info(dev->dev,
-                                "CIK support disabled by module param\n");
-                       return -ENODEV;
-               }
-       }
-
        rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL);
        if (rdev == NULL) {
                return -ENOMEM;
index ae07290..04efa78 100644 (file)
@@ -147,7 +147,7 @@ static int rcar_du_wb_enc_atomic_check(struct drm_encoder *encoder,
        struct drm_device *dev = encoder->dev;
        struct drm_framebuffer *fb;
 
-       if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+       if (!conn_state->writeback_job)
                return 0;
 
        fb = conn_state->writeback_job->fb;
@@ -221,7 +221,7 @@ void rcar_du_writeback_setup(struct rcar_du_crtc *rcrtc,
        unsigned int i;
 
        state = rcrtc->writeback.base.state;
-       if (!state || !state->writeback_job || !state->writeback_job->fb)
+       if (!state || !state->writeback_job)
                return;
 
        fb = state->writeback_job->fb;
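
Dropping the ->fb half of these checks relies on an invariant from the
writeback rework in the atomic core: a writeback_job is only kept on the
connector state when a framebuffer was actually attached, so a non-NULL job
implies a non-NULL job->fb. Assuming that invariant, a single test suffices:

        /* Sketch: under "job != NULL implies job->fb != NULL",
         * one NULL test guards the fb dereference. */
        if (!conn_state->writeback_job)
                return 0;

        fb = conn_state->writeback_job->fb;     /* safe under the invariant */
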
index 2f821c5..613404f 100644 (file)
@@ -39,8 +39,6 @@
 #include "rockchip_drm_vop.h"
 #include "rockchip_rgb.h"
 
-#define VOP_SELF_REFRESH_ENTRY_DELAY_MS 100
-
 #define VOP_WIN_SET(vop, win, name, v) \
                vop_reg_set(vop, &win->phy->name, win->base, ~0, v, #name)
 #define VOP_SCL_SET(vop, win, name, v) \
@@ -1563,8 +1561,7 @@ static int vop_create_crtc(struct vop *vop)
        init_completion(&vop->line_flag_completion);
        crtc->port = port;
 
-       ret = drm_self_refresh_helper_init(crtc,
-                                          VOP_SELF_REFRESH_ENTRY_DELAY_MS);
+       ret = drm_self_refresh_helper_init(crtc);
        if (ret)
                DRM_DEV_DEBUG_KMS(vop->dev,
                        "Failed to init %s with SR helpers %d, ignoring\n",
index 9a0ee74..f39b97e 100644 (file)
@@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
        struct drm_sched_job *s_job, *tmp;
        uint64_t guilty_context;
        bool found_guilty = false;
+       struct dma_fence *fence;
 
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
                        dma_fence_set_error(&s_fence->finished, -ECANCELED);
 
                dma_fence_put(s_job->s_fence->parent);
-               s_job->s_fence->parent = sched->ops->run_job(s_job);
+               fence = sched->ops->run_job(s_job);
+
+               if (IS_ERR_OR_NULL(fence)) {
+                       s_job->s_fence->parent = NULL;
+                       dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
+               } else {
+                       s_job->s_fence->parent = fence;
+               }
        }
 }
 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
@@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
                fence = sched->ops->run_job(sched_job);
                drm_sched_fence_scheduled(s_fence);
 
-               if (fence) {
+               if (!IS_ERR_OR_NULL(fence)) {
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &sched_job->cb,
                                                   drm_sched_process_job);
@@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        dma_fence_put(fence);
-               } else
+               } else {
+                       dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
                        drm_sched_process_job(NULL, &sched_job->cb);
+               }
 
                wake_up(&sched->job_scheduled);
        }
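
Both hunks above adapt the scheduler to the widened run_job() contract: the
hook may return a valid fence, NULL, or an ERR_PTR()-encoded error, so plain
NULL tests become IS_ERR_OR_NULL() and the error is recorded in the finished
fence rather than dereferenced. The convention, sketched with simplified
names:

        struct dma_fence *fence = ops->run_job(job); /* fence, NULL or ERR_PTR */

        if (!IS_ERR_OR_NULL(fence)) {
                parent = dma_fence_get(fence);          /* normal case */
        } else {
                parent = NULL;
                if (IS_ERR(fence))                      /* a real error code */
                        dma_fence_set_error(&finished, PTR_ERR(fence));
        }
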
index 525dc1c..530edb3 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/gpio.h>
 #include <linux/mod_devicetable.h>
 #include <linux/of_gpio.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 
 #include <drm/drm_atomic_helper.h>
index 5047634..a46ac28 100644 (file)
@@ -63,7 +63,6 @@ config TINYDRM_REPAPER
        depends on DRM && SPI
        select DRM_KMS_HELPER
        select DRM_KMS_CMA_HELPER
-       depends on THERMAL || !THERMAL
        help
          DRM driver for the following Pervasive Displays panels:
          1.44" TFT EPD Panel (E1144CS021)
index 20ff56f..9881946 100644 (file)
@@ -185,8 +185,9 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
        list_add_tail(&bo->lru, &man->lru[bo->priority]);
        kref_get(&bo->list_kref);
 
-       if (bo->ttm && !(bo->ttm->page_flags &
-                        (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) {
+       if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
+           !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+                                    TTM_PAGE_FLAG_SWAPPED))) {
                list_add_tail(&bo->swap, &bdev->glob->swap_lru[bo->priority]);
                kref_get(&bo->list_kref);
        }
@@ -878,11 +879,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 
        if (!bo) {
                if (busy_bo)
-                       ttm_bo_get(busy_bo);
+                       kref_get(&busy_bo->list_kref);
                spin_unlock(&glob->lru_lock);
                ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
                if (busy_bo)
-                       ttm_bo_put(busy_bo);
+                       kref_put(&busy_bo->list_kref, ttm_bo_release_list);
                return ret;
        }
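
The kref swap above is deliberate: the busy BO found on the LRU may already
have dropped its last ordinary reference and be partway through destruction,
so ttm_bo_get() would resurrect a dying object. The list reference
(list_kref), which the LRU itself still holds, is the only one safe to take
here. The pattern:

        /* Sketch: keep a list-owned object alive via its list refcount. */
        kref_get(&bo->list_kref);               /* while the list lock is held */
        /* ... use bo after dropping the lock ... */
        kref_put(&bo->list_kref, ttm_bo_release_list);
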
 
index 76eedb9..46dc3de 100644 (file)
@@ -278,15 +278,13 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
                else
                        ret = vmf_insert_pfn(&cvma, address, pfn);
 
-               /*
-                * Somebody beat us to this PTE or prefaulting to
-                * an already populated PTE, or prefaulting error.
-                */
-
-               if (unlikely((ret == VM_FAULT_NOPAGE && i > 0)))
-                       break;
-               else if (unlikely(ret & VM_FAULT_ERROR))
-                       goto out_io_unlock;
+               /* Never error on prefaulted PTEs */
+               if (unlikely((ret & VM_FAULT_ERROR))) {
+                       if (i == 0)
+                               goto out_io_unlock;
+                       else
+                               break;
+               }
 
                address += PAGE_SIZE;
                if (unlikely(++page_offset >= page_last))
index 5d80507..19c092d 100644 (file)
@@ -557,13 +557,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 
        if (args->bcl_start != args->bcl_end) {
                bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
-               if (!bin)
+               if (!bin) {
+                       v3d_job_put(&render->base);
                        return -ENOMEM;
+               }
 
                ret = v3d_job_init(v3d, file_priv, &bin->base,
                                   v3d_job_free, args->in_sync_bcl);
                if (ret) {
                        v3d_job_put(&render->base);
+                       kfree(bin);
                        return ret;
                }
 
index 1ce4d71..bf72020 100644 (file)
@@ -231,7 +231,7 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn,
        int i;
 
        conn_state = drm_atomic_get_new_connector_state(state, conn);
-       if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+       if (!conn_state->writeback_job)
                return 0;
 
        crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
@@ -271,8 +271,7 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn,
        u32 ctrl;
        int i;
 
-       if (WARN_ON(!conn_state->writeback_job ||
-                   !conn_state->writeback_job->fb))
+       if (WARN_ON(!conn_state->writeback_job))
                return;
 
        mode = &conn_state->crtc->state->adjusted_mode;
index feaa538..3db000a 100644 (file)
@@ -174,7 +174,6 @@ via_map_blit_for_device(struct pci_dev *pdev,
 static void
 via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 {
-       struct page *page;
        int i;
 
        switch (vsg->state) {
@@ -189,13 +188,8 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
                kfree(vsg->desc_pages);
                /* fall through */
        case dr_via_pages_locked:
-               for (i = 0; i < vsg->num_pages; ++i) {
-                       if (NULL != (page = vsg->pages[i])) {
-                               if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
-                                       SetPageDirty(page);
-                               put_page(page);
-                       }
-               }
+               put_user_pages_dirty_lock(vsg->pages, vsg->num_pages,
+                                         (vsg->direction == DMA_FROM_DEVICE));
                /* fall through */
        case dr_via_pages_alloc:
                vfree(vsg->pages);
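
put_user_pages_dirty_lock() folds the removed loop into one helper: it
dirties each page (taking the page lock as needed) only when the last
argument is true, i.e. when the device wrote into the pages, then releases
the get_user_pages() references. Typical pairing, as a sketch:

        npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
        if (npinned < 0)
                return npinned;
        /* ... device DMA into or out of the pages ... */
        put_user_pages_dirty_lock(pages, npinned,
                                  dir == DMA_FROM_DEVICE); /* dirty + put */
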
index ba1828a..4be49c1 100644 (file)
@@ -718,17 +718,9 @@ static int xen_drv_probe(struct xenbus_device *xb_dev,
        struct device *dev = &xb_dev->dev;
        int ret;
 
-       /*
-        * The device is not spawn from a device tree, so arch_setup_dma_ops
-        * is not called, thus leaving the device with dummy DMA ops.
-        * This makes the device return error on PRIME buffer import, which
-        * is not correct: to fix this call of_dma_configure() with a NULL
-        * node to set default DMA ops.
-        */
-       dev->coherent_dma_mask = DMA_BIT_MASK(32);
-       ret = of_dma_configure(dev, NULL, true);
+       ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
        if (ret < 0) {
-               DRM_ERROR("Cannot setup DMA ops, ret %d", ret);
+               DRM_ERROR("Cannot setup DMA mask, ret %d", ret);
                return ret;
        }
 
index 6654c15..fbe4e16 100644 (file)
@@ -63,13 +63,20 @@ static int axff_init(struct hid_device *hid)
 {
        struct axff_device *axff;
        struct hid_report *report;
-       struct hid_input *hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+       struct hid_input *hidinput;
        struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
-       struct input_dev *dev = hidinput->input;
+       struct input_dev *dev;
        int field_count = 0;
        int i, j;
        int error;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+       dev = hidinput->input;
+
        if (list_empty(report_list)) {
                hid_err(hid, "no output reports found\n");
                return -ENODEV;
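
This guard repeats across the force-feedback drivers below (drff, emsff,
gaff, holtekff, the lg*ff family, hidpp, ms, sony, tmff, zpff):
list_first_entry() is just container_of() on head->next, so on an empty
hid->inputs list it "finds" the list head itself and the following ->input
dereference reads out of bounds. A crafted device exposing no input
applications could trigger exactly that, hence the explicit emptiness check:

        /* list_first_entry(head, type, member) is container_of(head->next).
         * On an empty list head->next == head, so the result is not a real
         * entry; validate before dereferencing. */
        if (list_empty(&hid->inputs))
                return -ENODEV;
        hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
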
index 3eaee2c..e0b241b 100644 (file)
@@ -211,6 +211,18 @@ static unsigned hid_lookup_collection(struct hid_parser *parser, unsigned type)
        return 0; /* we know nothing about this usage type */
 }
 
+/*
+ * Concatenate usage which defines 16 bits or less with the
+ * currently defined usage page to form a 32 bit usage
+ */
+
+static void complete_usage(struct hid_parser *parser, unsigned int index)
+{
+       parser->local.usage[index] &= 0xFFFF;
+       parser->local.usage[index] |=
+               (parser->global.usage_page & 0xFFFF) << 16;
+}
+
 /*
  * Add a usage to the temporary parser table.
  */
@@ -222,6 +234,14 @@ static int hid_add_usage(struct hid_parser *parser, unsigned usage, u8 size)
                return -1;
        }
        parser->local.usage[parser->local.usage_index] = usage;
+
+       /*
+        * If Usage item only includes usage id, concatenate it with
+        * currently defined usage page
+        */
+       if (size <= 2)
+               complete_usage(parser, parser->local.usage_index);
+
        parser->local.usage_size[parser->local.usage_index] = size;
        parser->local.collection_index[parser->local.usage_index] =
                parser->collection_stack_ptr ?
@@ -543,13 +563,32 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item)
  * usage value."
  */
 
-static void hid_concatenate_usage_page(struct hid_parser *parser)
+static void hid_concatenate_last_usage_page(struct hid_parser *parser)
 {
        int i;
+       unsigned int usage_page;
+       unsigned int current_page;
+
+       if (!parser->local.usage_index)
+               return;
 
-       for (i = 0; i < parser->local.usage_index; i++)
-               if (parser->local.usage_size[i] <= 2)
-                       parser->local.usage[i] += parser->global.usage_page << 16;
+       usage_page = parser->global.usage_page;
+
+       /*
+        * Concatenate usage page again only if last declared Usage Page
+        * has not been already used in previous usages concatenation
+        */
+       for (i = parser->local.usage_index - 1; i >= 0; i--) {
+               if (parser->local.usage_size[i] > 2)
+                       /* Ignore extended usages */
+                       continue;
+
+               current_page = parser->local.usage[i] >> 16;
+               if (current_page == usage_page)
+                       break;
+
+               complete_usage(parser, i);
+       }
 }
 
 /*
@@ -561,7 +600,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item)
        __u32 data;
        int ret;
 
-       hid_concatenate_usage_page(parser);
+       hid_concatenate_last_usage_page(parser);
 
        data = item_udata(item);
 
@@ -742,6 +781,10 @@ static void hid_scan_feature_usage(struct hid_parser *parser, u32 usage)
        if (usage == 0xff0000c5 && parser->global.report_count == 256 &&
            parser->global.report_size == 8)
                parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8;
+
+       if (usage == 0xff0000c6 && parser->global.report_count == 1 &&
+           parser->global.report_size == 8)
+               parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8;
 }
 
 static void hid_scan_collection(struct hid_parser *parser, unsigned type)
@@ -772,7 +815,7 @@ static int hid_scan_main(struct hid_parser *parser, struct hid_item *item)
        __u32 data;
        int i;
 
-       hid_concatenate_usage_page(parser);
+       hid_concatenate_last_usage_page(parser);
 
        data = item_udata(item);
 
@@ -1139,6 +1182,7 @@ int hid_open_report(struct hid_device *device)
        __u8 *start;
        __u8 *buf;
        __u8 *end;
+       __u8 *next;
        int ret;
        static int (*dispatch_type[])(struct hid_parser *parser,
                                      struct hid_item *item) = {
@@ -1192,7 +1236,8 @@ int hid_open_report(struct hid_device *device)
        device->collection_size = HID_DEFAULT_NUM_COLLECTIONS;
 
        ret = -EINVAL;
-       while ((start = fetch_item(start, end, &item)) != NULL) {
+       while ((next = fetch_item(start, end, &item)) != NULL) {
+               start = next;
 
                if (item.format != HID_ITEM_FORMAT_SHORT) {
                        hid_err(device, "unexpected long global item\n");
@@ -1230,7 +1275,8 @@ int hid_open_report(struct hid_device *device)
                }
        }
 
-       hid_err(device, "item fetching failed at offset %d\n", (int)(end - start));
+       hid_err(device, "item fetching failed at offset %u/%u\n",
+               size - (unsigned int)(end - start), size);
 err:
        kfree(parser->collection_stack);
 alloc_err:
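
Two of the changes in this file deserve a note. Per the HID spec, a Usage
item of two bytes or less carries only a usage ID within the current Usage
Page, so complete_usage() forms the full 32-bit usage as (page << 16) | id;
applying it as usages are parsed, and again at main-item time only for
entries declared before the latest Usage Page item, keeps interleaved
Usage/Usage Page sequences correct. A worked example of the completion:

        /* Usage Page (Generic Desktop, 0x0001), then Usage (X, 0x0030): */
        u32 usage = 0x0030;                     /* 16-bit usage id */
        usage &= 0xFFFF;
        usage |= (0x0001 & 0xFFFF) << 16;       /* full usage 0x00010030 */

Separately, the fetch_item loop now advances start only after a successful
fetch, so the failure message reports the offset of the item that actually
failed instead of one computed from an already-advanced pointer.
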
index 17e17f9..947f19f 100644 (file)
@@ -75,13 +75,19 @@ static int drff_init(struct hid_device *hid)
 {
        struct drff_device *drff;
        struct hid_report *report;
-       struct hid_input *hidinput = list_first_entry(&hid->inputs,
-                                               struct hid_input, list);
+       struct hid_input *hidinput;
        struct list_head *report_list =
                        &hid->report_enum[HID_OUTPUT_REPORT].report_list;
-       struct input_dev *dev = hidinput->input;
+       struct input_dev *dev;
        int error;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+       dev = hidinput->input;
+
        if (list_empty(report_list)) {
                hid_err(hid, "no output reports found\n");
                return -ENODEV;
index 7cd5651..c34f2e5 100644 (file)
@@ -47,13 +47,19 @@ static int emsff_init(struct hid_device *hid)
 {
        struct emsff_device *emsff;
        struct hid_report *report;
-       struct hid_input *hidinput = list_first_entry(&hid->inputs,
-                                               struct hid_input, list);
+       struct hid_input *hidinput;
        struct list_head *report_list =
                        &hid->report_enum[HID_OUTPUT_REPORT].report_list;
-       struct input_dev *dev = hidinput->input;
+       struct input_dev *dev;
        int error;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+       dev = hidinput->input;
+
        if (list_empty(report_list)) {
                hid_err(hid, "no output reports found\n");
                return -ENODEV;
index 0f95c96..ecbd399 100644 (file)
@@ -64,14 +64,20 @@ static int gaff_init(struct hid_device *hid)
 {
        struct gaff_device *gaff;
        struct hid_report *report;
-       struct hid_input *hidinput = list_entry(hid->inputs.next,
-                                               struct hid_input, list);
+       struct hid_input *hidinput;
        struct list_head *report_list =
                        &hid->report_enum[HID_OUTPUT_REPORT].report_list;
        struct list_head *report_ptr = report_list;
-       struct input_dev *dev = hidinput->input;
+       struct input_dev *dev;
        int error;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        if (list_empty(report_list)) {
                hid_err(hid, "no output reports found\n");
                return -ENODEV;
index 84f8c12..d86a918 100644 (file)
@@ -469,6 +469,10 @@ static int hammer_probe(struct hid_device *hdev,
 static const struct hid_device_id hammer_devices[] = {
        { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
                     USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) },
+       { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+                    USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) },
+       { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+                    USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) },
        { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
                     USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) },
        { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
index 10a7205..8619b80 100644 (file)
@@ -124,13 +124,19 @@ static int holtekff_init(struct hid_device *hid)
 {
        struct holtekff_device *holtekff;
        struct hid_report *report;
-       struct hid_input *hidinput = list_entry(hid->inputs.next,
-                                               struct hid_input, list);
+       struct hid_input *hidinput;
        struct list_head *report_list =
                        &hid->report_enum[HID_OUTPUT_REPORT].report_list;
-       struct input_dev *dev = hidinput->input;
+       struct input_dev *dev;
        int error;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        if (list_empty(report_list)) {
                hid_err(hid, "no output report found\n");
                return -ENODEV;
index 7795831..79a28fc 100644 (file)
@@ -104,8 +104,8 @@ struct synthhid_input_report {
 
 #pragma pack(pop)
 
-#define INPUTVSC_SEND_RING_BUFFER_SIZE         (10*PAGE_SIZE)
-#define INPUTVSC_RECV_RING_BUFFER_SIZE         (10*PAGE_SIZE)
+#define INPUTVSC_SEND_RING_BUFFER_SIZE         (40 * 1024)
+#define INPUTVSC_RECV_RING_BUFFER_SIZE         (40 * 1024)
 
 
 enum pipe_prot_msg_type {
@@ -314,60 +314,24 @@ static void mousevsc_on_receive(struct hv_device *device,
 
 static void mousevsc_on_channel_callback(void *context)
 {
-       const int packet_size = 0x100;
-       int ret;
        struct hv_device *device = context;
-       u32 bytes_recvd;
-       u64 req_id;
        struct vmpacket_descriptor *desc;
-       unsigned char   *buffer;
-       int     bufferlen = packet_size;
-
-       buffer = kmalloc(bufferlen, GFP_ATOMIC);
-       if (!buffer)
-               return;
-
-       do {
-               ret = vmbus_recvpacket_raw(device->channel, buffer,
-                                       bufferlen, &bytes_recvd, &req_id);
-
-               switch (ret) {
-               case 0:
-                       if (bytes_recvd <= 0) {
-                               kfree(buffer);
-                               return;
-                       }
-                       desc = (struct vmpacket_descriptor *)buffer;
-
-                       switch (desc->type) {
-                       case VM_PKT_COMP:
-                               break;
-
-                       case VM_PKT_DATA_INBAND:
-                               mousevsc_on_receive(device, desc);
-                               break;
-
-                       default:
-                               pr_err("unhandled packet type %d, tid %llx len %d\n",
-                                       desc->type, req_id, bytes_recvd);
-                               break;
-                       }
 
+       foreach_vmbus_pkt(desc, device->channel) {
+               switch (desc->type) {
+               case VM_PKT_COMP:
                        break;
 
-               case -ENOBUFS:
-                       kfree(buffer);
-                       /* Handle large packet */
-                       bufferlen = bytes_recvd;
-                       buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
-
-                       if (!buffer)
-                               return;
+               case VM_PKT_DATA_INBAND:
+                       mousevsc_on_receive(device, desc);
+                       break;
 
+               default:
+                       pr_err("Unhandled packet type %d, tid %llx len %d\n",
+                              desc->type, desc->trans_id, desc->len8 * 8);
                        break;
                }
-       } while (1);
-
+       }
 }
 
 static int mousevsc_connect_to_vsp(struct hv_device *device)
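
The rewrite replaces the kmalloc/vmbus_recvpacket_raw() loop, including its
-ENOBUFS grow-and-retry path, with foreach_vmbus_pkt(), which walks the VMBus
ring-buffer descriptors in place with no copies or allocations. A
descriptor's total length is stored in 8-byte units, hence desc->len8 * 8 in
the error message. The shape of the iteration, with handle_packet() as a
hypothetical consumer:

        struct vmpacket_descriptor *desc;

        foreach_vmbus_pkt(desc, channel) {
                /* desc points into the ring buffer, valid for this pass;
                 * total packet size in bytes is desc->len8 * 8 */
                handle_packet(desc);
        }
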
index 75d8cf7..7e1689e 100644 (file)
 #define USB_DEVICE_ID_GOOGLE_STAFF     0x502b
 #define USB_DEVICE_ID_GOOGLE_WAND      0x502d
 #define USB_DEVICE_ID_GOOGLE_WHISKERS  0x5030
+#define USB_DEVICE_ID_GOOGLE_MASTERBALL        0x503c
+#define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d
 
 #define USB_VENDOR_ID_GOTOP            0x08f2
 #define USB_DEVICE_ID_SUPER_Q2         0x007f
 #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A    0x094a
 #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0941    0x0941
 #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0641    0x0641
+#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_1f4a    0x1f4a
 
 #define USB_VENDOR_ID_HUION            0x256c
 #define USB_DEVICE_ID_HUION_TABLET     0x006e
 
 #define I2C_VENDOR_ID_RAYDIUM          0x2386
 #define I2C_PRODUCT_ID_RAYDIUM_4B33    0x4b33
+#define I2C_PRODUCT_ID_RAYDIUM_3118    0x3118
 
 #define USB_VENDOR_ID_RAZER            0x1532
 #define USB_DEVICE_ID_RAZER_BLADE_14   0x011D
index dd1a6c3..73d07e3 100644 (file)
@@ -50,11 +50,17 @@ int lg2ff_init(struct hid_device *hid)
 {
        struct lg2ff_device *lg2ff;
        struct hid_report *report;
-       struct hid_input *hidinput = list_entry(hid->inputs.next,
-                                               struct hid_input, list);
-       struct input_dev *dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *dev;
        int error;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        /* Check that the report looks ok */
        report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7);
        if (!report)
index 9ecb6fd..b7e1949 100644 (file)
@@ -117,12 +117,19 @@ static const signed short ff3_joystick_ac[] = {
 
 int lg3ff_init(struct hid_device *hid)
 {
-       struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct input_dev *dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *dev;
        const signed short *ff_bits = ff3_joystick_ac;
        int error;
        int i;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        /* Check that the report looks ok */
        if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35))
                return -ENODEV;
index 03f0220..5e6a0ce 100644 (file)
@@ -1253,8 +1253,8 @@ static int lg4ff_handle_multimode_wheel(struct hid_device *hid, u16 *real_produc
 
 int lg4ff_init(struct hid_device *hid)
 {
-       struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct input_dev *dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *dev;
        struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
        struct hid_report *report = list_entry(report_list->next, struct hid_report, list);
        const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor);
@@ -1266,6 +1266,13 @@ int lg4ff_init(struct hid_device *hid)
        int mmode_ret, mmode_idx = -1;
        u16 real_product_id;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        /* Check that the report looks ok */
        if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
                return -1;
index c79a6ec..aed4ddc 100644 (file)
@@ -115,12 +115,19 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude)
 
 int lgff_init(struct hid_device* hid)
 {
-       struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct input_dev *dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *dev;
        const signed short *ff_bits = ff_joystick;
        int error;
        int i;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        /* Check that the report looks ok */
        if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
                return -ENODEV;
index d2421f3..cd91930 100644 (file)
@@ -1672,6 +1672,7 @@ static void hidpp_touchpad_raw_xy_event(struct hidpp_device *hidpp_dev,
 
 #define HIDPP_FF_EFFECTID_NONE         -1
 #define HIDPP_FF_EFFECTID_AUTOCENTER   -2
+#define HIDPP_AUTOCENTER_PARAMS_LENGTH 18
 
 #define HIDPP_FF_MAX_PARAMS    20
 #define HIDPP_FF_RESERVED_SLOTS        1
@@ -2012,7 +2013,7 @@ static int hidpp_ff_erase_effect(struct input_dev *dev, int effect_id)
 static void hidpp_ff_set_autocenter(struct input_dev *dev, u16 magnitude)
 {
        struct hidpp_ff_private_data *data = dev->ff->private;
-       u8 params[18];
+       u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH];
 
        dbg_hid("Setting autocenter to %d.\n", magnitude);
 
@@ -2080,23 +2081,34 @@ static DEVICE_ATTR(range, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, hidpp
 static void hidpp_ff_destroy(struct ff_device *ff)
 {
        struct hidpp_ff_private_data *data = ff->private;
+       struct hid_device *hid = data->hidpp->hid_dev;
 
+       hid_info(hid, "Unloading HID++ force feedback.\n");
+
+       device_remove_file(&hid->dev, &dev_attr_range);
+       destroy_workqueue(data->wq);
        kfree(data->effect_ids);
 }
 
-static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
+static int hidpp_ff_init(struct hidpp_device *hidpp,
+                        struct hidpp_ff_private_data *data)
 {
        struct hid_device *hid = hidpp->hid_dev;
-       struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct input_dev *dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *dev;
        const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor);
        const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice);
        struct ff_device *ff;
-       struct hidpp_report response;
-       struct hidpp_ff_private_data *data;
-       int error, j, num_slots;
+       int error, j, num_slots = data->num_effects;
        u8 version;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        if (!dev) {
                hid_err(hid, "Struct input_dev not set!\n");
                return -EINVAL;
@@ -2112,27 +2124,17 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
                for (j = 0; hidpp_ff_effects_v2[j] >= 0; j++)
                        set_bit(hidpp_ff_effects_v2[j], dev->ffbit);
 
-       /* Read number of slots available in device */
-       error = hidpp_send_fap_command_sync(hidpp, feature_index,
-               HIDPP_FF_GET_INFO, NULL, 0, &response);
-       if (error) {
-               if (error < 0)
-                       return error;
-               hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n",
-                       __func__, error);
-               return -EPROTO;
-       }
-
-       num_slots = response.fap.params[0] - HIDPP_FF_RESERVED_SLOTS;
-
        error = input_ff_create(dev, num_slots);
 
        if (error) {
                hid_err(dev, "Failed to create FF device!\n");
                return error;
        }
-
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       /*
+        * Create a copy of passed data, so we can transfer memory
+        * ownership to FF core
+        */
+       data = kmemdup(data, sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
        data->effect_ids = kcalloc(num_slots, sizeof(int), GFP_KERNEL);
@@ -2148,10 +2150,7 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
        }
 
        data->hidpp = hidpp;
-       data->feature_index = feature_index;
        data->version = version;
-       data->slot_autocenter = 0;
-       data->num_effects = num_slots;
        for (j = 0; j < num_slots; j++)
                data->effect_ids[j] = -1;
 
@@ -2165,68 +2164,20 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
        ff->set_autocenter = hidpp_ff_set_autocenter;
        ff->destroy = hidpp_ff_destroy;
 
-
-       /* reset all forces */
-       error = hidpp_send_fap_command_sync(hidpp, feature_index,
-               HIDPP_FF_RESET_ALL, NULL, 0, &response);
-
-       /* Read current Range */
-       error = hidpp_send_fap_command_sync(hidpp, feature_index,
-               HIDPP_FF_GET_APERTURE, NULL, 0, &response);
-       if (error)
-               hid_warn(hidpp->hid_dev, "Failed to read range from device!\n");
-       data->range = error ? 900 : get_unaligned_be16(&response.fap.params[0]);
-
        /* Create sysfs interface */
        error = device_create_file(&(hidpp->hid_dev->dev), &dev_attr_range);
        if (error)
                hid_warn(hidpp->hid_dev, "Unable to create sysfs interface for \"range\", errno %d!\n", error);
 
-       /* Read the current gain values */
-       error = hidpp_send_fap_command_sync(hidpp, feature_index,
-               HIDPP_FF_GET_GLOBAL_GAINS, NULL, 0, &response);
-       if (error)
-               hid_warn(hidpp->hid_dev, "Failed to read gain values from device!\n");
-       data->gain = error ? 0xffff : get_unaligned_be16(&response.fap.params[0]);
-       /* ignore boost value at response.fap.params[2] */
-
        /* init the hardware command queue */
        atomic_set(&data->workqueue_size, 0);
 
-       /* initialize with zero autocenter to get wheel in usable state */
-       hidpp_ff_set_autocenter(dev, 0);
-
        hid_info(hid, "Force feedback support loaded (firmware release %d).\n",
                 version);
 
        return 0;
 }
 
-static int hidpp_ff_deinit(struct hid_device *hid)
-{
-       struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct input_dev *dev = hidinput->input;
-       struct hidpp_ff_private_data *data;
-
-       if (!dev) {
-               hid_err(hid, "Struct input_dev not found!\n");
-               return -EINVAL;
-       }
-
-       hid_info(hid, "Unloading HID++ force feedback.\n");
-       data = dev->ff->private;
-       if (!data) {
-               hid_err(hid, "Private data not found!\n");
-               return -EINVAL;
-       }
-
-       destroy_workqueue(data->wq);
-       device_remove_file(&hid->dev, &dev_attr_range);
-
-       return 0;
-}
-
-
 /* ************************************************************************** */
 /*                                                                            */
 /* Device Support                                                             */
@@ -2728,24 +2679,93 @@ static int k400_connect(struct hid_device *hdev, bool connected)
 
 #define HIDPP_PAGE_G920_FORCE_FEEDBACK                 0x8123
 
-static int g920_get_config(struct hidpp_device *hidpp)
+static int g920_ff_set_autocenter(struct hidpp_device *hidpp,
+                                 struct hidpp_ff_private_data *data)
 {
+       struct hidpp_report response;
+       u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH] = {
+               [1] = HIDPP_FF_EFFECT_SPRING | HIDPP_FF_EFFECT_AUTOSTART,
+       };
+       int ret;
+
+       /* initialize with zero autocenter to get wheel in usable state */
+
+       dbg_hid("Setting autocenter to 0.\n");
+       ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+                                         HIDPP_FF_DOWNLOAD_EFFECT,
+                                         params, ARRAY_SIZE(params),
+                                         &response);
+       if (ret)
+               hid_warn(hidpp->hid_dev, "Failed to autocenter device!\n");
+       else
+               data->slot_autocenter = response.fap.params[0];
+
+       return ret;
+}
+
+static int g920_get_config(struct hidpp_device *hidpp,
+                          struct hidpp_ff_private_data *data)
+{
+       struct hidpp_report response;
        u8 feature_type;
-       u8 feature_index;
        int ret;
 
+       memset(data, 0, sizeof(*data));
+
        /* Find feature and store for later use */
        ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_G920_FORCE_FEEDBACK,
-               &feature_index, &feature_type);
+                                    &data->feature_index, &feature_type);
        if (ret)
                return ret;
 
-       ret = hidpp_ff_init(hidpp, feature_index);
+       /* Read number of slots available in device */
+       ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+                                         HIDPP_FF_GET_INFO,
+                                         NULL, 0,
+                                         &response);
+       if (ret) {
+               if (ret < 0)
+                       return ret;
+               hid_err(hidpp->hid_dev,
+                       "%s: received protocol error 0x%02x\n", __func__, ret);
+               return -EPROTO;
+       }
+
+       data->num_effects = response.fap.params[0] - HIDPP_FF_RESERVED_SLOTS;
+
+       /* reset all forces */
+       ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+                                         HIDPP_FF_RESET_ALL,
+                                         NULL, 0,
+                                         &response);
        if (ret)
-               hid_warn(hidpp->hid_dev, "Unable to initialize force feedback support, errno %d\n",
-                               ret);
+               hid_warn(hidpp->hid_dev, "Failed to reset all forces!\n");
 
-       return 0;
+       ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+                                         HIDPP_FF_GET_APERTURE,
+                                         NULL, 0,
+                                         &response);
+       if (ret) {
+               hid_warn(hidpp->hid_dev,
+                        "Failed to read range from device!\n");
+       }
+       data->range = ret ?
+               900 : get_unaligned_be16(&response.fap.params[0]);
+
+       /* Read the current gain values */
+       ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+                                         HIDPP_FF_GET_GLOBAL_GAINS,
+                                         NULL, 0,
+                                         &response);
+       if (ret)
+               hid_warn(hidpp->hid_dev,
+                        "Failed to read gain values from device!\n");
+       data->gain = ret ?
+               0xffff : get_unaligned_be16(&response.fap.params[0]);
+
+       /* ignore boost value at response.fap.params[2] */
+
+       return g920_ff_set_autocenter(hidpp, data);
 }
 
 /* -------------------------------------------------------------------------- */
@@ -3461,34 +3481,45 @@ static int hidpp_get_report_length(struct hid_device *hdev, int id)
        return report->field[0]->report_count + 1;
 }
 
-static bool hidpp_validate_report(struct hid_device *hdev, int id,
-                                 int expected_length, bool optional)
+static bool hidpp_validate_device(struct hid_device *hdev)
 {
-       int report_length;
+       struct hidpp_device *hidpp = hid_get_drvdata(hdev);
+       int id, report_length, supported_reports = 0;
 
-       if (id >= HID_MAX_IDS || id < 0) {
-               hid_err(hdev, "invalid HID report id %u\n", id);
-               return false;
+       id = REPORT_ID_HIDPP_SHORT;
+       report_length = hidpp_get_report_length(hdev, id);
+       if (report_length) {
+               if (report_length < HIDPP_REPORT_SHORT_LENGTH)
+                       goto bad_device;
+
+               supported_reports++;
        }
 
+       id = REPORT_ID_HIDPP_LONG;
        report_length = hidpp_get_report_length(hdev, id);
-       if (!report_length)
-               return optional;
+       if (report_length) {
+               if (report_length < HIDPP_REPORT_LONG_LENGTH)
+                       goto bad_device;
 
-       if (report_length < expected_length) {
-               hid_warn(hdev, "not enough values in hidpp report %d\n", id);
-               return false;
+               supported_reports++;
        }
 
-       return true;
-}
+       id = REPORT_ID_HIDPP_VERY_LONG;
+       report_length = hidpp_get_report_length(hdev, id);
+       if (report_length) {
+               if (report_length < HIDPP_REPORT_LONG_LENGTH ||
+                   report_length > HIDPP_REPORT_VERY_LONG_MAX_LENGTH)
+                       goto bad_device;
 
-static bool hidpp_validate_device(struct hid_device *hdev)
-{
-       return hidpp_validate_report(hdev, REPORT_ID_HIDPP_SHORT,
-                                    HIDPP_REPORT_SHORT_LENGTH, false) &&
-              hidpp_validate_report(hdev, REPORT_ID_HIDPP_LONG,
-                                    HIDPP_REPORT_LONG_LENGTH, true);
+               supported_reports++;
+               hidpp->very_long_report_length = report_length;
+       }
+
+       return supported_reports;
+
+bad_device:
+       hid_warn(hdev, "not enough values in hidpp report %d\n", id);
+       return false;
 }
 
 static bool hidpp_application_equals(struct hid_device *hdev,
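
hidpp_validate_device() now accepts a device if any of the short, long, or
very-long HID++ report IDs is present with a sane length, counting the
supported report types (the caller treats the count as a boolean) instead of
insisting on the short report; the very-long report length is also recorded
here, replacing the probe-time clamp removed below. The per-ID check it
applies three times, in sketch form:

        /* Zero length means this report id is absent, which is fine as long
         * as some other id is supported; present but too short means a
         * malformed device, which is rejected outright. */
        report_length = hidpp_get_report_length(hdev, id);
        if (report_length) {
                if (report_length < min_length)
                        goto bad_device;
                supported_reports++;
        }
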
@@ -3508,6 +3539,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
        int ret;
        bool connected;
        unsigned int connect_mask = HID_CONNECT_DEFAULT;
+       struct hidpp_ff_private_data data;
 
        /* report_fixup needs drvdata to be set before we call hid_parse */
        hidpp = devm_kzalloc(&hdev->dev, sizeof(*hidpp), GFP_KERNEL);
@@ -3534,11 +3566,6 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
                return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
        }
 
-       hidpp->very_long_report_length =
-               hidpp_get_report_length(hdev, REPORT_ID_HIDPP_VERY_LONG);
-       if (hidpp->very_long_report_length > HIDPP_REPORT_VERY_LONG_MAX_LENGTH)
-               hidpp->very_long_report_length = HIDPP_REPORT_VERY_LONG_MAX_LENGTH;
-
        if (id->group == HID_GROUP_LOGITECH_DJ_DEVICE)
                hidpp->quirks |= HIDPP_QUIRK_UNIFYING;
 
@@ -3617,7 +3644,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
                if (ret)
                        goto hid_hw_init_fail;
        } else if (connected && (hidpp->quirks & HIDPP_QUIRK_CLASS_G920)) {
-               ret = g920_get_config(hidpp);
+               ret = g920_get_config(hidpp, &data);
                if (ret)
                        goto hid_hw_init_fail;
        }
@@ -3639,6 +3666,14 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
                goto hid_hw_start_fail;
        }
 
+       if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920) {
+               ret = hidpp_ff_init(hidpp, &data);
+               if (ret)
+                       hid_warn(hidpp->hid_dev,
+                    "Unable to initialize force feedback support, errno %d\n",
+                                ret);
+       }
+
        return ret;
 
 hid_hw_init_fail:
@@ -3661,9 +3696,6 @@ static void hidpp_remove(struct hid_device *hdev)
 
        sysfs_remove_group(&hdev->dev.kobj, &ps_attribute_group);
 
-       if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920)
-               hidpp_ff_deinit(hdev);
-
        hid_hw_stop(hdev);
        cancel_work_sync(&hidpp->work);
        mutex_destroy(&hidpp->send_mutex);
index 2cf8385..2d8b589 100644 (file)
@@ -328,11 +328,17 @@ static int ms_play_effect(struct input_dev *dev, void *data,
 
 static int ms_init_ff(struct hid_device *hdev)
 {
-       struct hid_input *hidinput = list_entry(hdev->inputs.next,
-                                               struct hid_input, list);
-       struct input_dev *input_dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *input_dev;
        struct ms_data *ms = hid_get_drvdata(hdev);
 
+       if (list_empty(&hdev->inputs)) {
+               hid_err(hdev, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hdev->inputs.next, struct hid_input, list);
+       input_dev = hidinput->input;
+
        if (!(ms->quirks & MS_QUIRK_FF))
                return 0;
 
index 5a3b3d9..2666af0 100644 (file)
@@ -516,7 +516,7 @@ static void pcmidi_setup_extra_keys(
                MY PICTURES =>  KEY_WORDPROCESSOR
                MY MUSIC =>     KEY_SPREADSHEET
        */
-       unsigned int keys[] = {
+       static const unsigned int keys[] = {
                KEY_FN,
                KEY_MESSENGER, KEY_CALENDAR,
                KEY_ADDRESSBOOK, KEY_DOCUMENTS,
@@ -532,7 +532,7 @@ static void pcmidi_setup_extra_keys(
                0
        };
 
-       unsigned int *pkeys = &keys[0];
+       const unsigned int *pkeys = &keys[0];
        unsigned short i;
 
        if (pm->ifnum != 1)  /* only set up ONCE for interface 1 */
index c50bcd9..d1b39c2 100644 (file)
@@ -94,6 +94,7 @@ static const struct hid_device_id hid_quirks[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0941), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0641), HID_QUIRK_ALWAYS_POLL },
+       { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_1f4a), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_IDEACOM, USB_DEVICE_ID_IDEACOM_IDC6680), HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_INNOMEDIA, USB_DEVICE_ID_INNEX_GENESIS_ATARI), HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X), HID_QUIRK_MULTI_INPUT },
@@ -419,13 +420,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
 #if IS_ENABLED(CONFIG_HID_LCPOWER)
        { HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000) },
 #endif
-#if IS_ENABLED(CONFIG_HID_LED)
-       { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
-#endif
 #if IS_ENABLED(CONFIG_HID_LENOVO)
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
index 7c6abd7..9ce22ac 100644 (file)
@@ -744,7 +744,8 @@ static void rmi_remove(struct hid_device *hdev)
 {
        struct rmi_data *hdata = hid_get_drvdata(hdev);
 
-       if (hdata->device_flags & RMI_DEVICE) {
+       if ((hdata->device_flags & RMI_DEVICE)
+           && test_bit(RMI_STARTED, &hdata->flags)) {
                clear_bit(RMI_STARTED, &hdata->flags);
                cancel_work_sync(&hdata->reset_work);
                rmi_unregister_transport_device(&hdata->xport);
index 73c0f7a..4c6ed6e 100644 (file)
@@ -2254,9 +2254,15 @@ static int sony_play_effect(struct input_dev *dev, void *data,
 
 static int sony_init_ff(struct sony_sc *sc)
 {
-       struct hid_input *hidinput = list_entry(sc->hdev->inputs.next,
-                                               struct hid_input, list);
-       struct input_dev *input_dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *input_dev;
+
+       if (list_empty(&sc->hdev->inputs)) {
+               hid_err(sc->hdev, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(sc->hdev->inputs.next, struct hid_input, list);
+       input_dev = hidinput->input;
 
        input_set_capability(input_dev, EV_FF, FF_RUMBLE);
        return input_ff_create_memless(input_dev, NULL, sony_play_effect);
index bdfc5ff..90acef3 100644 (file)
@@ -124,12 +124,18 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits)
        struct tmff_device *tmff;
        struct hid_report *report;
        struct list_head *report_list;
-       struct hid_input *hidinput = list_entry(hid->inputs.next,
-                                                       struct hid_input, list);
-       struct input_dev *input_dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *input_dev;
        int error;
        int i;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       input_dev = hidinput->input;
+
        tmff = kzalloc(sizeof(struct tmff_device), GFP_KERNEL);
        if (!tmff)
                return -ENOMEM;
index f90959e..3abaca0 100644 (file)
@@ -54,11 +54,17 @@ static int zpff_init(struct hid_device *hid)
 {
        struct zpff_device *zpff;
        struct hid_report *report;
-       struct hid_input *hidinput = list_entry(hid->inputs.next,
-                                               struct hid_input, list);
-       struct input_dev *dev = hidinput->input;
+       struct hid_input *hidinput;
+       struct input_dev *dev;
        int i, error;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
        for (i = 0; i < 4; i++) {
                report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1);
                if (!report)
index bbc6ec1..9432670 100644 (file)
@@ -197,15 +197,15 @@ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t
        }
 
        if (count > HID_MAX_BUFFER_SIZE) {
-               printk(KERN_WARNING "hidraw: pid %d passed too large report\n",
-                               task_pid_nr(current));
+               hid_warn(dev, "pid %d passed too large report\n",
+                       task_pid_nr(current));
                ret = -EINVAL;
                goto out;
        }
 
        if (count < 2) {
-               printk(KERN_WARNING "hidraw: pid %d passed too short report\n",
-                               task_pid_nr(current));
+               hid_warn(dev, "pid %d passed too short report\n",
+                       task_pid_nr(current));
                ret = -EINVAL;
                goto out;
        }
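
Both hidraw logging conversions, here and in the hunk below, lean on the kernel's logging helpers to supply the subsystem prefix, so the hand-written "hidraw: " string can go: hid_warn() prefixes the device name, and pr_info() picks up a per-file prefix via pr_fmt. A minimal sketch of the usual pr_fmt idiom, assuming the file defines it before any include:

/* Must appear before any #include so the macro applies everywhere. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

static int example(void)
{
        pr_info("raw HID events driver (C) Jiri Kosina\n");
        /* prints: "hidraw: raw HID events driver (C) Jiri Kosina" */
        return 0;
}
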
@@ -597,7 +597,7 @@ int __init hidraw_init(void)
        if (result < 0)
                goto error_class;
 
-       printk(KERN_INFO "hidraw: raw HID events driver (C) Jiri Kosina\n");
+       pr_info("raw HID events driver (C) Jiri Kosina\n");
 out:
        return result;
 
index 2a7c6e3..a358e61 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/pm.h>
-#include <linux/pm_runtime.h>
 #include <linux/device.h>
 #include <linux/wait.h>
 #include <linux/err.h>
@@ -48,9 +47,8 @@
 /* quirks to control the device */
 #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV       BIT(0)
 #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET       BIT(1)
-#define I2C_HID_QUIRK_NO_RUNTIME_PM            BIT(2)
-#define I2C_HID_QUIRK_DELAY_AFTER_SLEEP                BIT(3)
 #define I2C_HID_QUIRK_BOGUS_IRQ                        BIT(4)
+#define I2C_HID_QUIRK_RESET_ON_RESUME          BIT(5)
 
 /* flags */
 #define I2C_HID_STARTED                0
@@ -160,8 +158,6 @@ struct i2c_hid {
 
        bool                    irq_wake_enabled;
        struct mutex            reset_lock;
-
-       unsigned long           sleep_delay;
 };
 
 static const struct i2c_hid_quirks {
@@ -172,16 +168,13 @@ static const struct i2c_hid_quirks {
        { USB_VENDOR_ID_WEIDA, HID_ANY_ID,
                I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV },
        { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288,
-               I2C_HID_QUIRK_NO_IRQ_AFTER_RESET |
-               I2C_HID_QUIRK_NO_RUNTIME_PM },
-       { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_4B33,
-               I2C_HID_QUIRK_DELAY_AFTER_SLEEP },
-       { USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_8001,
-               I2C_HID_QUIRK_NO_RUNTIME_PM },
-       { I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_01F0,
-               I2C_HID_QUIRK_NO_RUNTIME_PM },
+               I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
+       { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_3118,
+               I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
        { USB_VENDOR_ID_ELAN, HID_ANY_ID,
                 I2C_HID_QUIRK_BOGUS_IRQ },
+       { USB_VENDOR_ID_ALPS_JP, HID_ANY_ID,
+                I2C_HID_QUIRK_RESET_ON_RESUME },
        { 0, 0 }
 };
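
The table keeps its { 0, 0 } sentinel, so the driver can walk it linearly at probe time. A sketch of such a lookup, assuming HID_ANY_ID acts as a product wildcard and that the field names idVendor/idProduct/quirks match the driver's struct i2c_hid_quirks (the helper itself is illustrative):

static u32 example_lookup_quirks(u16 vendor, u16 product)
{
        u32 quirks = 0;
        int n;

        for (n = 0; i2c_hid_quirks[n].idVendor; n++)
                if (i2c_hid_quirks[n].idVendor == vendor &&
                    (i2c_hid_quirks[n].idProduct == (__u16)HID_ANY_ID ||
                     i2c_hid_quirks[n].idProduct == product))
                        quirks = i2c_hid_quirks[n].quirks;

        return quirks;
}
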
 
@@ -397,7 +390,6 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state)
 {
        struct i2c_hid *ihid = i2c_get_clientdata(client);
        int ret;
-       unsigned long now, delay;
 
        i2c_hid_dbg(ihid, "%s\n", __func__);
 
@@ -415,22 +407,9 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state)
                        goto set_pwr_exit;
        }
 
-       if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP &&
-           power_state == I2C_HID_PWR_ON) {
-               now = jiffies;
-               if (time_after(ihid->sleep_delay, now)) {
-                       delay = jiffies_to_usecs(ihid->sleep_delay - now);
-                       usleep_range(delay, delay + 1);
-               }
-       }
-
        ret = __i2c_hid_command(client, &hid_set_power_cmd, power_state,
                0, NULL, 0, NULL, 0);
 
-       if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP &&
-           power_state == I2C_HID_PWR_SLEEP)
-               ihid->sleep_delay = jiffies + msecs_to_jiffies(20);
-
        if (ret)
                dev_err(&client->dev, "failed to change power setting.\n");
 
@@ -471,8 +450,12 @@ static int i2c_hid_hwreset(struct i2c_client *client)
        if (ret) {
                dev_err(&client->dev, "failed to reset device.\n");
                i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+               goto out_unlock;
        }
 
+       /* At least some SIS devices need this after reset */
+       ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+
 out_unlock:
        mutex_unlock(&ihid->reset_lock);
        return ret;
@@ -791,11 +774,6 @@ static int i2c_hid_open(struct hid_device *hid)
 {
        struct i2c_client *client = hid->driver_data;
        struct i2c_hid *ihid = i2c_get_clientdata(client);
-       int ret = 0;
-
-       ret = pm_runtime_get_sync(&client->dev);
-       if (ret < 0)
-               return ret;
 
        set_bit(I2C_HID_STARTED, &ihid->flags);
        return 0;
@@ -807,27 +785,6 @@ static void i2c_hid_close(struct hid_device *hid)
        struct i2c_hid *ihid = i2c_get_clientdata(client);
 
        clear_bit(I2C_HID_STARTED, &ihid->flags);
-
-       /* Save some power */
-       pm_runtime_put(&client->dev);
-}
-
-static int i2c_hid_power(struct hid_device *hid, int lvl)
-{
-       struct i2c_client *client = hid->driver_data;
-       struct i2c_hid *ihid = i2c_get_clientdata(client);
-
-       i2c_hid_dbg(ihid, "%s lvl:%d\n", __func__, lvl);
-
-       switch (lvl) {
-       case PM_HINT_FULLON:
-               pm_runtime_get_sync(&client->dev);
-               break;
-       case PM_HINT_NORMAL:
-               pm_runtime_put(&client->dev);
-               break;
-       }
-       return 0;
 }
 
 struct hid_ll_driver i2c_hid_ll_driver = {
@@ -836,7 +793,6 @@ struct hid_ll_driver i2c_hid_ll_driver = {
        .stop = i2c_hid_stop,
        .open = i2c_hid_open,
        .close = i2c_hid_close,
-       .power = i2c_hid_power,
        .output_report = i2c_hid_output_report,
        .raw_request = i2c_hid_raw_request,
 };
@@ -1104,9 +1060,6 @@ static int i2c_hid_probe(struct i2c_client *client,
 
        i2c_hid_acpi_fix_up_power(&client->dev);
 
-       pm_runtime_get_noresume(&client->dev);
-       pm_runtime_set_active(&client->dev);
-       pm_runtime_enable(&client->dev);
        device_enable_async_suspend(&client->dev);
 
        /* Make sure there is something at this address */
@@ -1114,16 +1067,16 @@ static int i2c_hid_probe(struct i2c_client *client,
        if (ret < 0) {
                dev_dbg(&client->dev, "nothing at this address: %d\n", ret);
                ret = -ENXIO;
-               goto err_pm;
+               goto err_regulator;
        }
 
        ret = i2c_hid_fetch_hid_descriptor(ihid);
        if (ret < 0)
-               goto err_pm;
+               goto err_regulator;
 
        ret = i2c_hid_init_irq(client);
        if (ret < 0)
-               goto err_pm;
+               goto err_regulator;
 
        hid = hid_allocate_device();
        if (IS_ERR(hid)) {
@@ -1154,9 +1107,6 @@ static int i2c_hid_probe(struct i2c_client *client,
                goto err_mem_free;
        }
 
-       if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM))
-               pm_runtime_put(&client->dev);
-
        return 0;
 
 err_mem_free:
@@ -1165,10 +1115,6 @@ err_mem_free:
 err_irq:
        free_irq(client->irq, ihid);
 
-err_pm:
-       pm_runtime_put_noidle(&client->dev);
-       pm_runtime_disable(&client->dev);
-
 err_regulator:
        regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
                               ihid->pdata.supplies);
@@ -1181,12 +1127,6 @@ static int i2c_hid_remove(struct i2c_client *client)
        struct i2c_hid *ihid = i2c_get_clientdata(client);
        struct hid_device *hid;
 
-       if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM))
-               pm_runtime_get_sync(&client->dev);
-       pm_runtime_disable(&client->dev);
-       pm_runtime_set_suspended(&client->dev);
-       pm_runtime_put_noidle(&client->dev);
-
        hid = ihid->hid;
        hid_destroy_device(hid);
 
@@ -1219,25 +1159,15 @@ static int i2c_hid_suspend(struct device *dev)
        int wake_status;
 
        if (hid->driver && hid->driver->suspend) {
-               /*
-                * Wake up the device so that IO issues in
-                * HID driver's suspend code can succeed.
-                */
-               ret = pm_runtime_resume(dev);
-               if (ret < 0)
-                       return ret;
-
                ret = hid->driver->suspend(hid, PMSG_SUSPEND);
                if (ret < 0)
                        return ret;
        }
 
-       if (!pm_runtime_suspended(dev)) {
-               /* Save some power */
-               i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+       /* Save some power */
+       i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
 
-               disable_irq(client->irq);
-       }
+       disable_irq(client->irq);
 
        if (device_may_wakeup(&client->dev)) {
                wake_status = enable_irq_wake(client->irq);
@@ -1279,19 +1209,21 @@ static int i2c_hid_resume(struct device *dev)
                                wake_status);
        }
 
-       /* We'll resume to full power */
-       pm_runtime_disable(dev);
-       pm_runtime_set_active(dev);
-       pm_runtime_enable(dev);
-
        enable_irq(client->irq);
 
        /* Instead of resetting the device, simply power it on. This
         * solves "incomplete reports" on Raydium devices 2386:3118 and
         * 2386:4B33 and fixes various SIS touchscreens no longer sending
         * data after a suspend/resume.
+        *
+        * However, some ALPS touchpads generate an IRQ storm without a
+        * reset, so let's still reset them here.
         */
-       ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+       if (ihid->quirks & I2C_HID_QUIRK_RESET_ON_RESUME)
+               ret = i2c_hid_hwreset(client);
+       else
+               ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+
        if (ret)
                return ret;
 
@@ -1304,30 +1236,8 @@ static int i2c_hid_resume(struct device *dev)
 }
 #endif
 
-#ifdef CONFIG_PM
-static int i2c_hid_runtime_suspend(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-
-       i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
-       disable_irq(client->irq);
-       return 0;
-}
-
-static int i2c_hid_runtime_resume(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-
-       enable_irq(client->irq);
-       i2c_hid_set_power(client, I2C_HID_PWR_ON);
-       return 0;
-}
-#endif
-
 static const struct dev_pm_ops i2c_hid_pm = {
        SET_SYSTEM_SLEEP_PM_OPS(i2c_hid_suspend, i2c_hid_resume)
-       SET_RUNTIME_PM_OPS(i2c_hid_runtime_suspend, i2c_hid_runtime_resume,
-                          NULL)
 };
 
 static const struct i2c_device_id i2c_hid_id_table[] = {
index 75078c8..d31ea82 100644 (file)
@@ -322,6 +322,25 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = {
                },
                .driver_data = (void *)&sipodev_desc
        },
+       {
+               /*
+                * There are at least 2 Primebook C11B versions, the older
+                * version has a product-name of "Primebook C11B", and a
+                * bios version / release / firmware revision of:
+                * V2.1.2 / 05/03/2018 / 18.2
+                * The new version has "PRIMEBOOK C11B" as product-name and a
+                * bios version / release / firmware revision of:
+                * CFALKSW05_BIOS_V1.1.2 / 11/19/2018 / 19.2
+                * Only the older version needs this quirk; note that the newer
+                * version will not match, as it has a different product-name.
+                */
+               .ident = "Trekstor Primebook C11B",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Primebook C11B"),
+               },
+               .driver_data = (void *)&sipodev_desc
+       },
        {
                .ident = "Direkt-Tek DTLAPY116-2",
                .matches = {
index 1b0a0cc..513d7a4 100644 (file)
@@ -84,7 +84,7 @@ int ishtp_cl_alloc_tx_ring(struct ishtp_cl *cl)
        return  0;
 out:
        dev_err(&cl->device->dev, "error in allocating Tx pool\n");
-       ishtp_cl_free_rx_ring(cl);
+       ishtp_cl_free_tx_ring(cl);
        return  -ENOMEM;
 }
 
index c6c9ac0..30a91d0 100644 (file)
@@ -402,7 +402,7 @@ static void ishtp_hbm_cl_connect_res(struct ishtp_device *dev,
  * @dev: ISHTP device instance
  * @disconnect_req: disconnect request structure
  *
- * Disconnect request bus message from the fw. Send diconnect response.
+ * Disconnect request bus message from the fw. Send disconnect response.
  */
 static void ishtp_hbm_fw_disconnect_req(struct ishtp_device *dev,
        struct hbm_client_connect_request *disconnect_req)
index 4a7f8d3..203d27d 100644 (file)
@@ -202,6 +202,21 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac,
        }
 }
 
+/*
+ * Convert a signed 32-bit integer to an unsigned n-bit integer. Undoes
+ * the normally-helpful work of 'hid_snto32' for fields that use signed
+ * ranges for questionable reasons.
+ */
+static inline __u32 wacom_s32tou(s32 value, __u8 n)
+{
+       switch (n) {
+       case 8:  return ((__u8)value);
+       case 16: return ((__u16)value);
+       case 32: return ((__u32)value);
+       }
+       return value & (1 << (n - 1)) ? value & (~(~0U << n)) : value;
+}
+
 extern const struct hid_device_id wacom_ids[];
 
 void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len);
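
A quick worked example of what wacom_s32tou() recovers: hid_snto32() sign-extends an n-bit field, so a 20-bit field holding 0xFFFFF arrives as -1, and masking it undoes that. A standalone sketch using standard C types in place of the kernel's __u32/__u8:

#include <assert.h>
#include <stdint.h>

/* Same logic as wacom_s32tou(), using standard types. */
static uint32_t s32tou(int32_t value, uint8_t n)
{
        switch (n) {
        case 8:  return (uint8_t)value;
        case 16: return (uint16_t)value;
        case 32: return (uint32_t)value;
        }
        return value & (1 << (n - 1)) ? value & ~(~0U << n) : value;
}

int main(void)
{
        assert(s32tou(-1, 20) == 0xFFFFF);      /* sign-extended field recovered */
        assert(s32tou(0x7FFFF, 20) == 0x7FFFF); /* positive values unchanged */
        assert(s32tou(-1, 8) == 0xFF);
        return 0;
}
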
index 2b0a5b8..ccb7452 100644 (file)
@@ -2303,7 +2303,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
        case HID_DG_TOOLSERIALNUMBER:
                if (value) {
                        wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL);
-                       wacom_wac->serial[0] |= (__u32)value;
+                       wacom_wac->serial[0] |= wacom_s32tou(value, field->report_size);
                }
                return;
        case HID_DG_TWIST:
@@ -2319,15 +2319,17 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
                return;
        case WACOM_HID_WD_SERIALHI:
                if (value) {
+                       __u32 raw_value = wacom_s32tou(value, field->report_size);
+
                        wacom_wac->serial[0] = (wacom_wac->serial[0] & 0xFFFFFFFF);
-                       wacom_wac->serial[0] |= ((__u64)value) << 32;
+                       wacom_wac->serial[0] |= ((__u64)raw_value) << 32;
                        /*
                         * Non-USI EMR devices may contain additional tool type
                         * information here. See WACOM_HID_WD_TOOLTYPE case for
                         * more details.
                         */
                        if (value >> 20 == 1) {
-                               wacom_wac->id[0] |= value & 0xFFFFF;
+                               wacom_wac->id[0] |= raw_value & 0xFFFFF;
                        }
                }
                return;
@@ -2339,7 +2341,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
                 * bitwise OR so the complete value can be built
                 * up over time :(
                 */
-               wacom_wac->id[0] |= value;
+               wacom_wac->id[0] |= wacom_s32tou(value, field->report_size);
                return;
        case WACOM_HID_WD_OFFSETLEFT:
                if (features->offset_left && value != features->offset_left)
index addcef5..8eb1675 100644 (file)
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
                cpumask_clear_cpu(channel->target_cpu,
                                  &primary_channel->alloced_cpus_in_node);
 
-       vmbus_release_relid(channel->offermsg.child_relid);
+       /*
+        * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+        * the relid is invalidated; after hibernation, when the user-space app
+        * destroys the channel, the relid is INVALID_RELID, and in this case
+        * it's unnecessary and unsafe to release the old relid, since the same
+        * relid can refer to a completely different channel now.
+        */
+       if (channel->offermsg.child_relid != INVALID_RELID)
+               vmbus_release_relid(channel->offermsg.child_relid);
 
        free_channel(channel);
 }
@@ -545,6 +553,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
        mutex_lock(&vmbus_connection.channel_mutex);
 
+       /* Remember the channels that should be cleaned up upon suspend. */
+       if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+               atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
        /*
         * Now that we have acquired the channel_mutex,
         * we can release the potentially racing rescind thread.
@@ -847,6 +859,67 @@ void vmbus_initiate_unload(bool crash)
                vmbus_wait_for_unload();
 }
 
+static void check_ready_for_resume_event(void)
+{
+       /*
+        * If all the old primary channels have been fixed up, then it's safe
+        * to resume.
+        */
+       if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+               complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+                                     struct vmbus_channel_offer_channel *offer)
+{
+       /*
+        * Setup state for signalling the host.
+        */
+       channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+       if (vmbus_proto_version != VERSION_WS2008) {
+               channel->is_dedicated_interrupt =
+                               (offer->is_dedicated_interrupt != 0);
+               channel->sig_event = offer->connection_id;
+       }
+
+       memcpy(&channel->offermsg, offer,
+              sizeof(struct vmbus_channel_offer_channel));
+       channel->monitor_grp = (u8)offer->monitorid / 32;
+       channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
+/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+       struct vmbus_channel *channel = NULL, *iter;
+       const guid_t *inst1, *inst2;
+
+       /* Ignore sub-channel offers. */
+       if (offer->offer.sub_channel_index != 0)
+               return NULL;
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+
+       list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+               inst1 = &iter->offermsg.offer.if_instance;
+               inst2 = &offer->offer.if_instance;
+
+               if (guid_equal(inst1, inst2)) {
+                       channel = iter;
+                       break;
+               }
+       }
+
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       return channel;
+}
+
 /*
  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
  *
@@ -854,12 +927,58 @@ void vmbus_initiate_unload(bool crash)
 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 {
        struct vmbus_channel_offer_channel *offer;
-       struct vmbus_channel *newchannel;
+       struct vmbus_channel *oldchannel, *newchannel;
+       size_t offer_sz;
 
        offer = (struct vmbus_channel_offer_channel *)hdr;
 
        trace_vmbus_onoffer(offer);
 
+       oldchannel = find_primary_channel_by_offer(offer);
+
+       if (oldchannel != NULL) {
+               atomic_dec(&vmbus_connection.offer_in_progress);
+
+               /*
+                * We're resuming from hibernation: all the sub-channel and
+                * hv_sock channels we had before the hibernation should have
+                * been cleaned up, and now we must be seeing a re-offered
+                * primary channel that we had before the hibernation.
+                */
+
+               WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+               /* Fix up the relid. */
+               oldchannel->offermsg.child_relid = offer->child_relid;
+
+               offer_sz = sizeof(*offer);
+               if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+                       check_ready_for_resume_event();
+                       return;
+               }
+
+               /*
+                * This is not an error, since the host can also change the
+                * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+                * the offer->connection_id of the Mellanox VF vmbus device
+                * can change when the host reoffers the device upon resume.
+                */
+               pr_debug("vmbus offer changed: relid=%d\n",
+                        offer->child_relid);
+
+               print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+                                    16, 4, &oldchannel->offermsg, offer_sz,
+                                    false);
+               print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+                                    16, 4, offer, offer_sz, false);
+
+               /* Fix up the old channel. */
+               vmbus_setup_channel_state(oldchannel, offer);
+
+               check_ready_for_resume_event();
+
+               return;
+       }
+
        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
@@ -869,25 +988,21 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
                return;
        }
 
-       /*
-        * Setup state for signalling the host.
-        */
-       newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
-       if (vmbus_proto_version != VERSION_WS2008) {
-               newchannel->is_dedicated_interrupt =
-                               (offer->is_dedicated_interrupt != 0);
-               newchannel->sig_event = offer->connection_id;
-       }
-
-       memcpy(&newchannel->offermsg, offer,
-              sizeof(struct vmbus_channel_offer_channel));
-       newchannel->monitor_grp = (u8)offer->monitorid / 32;
-       newchannel->monitor_bit = (u8)offer->monitorid % 32;
+       vmbus_setup_channel_state(newchannel, offer);
 
        vmbus_process_offer(newchannel);
 }
 
+static void check_ready_for_suspend_event(void)
+{
+       /*
+        * If all the sub-channels or hv_sock channels have been cleaned up,
+        * then it's safe to suspend.
+        */
+       if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+               complete(&vmbus_connection.ready_for_suspend_event);
+}
+
 /*
  * vmbus_onoffer_rescind - Rescind offer handler.
  *
@@ -898,6 +1013,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        struct device *dev;
+       bool clean_up_chan_for_suspend;
 
        rescind = (struct vmbus_channel_rescind_offer *)hdr;
 
@@ -937,6 +1053,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
                return;
        }
 
+       clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+                                   is_sub_channel(channel);
        /*
         * Before setting channel->rescind in vmbus_rescind_cleanup(), we
         * should make sure the channel callback is not running any more.
@@ -962,6 +1080,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
        if (channel->device_obj) {
                if (channel->chn_rescind_callback) {
                        channel->chn_rescind_callback(channel);
+
+                       if (clean_up_chan_for_suspend)
+                               check_ready_for_suspend_event();
+
                        return;
                }
                /*
@@ -994,6 +1116,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
                }
                mutex_unlock(&vmbus_connection.channel_mutex);
        }
+
+       /* The "channel" may have been freed. Do not access it any longer. */
+
+       if (clean_up_chan_for_suspend)
+               check_ready_for_suspend_event();
 }
 
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
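
The nr_chan_close_on_suspend accounting above is an instance of a common rendezvous pattern: an atomic counter tracks outstanding items and the last finisher signals a completion the waiter blocks on. A generic sketch of the pattern, with illustrative names:

#include <linux/atomic.h>
#include <linux/completion.h>

static atomic_t pending;
static DECLARE_COMPLETION(all_done);

/* Each work item calls this when it finishes. */
static void one_item_done(void)
{
        if (atomic_dec_and_test(&pending))
                complete(&all_done);
}

/* The suspend path waits only if anything is actually outstanding. */
static void wait_for_items(void)
{
        if (atomic_read(&pending) > 0)
                wait_for_completion(&all_done);
        reinit_completion(&all_done);   /* arm it for the next cycle */
}
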
index 09829e1..6e4c015 100644 (file)
 struct vmbus_connection vmbus_connection = {
        .conn_state             = DISCONNECTED,
        .next_gpadl_handle      = ATOMIC_INIT(0xE1E10),
+
+       .ready_for_suspend_event = COMPLETION_INITIALIZER(
+                                  vmbus_connection.ready_for_suspend_event),
+       .ready_for_resume_event = COMPLETION_INITIALIZER(
+                                 vmbus_connection.ready_for_resume_event),
 };
 EXPORT_SYMBOL_GPL(vmbus_connection);
 
@@ -59,8 +64,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
        }
 }
 
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
-                                       __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
 {
        int ret = 0;
        unsigned int cur_cpu;
index 6188fb7..fcc5279 100644 (file)
@@ -154,7 +154,7 @@ void hv_synic_free(void)
  * retrieve the initialized message and event pages.  Otherwise, we create and
  * initialize the message and event pages.
  */
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
 {
        struct hv_per_cpu_context *hv_cpu
                = per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
        sctrl.enable = 1;
 
        hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_init(unsigned int cpu)
+{
+       hv_synic_enable_regs(cpu);
 
        hv_stimer_init(cpu);
 
@@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
 /*
  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
  */
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
 {
        union hv_synic_sint shared_sint;
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_scontrol sctrl;
+
+       hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+       shared_sint.masked = 1;
+
+       /* Need to correctly cleanup in the case of SMP!!! */
+       /* Disable the interrupt */
+       hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+       hv_get_simp(simp.as_uint64);
+       simp.simp_enabled = 0;
+       simp.base_simp_gpa = 0;
+
+       hv_set_simp(simp.as_uint64);
+
+       hv_get_siefp(siefp.as_uint64);
+       siefp.siefp_enabled = 0;
+       siefp.base_siefp_gpa = 0;
+
+       hv_set_siefp(siefp.as_uint64);
+
+       /* Disable the global synic bit */
+       hv_get_synic_state(sctrl.as_uint64);
+       sctrl.enable = 0;
+       hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
        struct vmbus_channel *channel, *sc;
        bool channel_found = false;
        unsigned long flags;
 
-       hv_get_synic_state(sctrl.as_uint64);
-       if (sctrl.enable != 1)
-               return -EFAULT;
-
        /*
         * Search for channels which are bound to the CPU we're about to
         * cleanup. In case we find one and vmbus is still connected we need to
@@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
 
        hv_stimer_cleanup(cpu);
 
-       hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-       shared_sint.masked = 1;
-
-       /* Need to correctly cleanup in the case of SMP!!! */
-       /* Disable the interrupt */
-       hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-       hv_get_simp(simp.as_uint64);
-       simp.simp_enabled = 0;
-       simp.base_simp_gpa = 0;
-
-       hv_set_simp(simp.as_uint64);
-
-       hv_get_siefp(siefp.as_uint64);
-       siefp.siefp_enabled = 0;
-       siefp.base_siefp_gpa = 0;
-
-       hv_set_siefp(siefp.as_uint64);
-
-       /* Disable the global synic bit */
-       sctrl.enable = 0;
-       hv_set_synic_state(sctrl.as_uint64);
+       hv_synic_disable_regs(cpu);
 
        return 0;
 }
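
The SynIC registers above are accessed through unions that pair a raw 64-bit value with named bitfields, so one field can be flipped and the whole register written back. A sketch of the shape of such a union; the field layout here is illustrative, not the exact Hyper-V definition:

union example_synic_reg {
        u64 as_uint64;
        struct {
                u64 enable:1;
                u64 reserved:11;
                u64 base_gpa:52;
        };
};

/* Typical disable sequence, mirroring hv_synic_disable_regs(): */
static void example_disable(union example_synic_reg *reg)
{
        reg->enable = 0;
        reg->base_gpa = 0;
        /* then write reg->as_uint64 back to the register */
}
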
index 6fb4ea5..34bd735 100644 (file)
@@ -494,7 +494,7 @@ enum hv_dm_state {
 
 
 static __u8 recv_buffer[PAGE_SIZE];
-static __u8 *send_buffer;
+static __u8 balloon_up_send_buffer[PAGE_SIZE];
 #define PAGES_IN_2M    512
 #define HA_CHUNK (32 * 1024)
 
@@ -1292,8 +1292,8 @@ static void balloon_up(struct work_struct *dummy)
        }
 
        while (!done) {
-               bl_resp = (struct dm_balloon_response *)send_buffer;
-               memset(send_buffer, 0, PAGE_SIZE);
+               memset(balloon_up_send_buffer, 0, PAGE_SIZE);
+               bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
                bl_resp->hdr.type = DM_BALLOON_RESPONSE;
                bl_resp->hdr.size = sizeof(struct dm_balloon_response);
                bl_resp->more_pages = 1;
@@ -1564,58 +1564,18 @@ static void balloon_onchannelcallback(void *context)
 
 }
 
-static int balloon_probe(struct hv_device *dev,
-                       const struct hv_vmbus_device_id *dev_id)
+static int balloon_connect_vsp(struct hv_device *dev)
 {
-       int ret;
-       unsigned long t;
        struct dm_version_request version_req;
        struct dm_capabilities cap_msg;
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-       do_hot_add = hot_add;
-#else
-       do_hot_add = false;
-#endif
-
-       /*
-        * First allocate a send buffer.
-        */
-
-       send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!send_buffer)
-               return -ENOMEM;
+       unsigned long t;
+       int ret;
 
        ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
-                       balloon_onchannelcallback, dev);
-
+                        balloon_onchannelcallback, dev);
        if (ret)
-               goto probe_error0;
+               return ret;
 
-       dm_device.dev = dev;
-       dm_device.state = DM_INITIALIZING;
-       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
-       init_completion(&dm_device.host_event);
-       init_completion(&dm_device.config_event);
-       INIT_LIST_HEAD(&dm_device.ha_region_list);
-       spin_lock_init(&dm_device.ha_lock);
-       INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
-       INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
-       dm_device.host_specified_ha_region = false;
-
-       dm_device.thread =
-                kthread_run(dm_thread_func, &dm_device, "hv_balloon");
-       if (IS_ERR(dm_device.thread)) {
-               ret = PTR_ERR(dm_device.thread);
-               goto probe_error1;
-       }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-       set_online_page_callback(&hv_online_page);
-       register_memory_notifier(&hv_memory_nb);
-#endif
-
-       hv_set_drvdata(dev, &dm_device);
        /*
         * Initiate the handshake with the host and negotiate
         * a version that the host can support. We start with the
@@ -1631,16 +1591,15 @@ static int balloon_probe(struct hv_device *dev,
        dm_device.version = version_req.version.version;
 
        ret = vmbus_sendpacket(dev->channel, &version_req,
-                               sizeof(struct dm_version_request),
-                               (unsigned long)NULL,
-                               VM_PKT_DATA_INBAND, 0);
+                              sizeof(struct dm_version_request),
+                              (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
        if (ret)
-               goto probe_error2;
+               goto out;
 
        t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
-               goto probe_error2;
+               goto out;
        }
 
        /*
@@ -1648,8 +1607,8 @@ static int balloon_probe(struct hv_device *dev,
         * fail the probe function.
         */
        if (dm_device.state == DM_INIT_ERROR) {
-               ret = -ETIMEDOUT;
-               goto probe_error2;
+               ret = -EPROTO;
+               goto out;
        }
 
        pr_info("Using Dynamic Memory protocol version %u.%u\n",
@@ -1682,16 +1641,15 @@ static int balloon_probe(struct hv_device *dev,
        cap_msg.max_page_number = -1;
 
        ret = vmbus_sendpacket(dev->channel, &cap_msg,
-                               sizeof(struct dm_capabilities),
-                               (unsigned long)NULL,
-                               VM_PKT_DATA_INBAND, 0);
+                              sizeof(struct dm_capabilities),
+                              (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
        if (ret)
-               goto probe_error2;
+               goto out;
 
        t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
-               goto probe_error2;
+               goto out;
        }
 
        /*
@@ -1699,25 +1657,65 @@ static int balloon_probe(struct hv_device *dev,
         * fail the probe function.
         */
        if (dm_device.state == DM_INIT_ERROR) {
-               ret = -ETIMEDOUT;
-               goto probe_error2;
+               ret = -EPROTO;
+               goto out;
        }
 
+       return 0;
+out:
+       vmbus_close(dev->channel);
+       return ret;
+}
+
+static int balloon_probe(struct hv_device *dev,
+                        const struct hv_vmbus_device_id *dev_id)
+{
+       int ret;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+       do_hot_add = hot_add;
+#else
+       do_hot_add = false;
+#endif
+       dm_device.dev = dev;
+       dm_device.state = DM_INITIALIZING;
+       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
+       init_completion(&dm_device.host_event);
+       init_completion(&dm_device.config_event);
+       INIT_LIST_HEAD(&dm_device.ha_region_list);
+       spin_lock_init(&dm_device.ha_lock);
+       INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+       INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+       dm_device.host_specified_ha_region = false;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+       set_online_page_callback(&hv_online_page);
+       register_memory_notifier(&hv_memory_nb);
+#endif
+
+       hv_set_drvdata(dev, &dm_device);
+
+       ret = balloon_connect_vsp(dev);
+       if (ret != 0)
+               return ret;
+
        dm_device.state = DM_INITIALIZED;
-       last_post_time = jiffies;
+
+       dm_device.thread =
+                kthread_run(dm_thread_func, &dm_device, "hv_balloon");
+       if (IS_ERR(dm_device.thread)) {
+               ret = PTR_ERR(dm_device.thread);
+               goto probe_error;
+       }
 
        return 0;
 
-probe_error2:
+probe_error:
+       vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
+       unregister_memory_notifier(&hv_memory_nb);
        restore_online_page_callback(&hv_online_page);
 #endif
-       kthread_stop(dm_device.thread);
-
-probe_error1:
-       vmbus_close(dev->channel);
-probe_error0:
-       kfree(send_buffer);
        return ret;
 }
 
@@ -1734,12 +1732,11 @@ static int balloon_remove(struct hv_device *dev)
        cancel_work_sync(&dm->balloon_wrk.wrk);
        cancel_work_sync(&dm->ha_wrk.wrk);
 
-       vmbus_close(dev->channel);
        kthread_stop(dm->thread);
-       kfree(send_buffer);
+       vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
-       restore_online_page_callback(&hv_online_page);
        unregister_memory_notifier(&hv_memory_nb);
+       restore_online_page_callback(&hv_online_page);
 #endif
        spin_lock_irqsave(&dm_device.ha_lock, flags);
        list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
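
Each negotiation step in balloon_connect_vsp() is the same request/response handshake: send a packet, wait on a completion with a 5 second timeout, then check the state recorded by the channel callback. A condensed sketch of one such step, reusing the names from the code above:

static int example_negotiate_step(struct hv_device *dev, void *msg,
                                  size_t len)
{
        unsigned long t;
        int ret;

        ret = vmbus_sendpacket(dev->channel, msg, len,
                               (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
        if (ret)
                return ret;

        /* The channel callback completes host_event when a reply lands. */
        t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ);
        if (t == 0)
                return -ETIMEDOUT;

        /* The reply may still report failure; -EPROTO fits that case. */
        if (dm_device.state == DM_INIT_ERROR)
                return -EPROTO;

        return 0;
}
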
index 50eaa1f..af9379a 100644 (file)
@@ -169,8 +169,10 @@ extern int hv_synic_alloc(void);
 
 extern void hv_synic_free(void);
 
+extern void hv_synic_enable_regs(unsigned int cpu);
 extern int hv_synic_init(unsigned int cpu);
 
+extern void hv_synic_disable_regs(unsigned int cpu);
 extern int hv_synic_cleanup(unsigned int cpu);
 
 /* Interface */
@@ -256,6 +258,32 @@ struct vmbus_connection {
        struct workqueue_struct *work_queue;
        struct workqueue_struct *handle_primary_chan_wq;
        struct workqueue_struct *handle_sub_chan_wq;
+
+       /*
+        * The number of sub-channels and hv_sock channels that should be
+        * cleaned up upon suspend: sub-channels will be re-created upon
+        * resume, and hv_sock channels should not survive suspend.
+        */
+       atomic_t nr_chan_close_on_suspend;
+       /*
+        * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+        * drop to zero.
+        */
+       struct completion ready_for_suspend_event;
+
+       /*
+        * The number of primary channels that should be "fixed up"
+        * upon resume: these channels are re-offered upon resume, and some
+        * fields of the channel offers (i.e. child_relid and connection_id)
+        * can change, so the old offermsg must be fixed up before the resume
+        * callbacks of the VSC drivers start to further touch the channels.
+        */
+       atomic_t nr_chan_fixup_on_resume;
+       /*
+        * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+        * drop to zero.
+        */
+       struct completion ready_for_resume_event;
 };
 
 
@@ -270,6 +298,8 @@ struct vmbus_msginfo {
 
 extern struct vmbus_connection vmbus_connection;
 
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
 static inline void vmbus_send_interrupt(u32 relid)
 {
        sync_set_bit(relid, vmbus_connection.send_int_page);
index ebd35fc..53a60c8 100644 (file)
 #include <linux/sched/task_stack.h>
 
 #include <asm/mshyperv.h>
+#include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
 #include <linux/screen_info.h>
 #include <linux/kdebug.h>
 #include <linux/efi.h>
 #include <linux/random.h>
+#include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
 #include "hyperv_vmbus.h"
 
@@ -910,6 +912,45 @@ static void vmbus_shutdown(struct device *child_device)
                drv->shutdown(dev);
 }
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+       struct hv_driver *drv;
+       struct hv_device *dev = device_to_hv_device(child_device);
+
+       /* The device may not be attached yet */
+       if (!child_device->driver)
+               return 0;
+
+       drv = drv_to_hv_drv(child_device->driver);
+       if (!drv->suspend)
+               return -EOPNOTSUPP;
+
+       return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+       struct hv_driver *drv;
+       struct hv_device *dev = device_to_hv_device(child_device);
+
+       /* The device may not be attached yet */
+       if (!child_device->driver)
+               return 0;
+
+       drv = drv_to_hv_drv(child_device->driver);
+       if (!drv->resume)
+               return -EOPNOTSUPP;
+
+       return drv->resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
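
These dispatchers imply that struct hv_driver now carries optional suspend/resume callbacks taking a struct hv_device. A hedged sketch of how a VSC driver might wire them up; the driver name and callback bodies are hypothetical:

static int example_vsc_probe(struct hv_device *dev,
                             const struct hv_vmbus_device_id *id)
{
        return 0;
}

static int example_vsc_remove(struct hv_device *dev)
{
        return 0;
}

static int example_vsc_suspend(struct hv_device *dev)
{
        /* quiesce the device, close its channel, save state */
        return 0;
}

static int example_vsc_resume(struct hv_device *dev)
{
        /* reopen the channel and restore saved state */
        return 0;
}

static struct hv_driver example_vsc_drv = {
        .name    = "example_vsc",
        .probe   = example_vsc_probe,
        .remove  = example_vsc_remove,
        .suspend = example_vsc_suspend,
        .resume  = example_vsc_resume,
};
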
 
 /*
  * vmbus_device_release - Final callback release of the vmbus child device
@@ -925,6 +966,14 @@ static void vmbus_device_release(struct device *device)
        kfree(hv_dev);
 }
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
 /* The one and only one */
 static struct bus_type  hv_bus = {
        .name =         "vmbus",
@@ -935,6 +984,7 @@ static struct bus_type  hv_bus = {
        .uevent =               vmbus_uevent,
        .dev_groups =           vmbus_dev_groups,
        .drv_groups =           vmbus_drv_groups,
+       .pm =                   &vmbus_pm,
 };
 
 struct onmessage_work_context {
@@ -1022,6 +1072,43 @@ msg_handled:
        vmbus_signal_eom(msg, message_type);
 }
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections cannot persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+       struct onmessage_work_context *ctx;
+       struct vmbus_channel_rescind_offer *rescind;
+
+       WARN_ON(!is_hvsock_channel(channel));
+
+       /*
+        * sizeof(*ctx) is small and the allocation should really not fail,
+        * otherwise the state of the hv_sock connections ends up in limbo.
+        */
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+       /*
+        * So far, these are not really used by Linux. Just set them to the
+        * So far, these are not really used by Linux. Just set them to
+        * reasonable values conforming to the definitions of the fields.
+       ctx->msg.header.message_type = 1;
+       ctx->msg.header.payload_size = sizeof(*rescind);
+
+       /* These values are actually used by Linux. */
+       rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+       rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+       rescind->child_relid = channel->offermsg.child_relid;
+
+       INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+       queue_work_on(vmbus_connection.connect_cpu,
+                     vmbus_connection.work_queue,
+                     &ctx->work);
+}
+#endif /* CONFIG_PM_SLEEP */
 
 /*
  * Direct callback for channels using other deferred processing
@@ -2042,6 +2129,131 @@ acpi_walk_err:
        return ret_val;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int vmbus_bus_suspend(struct device *dev)
+{
+       struct vmbus_channel *channel, *sc;
+       unsigned long flags;
+
+       while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+               /*
+                * We wait here until the completion of any channel
+                * offers that are currently in progress.
+                */
+               msleep(1);
+       }
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+       list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+               if (!is_hvsock_channel(channel))
+                       continue;
+
+               vmbus_force_channel_rescinded(channel);
+       }
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       /*
+        * Wait until all the sub-channels and hv_sock channels have been
+        * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+        * they would conflict with the new sub-channels that will be created
+        * in the resume path. hv_sock channels should also be destroyed, but
+        * a hv_sock channel of an established hv_sock connection cannot really
+        * be destroyed, since it may still be referenced by the userspace
+        * application, so we just force the hv_sock channel to be rescinded
+        * by vmbus_force_channel_rescinded(), and the userspace application
+        * will thoroughly destroy the channel after hibernation.
+        *
+        * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+        * the VM has no sub-channels or hv_sock channels, e.g. a 1-vCPU VM.
+        */
+       if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+               wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+       WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+
+       list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+               /*
+                * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+                * up the field, and the other fields (if necessary).
+                */
+               channel->offermsg.child_relid = INVALID_RELID;
+
+               if (is_hvsock_channel(channel)) {
+                       if (!channel->rescind) {
+                               pr_err("hv_sock channel not rescinded!\n");
+                               WARN_ON_ONCE(1);
+                       }
+                       continue;
+               }
+
+               spin_lock_irqsave(&channel->lock, flags);
+               list_for_each_entry(sc, &channel->sc_list, sc_list) {
+                       pr_err("Sub-channel not deleted!\n");
+                       WARN_ON_ONCE(1);
+               }
+               spin_unlock_irqrestore(&channel->lock, flags);
+
+               atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+       }
+
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       vmbus_initiate_unload(false);
+
+       vmbus_connection.conn_state = DISCONNECTED;
+
+       /* Reset the event for the next resume. */
+       reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+       return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+       struct vmbus_channel_msginfo *msginfo;
+       size_t msgsize;
+       int ret;
+
+       /*
+        * We only use the 'vmbus_proto_version', which was in use before
+        * hibernation, to re-negotiate with the host.
+        */
+       if (vmbus_proto_version == VERSION_INVAL ||
+           vmbus_proto_version == 0) {
+               pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+               return -EINVAL;
+       }
+
+       msgsize = sizeof(*msginfo) +
+                 sizeof(struct vmbus_channel_initiate_contact);
+
+       msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+       if (msginfo == NULL)
+               return -ENOMEM;
+
+       ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+       kfree(msginfo);
+
+       if (ret != 0)
+               return ret;
+
+       WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+       vmbus_request_offers();
+
+       wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
+       /* Reset the event for the next suspend. */
+       reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+       return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
        {"VMBUS", 0},
        {"VMBus", 0},
@@ -2049,6 +2261,19 @@ static const struct acpi_device_id vmbus_acpi_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
 static struct acpi_driver vmbus_acpi_driver = {
        .name = "vmbus",
        .ids = vmbus_acpi_device_ids,
@@ -2056,6 +2281,7 @@ static struct acpi_driver vmbus_acpi_driver = {
                .add = vmbus_acpi_add,
                .remove = vmbus_acpi_remove,
        },
+       .drv.pm = &vmbus_bus_pm,
 };
 
 static void hv_kexec_handler(void)
@@ -2086,6 +2312,47 @@ static void hv_crash_handler(struct pt_regs *regs)
        hyperv_cleanup();
 };
 
+static int hv_synic_suspend(void)
+{
+       /*
+        * When we reach here, all the non-boot CPUs have been offlined, and
+        * the stimers on them have been unbound in hv_synic_cleanup() ->
+        * hv_stimer_cleanup() -> clockevents_unbind_device().
+        *
+        * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
+        * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
+        * because the interrupts remain disabled between syscore_suspend()
+        * and syscore_resume(): see create_image() and resume_target_kernel();
+        * 2) the stimer on CPU0 is automatically disabled later by
+        * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+        * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
+        * would be triggered if we call clockevents_unbind_device(), which
+        * may sleep, in an interrupts-disabled context. So, we intentionally
+        * don't call hv_stimer_cleanup(0) here.
+        */
+
+       hv_synic_disable_regs(0);
+
+       return 0;
+}
+
+static void hv_synic_resume(void)
+{
+       hv_synic_enable_regs(0);
+
+       /*
+        * Note: we don't need to call hv_stimer_init(0), because the timer
+        * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+        * automatically re-enabled in timekeeping_resume().
+        */
+}
+
+/* The callbacks run only on CPU0, with irqs disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+       .suspend = hv_synic_suspend,
+       .resume = hv_synic_resume,
+};
+
 static int __init hv_acpi_init(void)
 {
        int ret, t;
@@ -2116,6 +2383,8 @@ static int __init hv_acpi_init(void)
        hv_setup_kexec_handler(hv_kexec_handler);
        hv_setup_crash_handler(hv_crash_handler);
 
+       register_syscore_ops(&hv_synic_syscore_ops);
+
        return 0;
 
 cleanup:
@@ -2128,6 +2397,8 @@ static void __exit vmbus_exit(void)
 {
        int cpu;
 
+       unregister_syscore_ops(&hv_synic_syscore_ops);
+
        hv_remove_kexec_handler();
        hv_remove_crash_handler();
        vmbus_connection.conn_state = DISCONNECTED;
index 0037e2b..8a51dcf 100644 (file)
@@ -170,7 +170,7 @@ static inline int ina3221_wait_for_data(struct ina3221_data *ina)
 
        /* Polling the CVRF bit to make sure read data is ready */
        return regmap_field_read_poll_timeout(ina->fields[F_CVRF],
-                                             cvrf, cvrf, wait, 100000);
+                                             cvrf, cvrf, wait, wait * 2);
 }
 
 static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg,
index 95b447c..281c81e 100644 (file)
 #define FANCTL1_FMR_REG                0x00    /* Bank 3; 1 reg per channel */
 #define FANCTL1_OUT_REG                0x10    /* Bank 3; 1 reg per channel */
 
+#define VOLT_MONITOR_MODE      0x0
+#define THERMAL_DIODE_MODE     0x1
+#define THERMISTOR_MODE                0x3
+
 #define ENABLE_TSI     BIT(1)
 
 static const unsigned short normal_i2c[] = {
@@ -99,6 +103,8 @@ struct nct7904_data {
        u8 enable_dts;
        u8 has_dts;
        u8 temp_mode; /* 0: TR mode, 1: TD mode */
+       u8 fan_alarm[2];
+       u8 vsen_alarm[3];
 };
 
 /* Access functions */
@@ -214,7 +220,15 @@ static int nct7904_read_fan(struct device *dev, u32 attr, int channel,
                                       SMI_STS5_REG + (channel >> 3));
                if (ret < 0)
                        return ret;
-               *val = (ret >> (channel & 0x07)) & 1;
+               if (!data->fan_alarm[channel >> 3])
+                       data->fan_alarm[channel >> 3] = ret & 0xff;
+               else
+                       /* OR in any newly raised alarm bits */
+                       data->fan_alarm[channel >> 3] |= (ret & 0xff);
+               *val = (data->fan_alarm[channel >> 3] >> (channel & 0x07)) & 1;
+               /* Clear the latched alarm bit once it has been reported */
+               if (*val)
+                       data->fan_alarm[channel >> 3] ^= 1 << (channel & 0x07);
                return 0;
        default:
                return -EOPNOTSUPP;
@@ -298,7 +312,15 @@ static int nct7904_read_in(struct device *dev, u32 attr, int channel,
                                       SMI_STS1_REG + (index >> 3));
                if (ret < 0)
                        return ret;
-               *val = (ret >> (index & 0x07)) & 1;
+               if (!data->vsen_alarm[index >> 3])
+                       data->vsen_alarm[index >> 3] = ret & 0xff;
+               else
+                       /* OR in any newly raised alarm bits */
+                       data->vsen_alarm[index >> 3] |= (ret & 0xff);
+               *val = (data->vsen_alarm[index >> 3] >> (index & 0x07)) & 1;
+               /* Clear the latched alarm bit once it has been reported */
+               if (*val)
+                       data->vsen_alarm[index >> 3] ^= 1 << (index & 0x07);
                return 0;
        default:
                return -EOPNOTSUPP;
@@ -915,12 +937,20 @@ static int nct7904_probe(struct i2c_client *client,
 
        data->temp_mode = 0;
        for (i = 0; i < 4; i++) {
-               val = (ret & (0x03 << i)) >> (i * 2);
+               val = (ret >> (i * 2)) & 0x03;
                bit = (1 << i);
-               if (val == 0)
+               if (val == VOLT_MONITOR_MODE) {
                        data->tcpu_mask &= ~bit;
-               else if (val == 0x1 || val == 0x2)
+               } else if (val == THERMAL_DIODE_MODE && i < 2) {
                        data->temp_mode |= bit;
+                       data->vsen_mask &= ~(0x06 << (i * 2));
+               } else if (val == THERMISTOR_MODE) {
+                       data->vsen_mask &= ~(0x02 << (i * 2));
+               } else {
+                       /* Reserved */
+                       data->tcpu_mask &= ~bit;
+                       data->vsen_mask &= ~(0x06 << (i * 2));
+               }
        }
 
        /* PECI */
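
The alarm handling above latches status bits in software because the hardware register clears on read: each read ORs new alarm bits into a cached byte, reports the requested channel's bit, and then clears only that bit. A small standalone example of the bit arithmetic, with illustrative values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint8_t cached = 0;             /* data->fan_alarm[...] */
        uint8_t hw_read = 0x05;         /* alarms on channels 0 and 2 */
        unsigned int channel = 2;
        int val;

        cached |= hw_read;              /* latch newly raised bits */
        val = (cached >> (channel & 0x07)) & 1;
        if (val)                        /* clear only the reported bit */
                cached ^= 1 << (channel & 0x07);

        assert(val == 1);
        assert(cached == 0x01);         /* channel 0 alarm still latched */
        return 0;
}
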
index 5587215..146ce40 100644 (file)
@@ -429,6 +429,7 @@ config I2C_AXXIA
        tristate "Axxia I2C controller"
        depends on ARCH_AXXIA || COMPILE_TEST
        default ARCH_AXXIA
+       select I2C_SLAVE
        help
          Say yes if you want to support the I2C bus on Axxia platforms.
 
@@ -977,7 +978,7 @@ config I2C_SIRF
          will be called i2c-sirf.
 
 config I2C_SPRD
-       bool "Spreadtrum I2C interface"
+       tristate "Spreadtrum I2C interface"
        depends on I2C=y && ARCH_SPRD
        help
          If you say yes to this option, support will be included for the
@@ -1309,6 +1310,20 @@ config I2C_ELEKTOR
          This support is also available as a module.  If so, the module
          will be called i2c-elektor.
 
+config I2C_ICY
+       tristate "ICY Zorro card"
+       depends on ZORRO
+       select I2C_ALGOPCF
+       help
+         This supports the PCF8584 Zorro bus I2C adapter, known as ICY.
+         Say Y if you own such an adapter.
+
+         This support is also available as a module.  If so, the module
+         will be called i2c-icy.
+
+         If you have a 2019 edition board with an LTC2990 sensor at address
+         0x4c, loading the module 'ltc2990' is sufficient to enable it.
+
 config I2C_MLXCPLD
        tristate "Mellanox I2C driver"
        depends on X86_64
index 80c2389..3ab8aeb 100644 (file)
@@ -140,6 +140,7 @@ obj-$(CONFIG_I2C_BCM_KONA)  += i2c-bcm-kona.o
 obj-$(CONFIG_I2C_BRCMSTB)      += i2c-brcmstb.o
 obj-$(CONFIG_I2C_CROS_EC_TUNNEL)       += i2c-cros-ec-tunnel.o
 obj-$(CONFIG_I2C_ELEKTOR)      += i2c-elektor.o
+obj-$(CONFIG_I2C_ICY)          += i2c-icy.o
 obj-$(CONFIG_I2C_MLXCPLD)      += i2c-mlxcpld.o
 obj-$(CONFIG_I2C_OPAL)         += i2c-opal.o
 obj-$(CONFIG_I2C_PCA_ISA)      += i2c-pca-isa.o
index fa66951..7b098ff 100644 (file)
 #define ASPEED_I2CD_S_TX_CMD                           BIT(2)
 #define ASPEED_I2CD_M_TX_CMD                           BIT(1)
 #define ASPEED_I2CD_M_START_CMD                                BIT(0)
+#define ASPEED_I2CD_MASTER_CMDS_MASK                                          \
+               (ASPEED_I2CD_M_STOP_CMD |                                      \
+                ASPEED_I2CD_M_S_RX_CMD_LAST |                                 \
+                ASPEED_I2CD_M_RX_CMD |                                        \
+                ASPEED_I2CD_M_TX_CMD |                                        \
+                ASPEED_I2CD_M_START_CMD)
 
 /* 0x18 : I2CD Slave Device Address Register   */
 #define ASPEED_I2CD_DEV_ADDR_MASK                      GENMASK(6, 0)
@@ -336,18 +342,19 @@ static void aspeed_i2c_do_start(struct aspeed_i2c_bus *bus)
        struct i2c_msg *msg = &bus->msgs[bus->msgs_index];
        u8 slave_addr = i2c_8bit_addr_from_msg(msg);
 
-       bus->master_state = ASPEED_I2C_MASTER_START;
-
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
        /*
         * If it's requested in the middle of a slave session, set the master
         * state to 'pending' then H/W will continue handling this master
         * command when the bus comes back to the idle state.
         */
-       if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE)
+       if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE) {
                bus->master_state = ASPEED_I2C_MASTER_PENDING;
+               return;
+       }
 #endif /* CONFIG_I2C_SLAVE */
 
+       bus->master_state = ASPEED_I2C_MASTER_START;
        bus->buf_index = 0;
 
        if (msg->flags & I2C_M_RD) {
@@ -422,20 +429,6 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
                }
        }
 
-#if IS_ENABLED(CONFIG_I2C_SLAVE)
-       /*
-        * A pending master command will be started by H/W when the bus comes
-        * back to idle state after completing a slave operation so change the
-        * master state from 'pending' to 'start' at here if slave is inactive.
-        */
-       if (bus->master_state == ASPEED_I2C_MASTER_PENDING) {
-               if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE)
-                       goto out_no_complete;
-
-               bus->master_state = ASPEED_I2C_MASTER_START;
-       }
-#endif /* CONFIG_I2C_SLAVE */
-
        /* Master is not currently active, irq was for someone else. */
        if (bus->master_state == ASPEED_I2C_MASTER_INACTIVE ||
            bus->master_state == ASPEED_I2C_MASTER_PENDING)
@@ -462,11 +455,15 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
                /*
                 * If a peer master starts a xfer immediately after it queues a
-                * master command, change its state to 'pending' then H/W will
-                * continue the queued master xfer just after completing the
-                * slave mode session.
+                * master command, clear the queued master command and change
+                * its state to 'pending'. To keep the pending-case handling
+                * simple, a S/W solution is used here instead of the H/W
+                * command queue.
                 */
                if (unlikely(irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH)) {
+                       writel(readl(bus->base + ASPEED_I2C_CMD_REG) &
+                               ~ASPEED_I2CD_MASTER_CMDS_MASK,
+                              bus->base + ASPEED_I2C_CMD_REG);
                        bus->master_state = ASPEED_I2C_MASTER_PENDING;
                        dev_dbg(bus->dev,
                                "master goes pending due to a slave start\n");
@@ -629,6 +626,14 @@ static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
                        irq_handled |= aspeed_i2c_master_irq(bus,
                                                             irq_remaining);
        }
+
+       /*
+        * Start a pending master command here once the slave operation
+        * has completed.
+        */
+       if (bus->master_state == ASPEED_I2C_MASTER_PENDING &&
+           bus->slave_state == ASPEED_I2C_SLAVE_INACTIVE)
+               aspeed_i2c_do_start(bus);
 #else
        irq_handled = aspeed_i2c_master_irq(bus, irq_remaining);
 #endif /* CONFIG_I2C_SLAVE */
@@ -691,6 +696,15 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
                     ASPEED_I2CD_BUS_BUSY_STS))
                        aspeed_i2c_recover_bus(bus);
 
+               /*
+                * If the transfer timed out while the master state is still
+                * pending, drop the pending master command.
+                */
+               spin_lock_irqsave(&bus->lock, flags);
+               if (bus->master_state == ASPEED_I2C_MASTER_PENDING)
+                       bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+               spin_unlock_irqrestore(&bus->lock, flags);
+
                return -ETIMEDOUT;
        }
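
Taken together, the Aspeed hunks turn the pending-master handling into a small, explicit state machine. A summary of the resulting transitions, read off the patch above (state and function names are the driver's own):

    /*
     * aspeed_i2c_do_start():    slave active   -> MASTER_PENDING, no H/W
     *                                             command queued
     *                           slave inactive -> MASTER_START, command
     *                                             written to hardware
     * aspeed_i2c_master_irq():  slave match    -> master command bits
     *                                             cleared, MASTER_PENDING
     * aspeed_i2c_bus_irq():     slave idle and MASTER_PENDING -> do_start()
     * aspeed_i2c_master_xfer(): timeout while MASTER_PENDING  -> MASTER_INACTIVE
     */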
 
index ff3142b..0214daa 100644 (file)
                                 MST_STATUS_IP)
 #define MST_TX_BYTES_XFRD      0x50
 #define MST_RX_BYTES_XFRD      0x54
+#define SLV_ADDR_DEC_CTL       0x58
+#define   SLV_ADDR_DEC_GCE     BIT(0)  /* ACK to General Call Address from own master (loopback) */
+#define   SLV_ADDR_DEC_OGCE    BIT(1)  /* ACK to General Call Address from external masters */
+#define   SLV_ADDR_DEC_SA1E    BIT(2)  /* ACK to addr_1 enabled */
+#define   SLV_ADDR_DEC_SA1M    BIT(3)  /* 10-bit addressing for addr_1 enabled */
+#define   SLV_ADDR_DEC_SA2E    BIT(4)  /* ACK to addr_2 enabled */
+#define   SLV_ADDR_DEC_SA2M    BIT(5)  /* 10-bit addressing for addr_2 enabled */
+#define SLV_ADDR_1             0x5c
+#define SLV_ADDR_2             0x60
+#define SLV_RX_CTL             0x64
+#define   SLV_RX_ACSA1         BIT(0)  /* Generate ACK for writes to addr_1 */
+#define   SLV_RX_ACSA2         BIT(1)  /* Generate ACK for writes to addr_2 */
+#define   SLV_RX_ACGCA         BIT(2)  /* ACK data phase transfers to General Call Address */
+#define SLV_DATA               0x68
+#define SLV_RX_FIFO            0x6c
+#define   SLV_FIFO_DV1         BIT(0)  /* Data Valid for addr_1 */
+#define   SLV_FIFO_DV2         BIT(1)  /* Data Valid for addr_2 */
+#define   SLV_FIFO_AS          BIT(2)  /* (N)ACK Sent */
+#define   SLV_FIFO_TNAK                BIT(3)  /* Timeout NACK */
+#define   SLV_FIFO_STRC                BIT(4)  /* First byte after start condition received */
+#define   SLV_FIFO_RSC         BIT(5)  /* Repeated Start Condition */
+#define   SLV_FIFO_STPC                BIT(6)  /* Stop Condition */
+#define   SLV_FIFO_DV          (SLV_FIFO_DV1 | SLV_FIFO_DV2)
+#define SLV_INT_ENABLE         0x70
+#define SLV_INT_STATUS         0x74
+#define   SLV_STATUS_RFH       BIT(0)  /* FIFO service */
+#define   SLV_STATUS_WTC       BIT(1)  /* Write transfer complete */
+#define   SLV_STATUS_SRS1      BIT(2)  /* Slave read from addr 1 */
+#define   SLV_STATUS_SRRS1     BIT(3)  /* Repeated start from addr 1 */
+#define   SLV_STATUS_SRND1     BIT(4)  /* Read request not following start condition */
+#define   SLV_STATUS_SRC1      BIT(5)  /* Read canceled */
+#define   SLV_STATUS_SRAT1     BIT(6)  /* Slave Read timed out */
+#define   SLV_STATUS_SRDRE1    BIT(7)  /* Data written after timed out */
+#define SLV_READ_DUMMY         0x78
 #define SCL_HIGH_PERIOD                0x80
 #define SCL_LOW_PERIOD         0x84
 #define SPIKE_FLTR_LEN         0x88
@@ -111,6 +145,8 @@ struct axxia_i2c_dev {
        struct clk *i2c_clk;
        u32 bus_clk_rate;
        bool last;
+       struct i2c_client *slave;
+       int irq;
 };
 
 static void i2c_int_disable(struct axxia_i2c_dev *idev, u32 mask)
@@ -276,13 +312,65 @@ static int axxia_i2c_fill_tx_fifo(struct axxia_i2c_dev *idev)
        return ret;
 }
 
+static void axxia_i2c_slv_fifo_event(struct axxia_i2c_dev *idev)
+{
+       u32 fifo_status = readl(idev->base + SLV_RX_FIFO);
+       u8 val;
+
+       dev_dbg(idev->dev, "slave irq fifo_status=0x%x\n", fifo_status);
+
+       if (fifo_status & SLV_FIFO_DV1) {
+               if (fifo_status & SLV_FIFO_STRC)
+                       i2c_slave_event(idev->slave,
+                                       I2C_SLAVE_WRITE_REQUESTED, &val);
+
+               val = readl(idev->base + SLV_DATA);
+               i2c_slave_event(idev->slave, I2C_SLAVE_WRITE_RECEIVED, &val);
+       }
+       if (fifo_status & SLV_FIFO_STPC) {
+               readl(idev->base + SLV_DATA); /* dummy read */
+               i2c_slave_event(idev->slave, I2C_SLAVE_STOP, &val);
+       }
+       if (fifo_status & SLV_FIFO_RSC)
+               readl(idev->base + SLV_DATA); /* dummy read */
+}
+
+static irqreturn_t axxia_i2c_slv_isr(struct axxia_i2c_dev *idev)
+{
+       u32 status = readl(idev->base + SLV_INT_STATUS);
+       u8 val;
+
+       dev_dbg(idev->dev, "slave irq status=0x%x\n", status);
+
+       if (status & SLV_STATUS_RFH)
+               axxia_i2c_slv_fifo_event(idev);
+       if (status & SLV_STATUS_SRS1) {
+               i2c_slave_event(idev->slave, I2C_SLAVE_READ_REQUESTED, &val);
+               writel(val, idev->base + SLV_DATA);
+       }
+       if (status & SLV_STATUS_SRND1) {
+               i2c_slave_event(idev->slave, I2C_SLAVE_READ_PROCESSED, &val);
+               writel(val, idev->base + SLV_DATA);
+       }
+       if (status & SLV_STATUS_SRC1)
+               i2c_slave_event(idev->slave, I2C_SLAVE_STOP, &val);
+
+       writel(INT_SLV, idev->base + INTERRUPT_STATUS);
+       return IRQ_HANDLED;
+}
+
 static irqreturn_t axxia_i2c_isr(int irq, void *_dev)
 {
        struct axxia_i2c_dev *idev = _dev;
+       irqreturn_t ret = IRQ_NONE;
        u32 status;
 
-       if (!(readl(idev->base + INTERRUPT_STATUS) & INT_MST))
-               return IRQ_NONE;
+       status = readl(idev->base + INTERRUPT_STATUS);
+
+       if (status & INT_SLV)
+               ret = axxia_i2c_slv_isr(idev);
+       if (!(status & INT_MST))
+               return ret;
 
        /* Read interrupt status bits */
        status = readl(idev->base + MST_INT_STATUS);
@@ -583,9 +671,58 @@ static u32 axxia_i2c_func(struct i2c_adapter *adap)
        return caps;
 }
 
+static int axxia_i2c_reg_slave(struct i2c_client *slave)
+{
+       struct axxia_i2c_dev *idev = i2c_get_adapdata(slave->adapter);
+       u32 slv_int_mask = SLV_STATUS_RFH;
+       u32 dec_ctl;
+
+       if (idev->slave)
+               return -EBUSY;
+
+       idev->slave = slave;
+
+       /* Enable slave mode as well */
+       writel(GLOBAL_MST_EN | GLOBAL_SLV_EN, idev->base + GLOBAL_CONTROL);
+       writel(INT_MST | INT_SLV, idev->base + INTERRUPT_ENABLE);
+
+       /* Set slave address */
+       dec_ctl = SLV_ADDR_DEC_SA1E;
+       if (slave->flags & I2C_CLIENT_TEN)
+               dec_ctl |= SLV_ADDR_DEC_SA1M;
+
+       writel(SLV_RX_ACSA1, idev->base + SLV_RX_CTL);
+       writel(dec_ctl, idev->base + SLV_ADDR_DEC_CTL);
+       writel(slave->addr, idev->base + SLV_ADDR_1);
+
+       /* Enable interrupts */
+       slv_int_mask |= SLV_STATUS_SRS1 | SLV_STATUS_SRRS1 | SLV_STATUS_SRND1;
+       slv_int_mask |= SLV_STATUS_SRC1;
+       writel(slv_int_mask, idev->base + SLV_INT_ENABLE);
+
+       return 0;
+}
+
+static int axxia_i2c_unreg_slave(struct i2c_client *slave)
+{
+       struct axxia_i2c_dev *idev = i2c_get_adapdata(slave->adapter);
+
+       /* Disable slave mode */
+       writel(GLOBAL_MST_EN, idev->base + GLOBAL_CONTROL);
+       writel(INT_MST, idev->base + INTERRUPT_ENABLE);
+
+       synchronize_irq(idev->irq);
+
+       idev->slave = NULL;
+
+       return 0;
+}
+
 static const struct i2c_algorithm axxia_i2c_algo = {
        .master_xfer = axxia_i2c_xfer,
        .functionality = axxia_i2c_func,
+       .reg_slave = axxia_i2c_reg_slave,
+       .unreg_slave = axxia_i2c_unreg_slave,
 };
 
 static const struct i2c_adapter_quirks axxia_i2c_quirks = {
@@ -599,7 +736,6 @@ static int axxia_i2c_probe(struct platform_device *pdev)
        struct axxia_i2c_dev *idev = NULL;
        struct resource *res;
        void __iomem *base;
-       int irq;
        int ret = 0;
 
        idev = devm_kzalloc(&pdev->dev, sizeof(*idev), GFP_KERNEL);
@@ -611,10 +747,10 @@ static int axxia_i2c_probe(struct platform_device *pdev)
        if (IS_ERR(base))
                return PTR_ERR(base);
 
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
+       idev->irq = platform_get_irq(pdev, 0);
+       if (idev->irq < 0) {
                dev_err(&pdev->dev, "missing interrupt resource\n");
-               return irq;
+               return idev->irq;
        }
 
        idev->i2c_clk = devm_clk_get(&pdev->dev, "i2c");
@@ -643,10 +779,10 @@ static int axxia_i2c_probe(struct platform_device *pdev)
                goto error_disable_clk;
        }
 
-       ret = devm_request_irq(&pdev->dev, irq, axxia_i2c_isr, 0,
+       ret = devm_request_irq(&pdev->dev, idev->irq, axxia_i2c_isr, 0,
                               pdev->name, idev);
        if (ret) {
-               dev_err(&pdev->dev, "failed to claim IRQ%d\n", irq);
+               dev_err(&pdev->dev, "failed to claim IRQ%d\n", idev->irq);
                goto error_disable_clk;
        }
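
The new .reg_slave/.unreg_slave hooks only arm the hardware and forward events; the consumer is a generic slave backend bound to this adapter. A minimal sketch of such a backend's callback (the demo_* names are hypothetical; the event flow and the i2c_slave_event()/i2c_slave_register() API are the kernel's generic slave interface):

    #include <linux/i2c.h>

    struct demo_slave {
        u8 buf[16];
        unsigned int idx;
    };

    static int demo_slave_cb(struct i2c_client *client,
                             enum i2c_slave_event event, u8 *val)
    {
        struct demo_slave *priv = i2c_get_clientdata(client);

        switch (event) {
        case I2C_SLAVE_WRITE_RECEIVED:      /* SLV_FIFO_DV1 path above */
            priv->buf[priv->idx++ % sizeof(priv->buf)] = *val;
            break;
        case I2C_SLAVE_READ_REQUESTED:      /* SLV_STATUS_SRS1 path */
        case I2C_SLAVE_READ_PROCESSED:      /* SLV_STATUS_SRND1 path */
            *val = priv->buf[priv->idx++ % sizeof(priv->buf)];
            break;
        case I2C_SLAVE_WRITE_REQUESTED:
        case I2C_SLAVE_STOP:                /* SLV_FIFO_STPC/SRC1 paths */
            priv->idx = 0;
            break;
        }
        return 0;
    }

A backend passes such a callback to i2c_slave_register(), which reaches axxia_i2c_reg_slave() through the new hook.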
 
index 19ef2b0..9ffdffa 100644 (file)
@@ -808,7 +808,7 @@ static struct i2c_algorithm bcm_iproc_algo = {
        .unreg_slave = bcm_iproc_i2c_unreg_slave,
 };
 
-static struct i2c_adapter_quirks bcm_iproc_i2c_quirks = {
+static const struct i2c_adapter_quirks bcm_iproc_i2c_quirks = {
        .max_read_len = M_RX_MAX_READ_LEN,
 };
 
@@ -922,7 +922,9 @@ static int bcm_iproc_i2c_probe(struct platform_device *pdev)
 
        adap = &iproc_i2c->adapter;
        i2c_set_adapdata(adap, iproc_i2c);
-       strlcpy(adap->name, "Broadcom iProc I2C adapter", sizeof(adap->name));
+       snprintf(adap->name, sizeof(adap->name),
+               "Broadcom iProc (%s)",
+               of_node_full_name(iproc_i2c->device->of_node));
        adap->algo = &bcm_iproc_algo;
        adap->quirks = &bcm_iproc_i2c_quirks;
        adap->dev.parent = &pdev->dev;
index 67752f7..e01b2b5 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -389,7 +390,7 @@ static const struct i2c_algorithm bcm2835_i2c_algo = {
 };
 
 /*
- * This HW was reported to have problems with clock stretching:
+ * The BCM2835 was reported to have problems with clock stretching:
  * http://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html
  * https://www.raspberrypi.org/forums/viewtopic.php?p=146272
  */
@@ -471,11 +472,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
        i2c_set_adapdata(adap, i2c_dev);
        adap->owner = THIS_MODULE;
        adap->class = I2C_CLASS_DEPRECATED;
-       strlcpy(adap->name, "bcm2835 I2C adapter", sizeof(adap->name));
+       snprintf(adap->name, sizeof(adap->name), "bcm2835 (%s)",
+                of_node_full_name(pdev->dev.of_node));
        adap->algo = &bcm2835_i2c_algo;
        adap->dev.parent = &pdev->dev;
        adap->dev.of_node = pdev->dev.of_node;
-       adap->quirks = &bcm2835_i2c_quirks;
+       adap->quirks = of_device_get_match_data(&pdev->dev);
 
        bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0);
 
@@ -501,7 +503,8 @@ static int bcm2835_i2c_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id bcm2835_i2c_of_match[] = {
-       { .compatible = "brcm,bcm2835-i2c" },
+       { .compatible = "brcm,bcm2711-i2c" },
+       { .compatible = "brcm,bcm2835-i2c", .data = &bcm2835_i2c_quirks },
        {},
 };
 MODULE_DEVICE_TABLE(of, bcm2835_i2c_of_match);
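
With this change the adapter quirks come from the matched of_device_id's .data: the new "brcm,bcm2711-i2c" entry carries no data, so of_device_get_match_data() returns NULL and the BCM2711 runs without the clock-stretching quirk, while "brcm,bcm2835-i2c" keeps it. The generic shape of the pattern, with hypothetical acme/demo names:

    #include <linux/i2c.h>
    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    static const struct i2c_adapter_quirks demo_quirks = {
        .flags = I2C_AQ_NO_CLK_STRETCH,
    };

    static const struct of_device_id demo_of_match[] = {
        { .compatible = "acme,demo-v2" },                   /* no quirks */
        { .compatible = "acme,demo-v1", .data = &demo_quirks },
        { }
    };

    static int demo_probe(struct platform_device *pdev)
    {
        /* Matched entry's .data, or NULL for the quirk-free v2 entry */
        const struct i2c_adapter_quirks *quirks =
                of_device_get_match_data(&pdev->dev);

        dev_info(&pdev->dev, "clock stretching %s\n",
                 quirks ? "broken on this IP" : "fine");
        return 0;
    }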
index 66af44b..b8fde61 100644 (file)
@@ -178,6 +178,51 @@ static const struct i2c_algorithm cht_wc_i2c_adap_algo = {
        .smbus_xfer = cht_wc_i2c_adap_smbus_xfer,
 };
 
+/*
+ * We are an i2c-adapter which itself is part of an i2c-client. This means that
+ * transfers done through us take adapter->bus_lock twice, once for our parent
+ * i2c-adapter and once to take our own bus_lock. Lockdep does not like this
+ * nested locking; to make lockdep happy in the case of busses with muxes, the
+ * i2c-core's i2c_adapter_lock_bus function calls:
+ * rt_mutex_lock_nested(&adapter->bus_lock, i2c_adapter_depth(adapter));
+ *
+ * But i2c_adapter_depth only works when the direct parent of the adapter is
+ * another adapter, as it is only meant for muxes. In our case there is an
+ * i2c-client and MFD instantiated platform_device in the parent->child chain
+ * between the 2 devices.
+ *
+ * So we override the default i2c_lock_operations and pass a hardcoded
+ * depth of 1 to rt_mutex_lock_nested, to make lockdep happy.
+ *
+ * Note that if there were to be a mux attached to our adapter, this would
+ * break things again since the i2c-mux code expects the root-adapter to have
+ * a locking depth of 0. But we always have only 1 client directly attached
+ * in the form of the Charger IC paired with the CHT Whiskey Cove PMIC.
+ */
+static void cht_wc_i2c_adap_lock_bus(struct i2c_adapter *adapter,
+                                unsigned int flags)
+{
+       rt_mutex_lock_nested(&adapter->bus_lock, 1);
+}
+
+static int cht_wc_i2c_adap_trylock_bus(struct i2c_adapter *adapter,
+                                  unsigned int flags)
+{
+       return rt_mutex_trylock(&adapter->bus_lock);
+}
+
+static void cht_wc_i2c_adap_unlock_bus(struct i2c_adapter *adapter,
+                                  unsigned int flags)
+{
+       rt_mutex_unlock(&adapter->bus_lock);
+}
+
+static const struct i2c_lock_operations cht_wc_i2c_adap_lock_ops = {
+       .lock_bus =    cht_wc_i2c_adap_lock_bus,
+       .trylock_bus = cht_wc_i2c_adap_trylock_bus,
+       .unlock_bus =  cht_wc_i2c_adap_unlock_bus,
+};
+
 /**** irqchip for the client connected to the extchgr i2c adapter ****/
 static void cht_wc_i2c_irq_lock(struct irq_data *data)
 {
@@ -286,6 +331,7 @@ static int cht_wc_i2c_adap_i2c_probe(struct platform_device *pdev)
        adap->adapter.owner = THIS_MODULE;
        adap->adapter.class = I2C_CLASS_HWMON;
        adap->adapter.algo = &cht_wc_i2c_adap_algo;
+       adap->adapter.lock_ops = &cht_wc_i2c_adap_lock_ops;
        strlcpy(adap->adapter.name, "PMIC I2C Adapter",
                sizeof(adap->adapter.name));
        adap->adapter.dev.parent = &pdev->dev;
@@ -363,8 +409,7 @@ static int cht_wc_i2c_adap_i2c_remove(struct platform_device *pdev)
 {
        struct cht_wc_i2c_adap *adap = platform_get_drvdata(pdev);
 
-       if (adap->client)
-               i2c_unregister_device(adap->client);
+       i2c_unregister_device(adap->client);
        i2c_del_adapter(&adap->adapter);
        irq_domain_remove(adap->irq_domain);
 
index d464799..e8b3282 100644 (file)
@@ -655,15 +655,11 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev)
        struct i2c_bus_recovery_info *rinfo = &dev->rinfo;
        struct i2c_adapter *adap = &dev->adapter;
        struct gpio_desc *gpio;
-       int r;
-
-       gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH);
-       if (IS_ERR(gpio)) {
-               r = PTR_ERR(gpio);
-               if (r == -ENOENT || r == -ENOSYS)
-                       return 0;
-               return r;
-       }
+
+       gpio = devm_gpiod_get_optional(dev->dev, "scl", GPIOD_OUT_HIGH);
+       if (IS_ERR_OR_NULL(gpio))
+               return PTR_ERR_OR_ZERO(gpio);
+
        rinfo->scl_gpiod = gpio;
 
        gpio = devm_gpiod_get_optional(dev->dev, "sda", GPIOD_IN);
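
The devm_gpiod_get_optional() conversion folds three outcomes into one test: NULL when the firmware simply defines no "scl" GPIO (recovery is skipped, not an error), an ERR_PTR such as -EPROBE_DEFER that must be propagated, and a valid descriptor. A sketch of the pattern in isolation:

    #include <linux/err.h>
    #include <linux/gpio/consumer.h>

    static int demo_get_recovery_gpio(struct device *dev)
    {
        struct gpio_desc *gpio;

        gpio = devm_gpiod_get_optional(dev, "scl", GPIOD_OUT_HIGH);
        if (IS_ERR_OR_NULL(gpio))
            /* NULL -> 0 ("no GPIO, skip recovery"); ERR_PTR -> its
             * errno, including -EPROBE_DEFER */
            return PTR_ERR_OR_ZERO(gpio);

        /* ...stash the descriptor for bus recovery... */
        return 0;
    }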
index 76810de..050adda 100644 (file)
@@ -33,6 +33,7 @@ enum dw_pci_ctl_id_t {
        baytrail,
        cherrytrail,
        haswell,
+       elkhartlake,
 };
 
 struct dw_scl_sda_cfg {
@@ -168,13 +169,20 @@ static struct dw_pci_controller dw_pci_controllers[] = {
                .flags = MODEL_CHERRYTRAIL,
                .scl_sda_cfg = &byt_config,
        },
+       [elkhartlake] = {
+               .bus_num = -1,
+               .bus_cfg = INTEL_MID_STD_CFG | DW_IC_CON_SPEED_FAST,
+               .tx_fifo_depth = 32,
+               .rx_fifo_depth = 32,
+               .functionality = I2C_FUNC_10BIT_ADDR,
+               .clk_khz = 100000,
+       },
 };
 
 #ifdef CONFIG_PM
 static int i2c_dw_pci_suspend(struct device *dev)
 {
-       struct pci_dev *pdev = to_pci_dev(dev);
-       struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
+       struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
 
        i_dev->suspended = true;
        i_dev->disable(i_dev);
@@ -184,8 +192,7 @@ static int i2c_dw_pci_suspend(struct device *dev)
 
 static int i2c_dw_pci_resume(struct device *dev)
 {
-       struct pci_dev *pdev = to_pci_dev(dev);
-       struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
+       struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
        int ret;
 
        ret = i_dev->init(i_dev);
@@ -227,6 +234,8 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
                return r;
        }
 
+       pci_set_master(pdev);
+
        r = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
        if (r) {
                dev_err(&pdev->dev, "I/O memory remapping failed\n");
@@ -237,18 +246,24 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
        if (!dev)
                return -ENOMEM;
 
+       r = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+       if (r < 0)
+               return r;
+
        dev->clk = NULL;
        dev->controller = controller;
        dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
        dev->base = pcim_iomap_table(pdev)[0];
        dev->dev = &pdev->dev;
-       dev->irq = pdev->irq;
+       dev->irq = pci_irq_vector(pdev, 0);
        dev->flags |= controller->flags;
 
        if (controller->setup) {
                r = controller->setup(pdev, controller);
-               if (r)
+               if (r) {
+                       pci_free_irq_vectors(pdev);
                        return r;
+               }
        }
 
        dev->functionality = controller->functionality |
@@ -276,8 +291,10 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
        adap->nr = controller->bus_num;
 
        r = i2c_dw_probe(dev);
-       if (r)
+       if (r) {
+               pci_free_irq_vectors(pdev);
                return r;
+       }
 
        pm_runtime_set_autosuspend_delay(&pdev->dev, 1000);
        pm_runtime_use_autosuspend(&pdev->dev);
@@ -296,6 +313,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
        pm_runtime_get_noresume(&pdev->dev);
 
        i2c_del_adapter(&dev->adapter);
+       pci_free_irq_vectors(pdev);
 }
 
 /* work with hotplug and coldplug */
@@ -331,6 +349,15 @@ static const struct pci_device_id i2_designware_pci_ids[] = {
        { PCI_VDEVICE(INTEL, 0x22C5), cherrytrail },
        { PCI_VDEVICE(INTEL, 0x22C6), cherrytrail },
        { PCI_VDEVICE(INTEL, 0x22C7), cherrytrail },
+       /* Elkhart Lake (PSE I2C) */
+       { PCI_VDEVICE(INTEL, 0x4bb9), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bba), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbb), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbc), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbd), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbe), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbf), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bc0), elkhartlake },
        { 0,}
 };
 MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids);
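
The probe path now goes through the MSI-aware vector API rather than using pdev->irq directly, which is why every error path after the allocation grows a pci_free_irq_vectors() call. The lifecycle in miniature (demo_* names hypothetical):

    #include <linux/interrupt.h>
    #include <linux/pci.h>

    static irqreturn_t demo_isr(int irq, void *data)
    {
        return IRQ_HANDLED;
    }

    static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
    {
        int nvec, ret;

        /* One vector of any type: MSI-X, MSI, or legacy INTx */
        nvec = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
        if (nvec < 0)
            return nvec;

        /* pdev->irq is only valid for INTx; this works for all types */
        ret = request_irq(pci_irq_vector(pdev, 0), demo_isr, 0, "demo", pdev);
        if (ret)
            pci_free_irq_vectors(pdev);   /* every error path must free */
        return ret;
    }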
index ddfb818..16dd338 100644 (file)
@@ -279,12 +279,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dev);
 
        dev->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
-       if (IS_ERR(dev->rst)) {
-               if (PTR_ERR(dev->rst) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
-       } else {
-               reset_control_deassert(dev->rst);
-       }
+       if (IS_ERR(dev->rst))
+               return PTR_ERR(dev->rst);
+
+       reset_control_deassert(dev->rst);
 
        t = &dev->timings;
        if (pdata)
@@ -346,8 +344,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 
        /* Optional interface clock */
        dev->pclk = devm_clk_get_optional(&pdev->dev, "pclk");
-       if (IS_ERR(dev->pclk))
-               return PTR_ERR(dev->pclk);
+       if (IS_ERR(dev->pclk)) {
+               ret = PTR_ERR(dev->pclk);
+               goto exit_reset;
+       }
 
        dev->clk = devm_clk_get(&pdev->dev, NULL);
        if (!i2c_dw_prepare_clk(dev, true)) {
@@ -400,8 +400,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 exit_probe:
        dw_i2c_plat_pm_cleanup(dev);
 exit_reset:
-       if (!IS_ERR_OR_NULL(dev->rst))
-               reset_control_assert(dev->rst);
+       reset_control_assert(dev->rst);
        return ret;
 }
 
@@ -419,8 +418,7 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
        pm_runtime_put_sync(&pdev->dev);
        dw_i2c_plat_pm_cleanup(dev);
 
-       if (!IS_ERR_OR_NULL(dev->rst))
-               reset_control_assert(dev->rst);
+       reset_control_assert(dev->rst);
 
        return 0;
 }
index e4e7932..e7514c1 100644 (file)
@@ -791,9 +791,7 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
        }
 
        ret = devm_request_irq(&pdev->dev, i2c->irq, exynos5_i2c_irq,
-                               IRQF_NO_SUSPEND | IRQF_ONESHOT,
-                               dev_name(&pdev->dev), i2c);
-
+                              IRQF_NO_SUSPEND, dev_name(&pdev->dev), i2c);
        if (ret != 0) {
                dev_err(&pdev->dev, "cannot request HS-I2C IRQ %d\n", i2c->irq);
                goto err_clk;
index da5eb39..e0c2569 100644 (file)
@@ -707,8 +707,10 @@ static int fsi_i2c_probe(struct device *dev)
                        continue;
 
                port = kzalloc(sizeof(*port), GFP_KERNEL);
-               if (!port)
+               if (!port) {
+                       of_node_put(np);
                        break;
+               }
 
                port->master = i2c;
                port->port = port_no;
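
The added of_node_put() matters because the child-node iterators take a reference on each node they hand out and drop it only when advancing to the next one; breaking out of the loop early otherwise leaks the current node. The general pattern (sketch):

    #include <linux/of.h>
    #include <linux/slab.h>

    static int demo_scan(struct device_node *parent)
    {
        struct device_node *np;
        void *child;

        for_each_available_child_of_node(parent, np) {
            child = kzalloc(64, GFP_KERNEL);
            if (!child) {
                of_node_put(np);      /* balance the iterator's get */
                return -ENOMEM;
            }
            kfree(child);             /* placeholder for real setup */
        }
        return 0;
    }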
index 4df1434..8497c7a 100644 (file)
@@ -445,8 +445,7 @@ static int hix5hd2_i2c_probe(struct platform_device *pdev)
        hix5hd2_i2c_init(priv);
 
        ret = devm_request_irq(&pdev->dev, irq, hix5hd2_i2c_irq,
-                              IRQF_NO_SUSPEND | IRQF_ONESHOT,
-                              dev_name(&pdev->dev), priv);
+                              IRQF_NO_SUSPEND, dev_name(&pdev->dev), priv);
        if (ret != 0) {
                dev_err(&pdev->dev, "cannot request HS-I2C IRQ %d\n", irq);
                goto err_clk;
index 36e9559..f1c714a 100644 (file)
@@ -292,7 +292,8 @@ struct i801_priv {
 #define FEATURE_HOST_NOTIFY    BIT(5)
 /* Not really a feature, but it's convenient to handle it as such */
 #define FEATURE_IDF            BIT(15)
-#define FEATURE_TCO            BIT(16)
+#define FEATURE_TCO_SPT                BIT(16)
+#define FEATURE_TCO_CNL                BIT(17)
 
 static const char *i801_feature_names[] = {
        "SMBus PEC",
@@ -1500,57 +1501,23 @@ static inline unsigned int i801_get_adapter_class(struct i801_priv *priv)
 }
 #endif
 
-static const struct itco_wdt_platform_data tco_platform_data = {
+static const struct itco_wdt_platform_data spt_tco_platform_data = {
        .name = "Intel PCH",
        .version = 4,
 };
 
 static DEFINE_SPINLOCK(p2sb_spinlock);
 
-static void i801_add_tco(struct i801_priv *priv)
+static struct platform_device *
+i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
+                struct resource *tco_res)
 {
-       struct pci_dev *pci_dev = priv->pci_dev;
-       struct resource tco_res[3], *res;
-       struct platform_device *pdev;
+       struct resource *res;
        unsigned int devfn;
-       u32 tco_base, tco_ctl;
-       u32 base_addr, ctrl_val;
        u64 base64_addr;
+       u32 base_addr;
        u8 hidden;
 
-       if (!(priv->features & FEATURE_TCO))
-               return;
-
-       pci_read_config_dword(pci_dev, TCOBASE, &tco_base);
-       pci_read_config_dword(pci_dev, TCOCTL, &tco_ctl);
-       if (!(tco_ctl & TCOCTL_EN))
-               return;
-
-       memset(tco_res, 0, sizeof(tco_res));
-
-       res = &tco_res[ICH_RES_IO_TCO];
-       res->start = tco_base & ~1;
-       res->end = res->start + 32 - 1;
-       res->flags = IORESOURCE_IO;
-
-       /*
-        * Power Management registers.
-        */
-       devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
-       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
-
-       res = &tco_res[ICH_RES_IO_SMI];
-       res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
-       res->end = res->start + 3;
-       res->flags = IORESOURCE_IO;
-
-       /*
-        * Enable the ACPI I/O space.
-        */
-       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
-       ctrl_val |= ACPICTRL_EN;
-       pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
-
        /*
         * We must access the NO_REBOOT bit over the Primary to Sideband
         * bridge (P2SB). The BIOS prevents the P2SB device from being
@@ -1586,15 +1553,76 @@ static void i801_add_tco(struct i801_priv *priv)
        res->end = res->start + 3;
        res->flags = IORESOURCE_MEM;
 
-       pdev = platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
-                                                tco_res, 3, &tco_platform_data,
-                                                sizeof(tco_platform_data));
-       if (IS_ERR(pdev)) {
-               dev_warn(&pci_dev->dev, "failed to create iTCO device\n");
+       return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
+                                       tco_res, 3, &spt_tco_platform_data,
+                                       sizeof(spt_tco_platform_data));
+}
+
+static const struct itco_wdt_platform_data cnl_tco_platform_data = {
+       .name = "Intel PCH",
+       .version = 6,
+};
+
+static struct platform_device *
+i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev,
+                struct resource *tco_res)
+{
+       return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
+                                       tco_res, 2, &cnl_tco_platform_data,
+                                       sizeof(cnl_tco_platform_data));
+}
+
+static void i801_add_tco(struct i801_priv *priv)
+{
+       u32 base_addr, tco_base, tco_ctl, ctrl_val;
+       struct pci_dev *pci_dev = priv->pci_dev;
+       struct resource tco_res[3], *res;
+       unsigned int devfn;
+
+       /* If we have ACPI based watchdog use that instead */
+       if (acpi_has_watchdog())
                return;
-       }
 
-       priv->tco_pdev = pdev;
+       if (!(priv->features & (FEATURE_TCO_SPT | FEATURE_TCO_CNL)))
+               return;
+
+       pci_read_config_dword(pci_dev, TCOBASE, &tco_base);
+       pci_read_config_dword(pci_dev, TCOCTL, &tco_ctl);
+       if (!(tco_ctl & TCOCTL_EN))
+               return;
+
+       memset(tco_res, 0, sizeof(tco_res));
+
+       res = &tco_res[ICH_RES_IO_TCO];
+       res->start = tco_base & ~1;
+       res->end = res->start + 32 - 1;
+       res->flags = IORESOURCE_IO;
+
+       /*
+        * Power Management registers.
+        */
+       devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
+       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
+
+       res = &tco_res[ICH_RES_IO_SMI];
+       res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
+       res->end = res->start + 3;
+       res->flags = IORESOURCE_IO;
+
+       /*
+        * Enable the ACPI I/O space.
+        */
+       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
+       ctrl_val |= ACPICTRL_EN;
+       pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
+
+       if (priv->features & FEATURE_TCO_CNL)
+               priv->tco_pdev = i801_add_tco_cnl(priv, pci_dev, tco_res);
+       else
+               priv->tco_pdev = i801_add_tco_spt(priv, pci_dev, tco_res);
+
+       if (IS_ERR(priv->tco_pdev))
+               dev_warn(&pci_dev->dev, "failed to create iTCO device\n");
 }
 
 #ifdef CONFIG_ACPI
@@ -1704,13 +1732,22 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
        switch (dev->device) {
        case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
        case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
-       case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
-       case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
        case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
        case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
-       case PCI_DEVICE_ID_INTEL_CDF_SMBUS:
        case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
        case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS:
+               priv->features |= FEATURE_BLOCK_PROC;
+               priv->features |= FEATURE_I2C_BLOCK_READ;
+               priv->features |= FEATURE_IRQ;
+               priv->features |= FEATURE_SMBUS_PEC;
+               priv->features |= FEATURE_BLOCK_BUFFER;
+               priv->features |= FEATURE_TCO_SPT;
+               priv->features |= FEATURE_HOST_NOTIFY;
+               break;
+
+       case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
+       case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
+       case PCI_DEVICE_ID_INTEL_CDF_SMBUS:
        case PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS:
        case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS:
        case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS:
@@ -1720,9 +1757,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
                priv->features |= FEATURE_IRQ;
                priv->features |= FEATURE_SMBUS_PEC;
                priv->features |= FEATURE_BLOCK_BUFFER;
-               /* If we have ACPI based watchdog use that instead */
-               if (!acpi_has_watchdog())
-                       priv->features |= FEATURE_TCO;
+               priv->features |= FEATURE_TCO_CNL;
                priv->features |= FEATURE_HOST_NOTIFY;
                break;
 
@@ -1921,8 +1956,7 @@ static int i801_suspend(struct device *dev)
 
 static int i801_resume(struct device *dev)
 {
-       struct pci_dev *pci_dev = to_pci_dev(dev);
-       struct i801_priv *priv = pci_get_drvdata(pci_dev);
+       struct i801_priv *priv = dev_get_drvdata(dev);
 
        i801_enable_host_notify(&priv->adapter);
 
diff --git a/drivers/i2c/busses/i2c-icy.c b/drivers/i2c/busses/i2c-icy.c
new file mode 100644 (file)
index 0000000..8382eb6
--- /dev/null
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I2C driver for stand-alone PCF8584 style adapters on Zorro cards
+ *
+ * Original ICY documentation can be found on Aminet:
+ * https://aminet.net/package/docs/hard/icy
+ *
+ * There has been a modern community re-print of this design in 2019:
+ * https://www.a1k.org/forum/index.php?threads/70106/
+ *
+ * The card is basically a Philips PCF8584 connected straight to the
+ * beginning of the AutoConfig'd address space (register S1 on base+2),
+ * with /INT on /INT2 on the Zorro bus.
+ *
+ * Copyright (c) 2019 Max Staudt <max@enpas.org>
+ *
+ * This started as a fork of i2c-elektor.c and has evolved since.
+ * Thanks go to its authors for providing a base to grow on.
+ *
+ *
+ * IRQ support is currently not implemented.
+ *
+ * As it turns out, i2c-algo-pcf is really written with i2c-elektor's
+ * edge-triggered ISA interrupts in mind, while the Amiga's Zorro bus has
+ * level-triggered interrupts. This means that once an interrupt occurs, we
+ * have to tell the PCF8584 to shut up immediately, or it will keep the
+ * interrupt line busy and cause an IRQ storm.
+ *
+ * However, because of the PCF8584's host-side protocol, there is no good
+ * way to just quieten it without side effects. Rather, we have to perform
+ * the next read/write operation straight away, which will reset the /INT
+ * pin. This entails re-designing the core of i2c-algo-pcf in the future.
+ * For now, we never request an IRQ from the PCF8584, and poll it instead.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c-algo-pcf.h>
+
+#include <asm/amigaints.h>
+#include <linux/zorro.h>
+
+#include "../algos/i2c-algo-pcf.h"
+
+struct icy_i2c {
+       struct i2c_adapter adapter;
+
+       void __iomem *reg_s0;
+       void __iomem *reg_s1;
+       struct fwnode_handle *ltc2990_fwnode;
+       struct i2c_client *ltc2990_client;
+};
+
+/*
+ * Functions called by i2c-algo-pcf
+ */
+static void icy_pcf_setpcf(void *data, int ctl, int val)
+{
+       struct icy_i2c *i2c = (struct icy_i2c *)data;
+
+       u8 __iomem *address = ctl ? i2c->reg_s1 : i2c->reg_s0;
+
+       z_writeb(val, address);
+}
+
+static int icy_pcf_getpcf(void *data, int ctl)
+{
+       struct icy_i2c *i2c = (struct icy_i2c *)data;
+
+       u8 __iomem *address = ctl ? i2c->reg_s1 : i2c->reg_s0;
+
+       return z_readb(address);
+}
+
+static int icy_pcf_getown(void *data)
+{
+       return 0x55;
+}
+
+static int icy_pcf_getclock(void *data)
+{
+       return 0x1c;
+}
+
+static void icy_pcf_waitforpin(void *data)
+{
+       usleep_range(50, 150);
+}
+
+/*
+ * Main i2c-icy part
+ */
+static unsigned short const icy_ltc2990_addresses[] = {
+       0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END
+};
+
+/*
+ * Additional sensors exposed once this property is applied:
+ *
+ * in1 will be the voltage of the 5V rail, divided by 2.
+ * in2 will be the voltage of the 12V rail, divided by 4.
+ * temp3 will be measured using a PCB loop next to the chip.
+ */
+static const u32 icy_ltc2990_meas_mode[] = {0, 3};
+
+static const struct property_entry icy_ltc2990_props[] = {
+       PROPERTY_ENTRY_U32_ARRAY("lltc,meas-mode", icy_ltc2990_meas_mode),
+       { }
+};
+
+static int icy_probe(struct zorro_dev *z,
+                    const struct zorro_device_id *ent)
+{
+       struct icy_i2c *i2c;
+       struct i2c_algo_pcf_data *algo_data;
+       struct fwnode_handle *new_fwnode;
+       struct i2c_board_info ltc2990_info = {
+               .type           = "ltc2990",
+               .addr           = 0x4c,
+       };
+
+       i2c = devm_kzalloc(&z->dev, sizeof(*i2c), GFP_KERNEL);
+       if (!i2c)
+               return -ENOMEM;
+
+       algo_data = devm_kzalloc(&z->dev, sizeof(*algo_data), GFP_KERNEL);
+       if (!algo_data)
+               return -ENOMEM;
+
+       dev_set_drvdata(&z->dev, i2c);
+       i2c->adapter.dev.parent = &z->dev;
+       i2c->adapter.owner = THIS_MODULE;
+       /* i2c->adapter.algo assigned by i2c_pcf_add_bus() */
+       i2c->adapter.algo_data = algo_data;
+       strlcpy(i2c->adapter.name, "ICY I2C Zorro adapter",
+               sizeof(i2c->adapter.name));
+
+       if (!devm_request_mem_region(&z->dev,
+                                    z->resource.start,
+                                    4, i2c->adapter.name))
+               return -ENXIO;
+
+       /* Driver private data */
+       i2c->reg_s0 = ZTWO_VADDR(z->resource.start);
+       i2c->reg_s1 = ZTWO_VADDR(z->resource.start + 2);
+
+       algo_data->data = i2c;
+       algo_data->setpcf     = icy_pcf_setpcf;
+       algo_data->getpcf     = icy_pcf_getpcf;
+       algo_data->getown     = icy_pcf_getown;
+       algo_data->getclock   = icy_pcf_getclock;
+       algo_data->waitforpin = icy_pcf_waitforpin;
+
+       if (i2c_pcf_add_bus(&i2c->adapter)) {
+               dev_err(&z->dev, "i2c_pcf_add_bus() failed\n");
+               return -ENXIO;
+       }
+
+       dev_info(&z->dev, "ICY I2C controller at %pa, IRQ not implemented\n",
+                &z->resource.start);
+
+       /*
+        * The 2019 a1k.org PCBs have an LTC2990 at 0x4c, so start
+        * it automatically once ltc2990 is modprobed.
+        *
+        * in0 is the voltage of the internal 5V power supply.
+        * temp1 is the temperature inside the chip.
+        *
+        * See property_entry above for in1, in2, temp3.
+        */
+       new_fwnode = fwnode_create_software_node(icy_ltc2990_props, NULL);
+       if (IS_ERR(new_fwnode)) {
+               dev_info(&z->dev, "Failed to create fwnode for LTC2990, error: %ld\n",
+                        PTR_ERR(new_fwnode));
+       } else {
+               /*
+                * Store the fwnode so we can destroy it on .remove().
+                * Only store it on success, as fwnode_remove_software_node()
+                * is NULL safe, but not PTR_ERR safe.
+                */
+               i2c->ltc2990_fwnode = new_fwnode;
+               ltc2990_info.fwnode = new_fwnode;
+
+               i2c->ltc2990_client =
+                       i2c_new_probed_device(&i2c->adapter,
+                                             &ltc2990_info,
+                                             icy_ltc2990_addresses,
+                                             NULL);
+       }
+
+       return 0;
+}
+
+static void icy_remove(struct zorro_dev *z)
+{
+       struct icy_i2c *i2c = dev_get_drvdata(&z->dev);
+
+       i2c_unregister_device(i2c->ltc2990_client);
+       fwnode_remove_software_node(i2c->ltc2990_fwnode);
+
+       i2c_del_adapter(&i2c->adapter);
+}
+
+static const struct zorro_device_id icy_zorro_tbl[] = {
+       { ZORRO_ID(VMC, 15, 0), },
+       { 0 }
+};
+
+MODULE_DEVICE_TABLE(zorro, icy_zorro_tbl);
+
+static struct zorro_driver icy_driver = {
+       .name           = "i2c-icy",
+       .id_table       = icy_zorro_tbl,
+       .probe          = icy_probe,
+       .remove         = icy_remove,
+};
+
+module_driver(icy_driver,
+             zorro_register_driver,
+             zorro_unregister_driver);
+
+MODULE_AUTHOR("Max Staudt <max@enpas.org>");
+MODULE_DESCRIPTION("I2C bus via PCF8584 on ICY Zorro card");
+MODULE_LICENSE("GPL v2");
index dc00fab..c92b564 100644 (file)
@@ -545,7 +545,6 @@ MODULE_DEVICE_TABLE(of, lpi2c_imx_of_match);
 static int lpi2c_imx_probe(struct platform_device *pdev)
 {
        struct lpi2c_imx_struct *lpi2c_imx;
-       struct resource *res;
        unsigned int temp;
        int irq, ret;
 
@@ -553,8 +552,7 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
        if (!lpi2c_imx)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       lpi2c_imx->base = devm_ioremap_resource(&pdev->dev, res);
+       lpi2c_imx->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(lpi2c_imx->base))
                return PTR_ERR(lpi2c_imx->base);
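
devm_platform_ioremap_resource() (the same conversion appears again in i2c-mxs and i2c-sprd below) is simply the old two-call idiom wrapped up; its effect is roughly:

    #include <linux/platform_device.h>

    /* Rough equivalent of devm_platform_ioremap_resource(pdev, index) */
    static void __iomem *demo_ioremap(struct platform_device *pdev,
                                      unsigned int index)
    {
        struct resource *res;

        res = platform_get_resource(pdev, IORESOURCE_MEM, index);
        return devm_ioremap_resource(&pdev->dev, res);
    }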
 
index 15f6cde..a3b6133 100644 (file)
@@ -20,6 +20,7 @@
  *
  */
 
+#include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
@@ -255,6 +256,12 @@ static const struct of_device_id i2c_imx_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, i2c_imx_dt_ids);
 
+static const struct acpi_device_id i2c_imx_acpi_ids[] = {
+       {"NXP0001", .driver_data = (kernel_ulong_t)&vf610_i2c_hwdata},
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, i2c_imx_acpi_ids);
+
 static inline int is_imx1_i2c(struct imx_i2c_struct *i2c_imx)
 {
        return i2c_imx->hwdata->devtype == IMX1_I2C;
@@ -1048,14 +1055,13 @@ static const struct i2c_algorithm i2c_imx_algo = {
 
 static int i2c_imx_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id = of_match_device(i2c_imx_dt_ids,
-                                                          &pdev->dev);
        struct imx_i2c_struct *i2c_imx;
        struct resource *res;
        struct imxi2c_platform_data *pdata = dev_get_platdata(&pdev->dev);
        void __iomem *base;
        int irq, ret;
        dma_addr_t phy_addr;
+       const struct imx_i2c_hwdata *match;
 
        dev_dbg(&pdev->dev, "<%s>\n", __func__);
 
@@ -1075,8 +1081,9 @@ static int i2c_imx_probe(struct platform_device *pdev)
        if (!i2c_imx)
                return -ENOMEM;
 
-       if (of_id)
-               i2c_imx->hwdata = of_id->data;
+       match = device_get_match_data(&pdev->dev);
+       if (match)
+               i2c_imx->hwdata = match;
        else
                i2c_imx->hwdata = (struct imx_i2c_hwdata *)
                                platform_get_device_id(pdev)->driver_data;
@@ -1089,6 +1096,7 @@ static int i2c_imx_probe(struct platform_device *pdev)
        i2c_imx->adapter.nr             = pdev->id;
        i2c_imx->adapter.dev.of_node    = pdev->dev.of_node;
        i2c_imx->base                   = base;
+       ACPI_COMPANION_SET(&i2c_imx->adapter.dev, ACPI_COMPANION(&pdev->dev));
 
        /* Get I2C clock */
        i2c_imx->clk = devm_clk_get(&pdev->dev, NULL);
@@ -1247,6 +1255,7 @@ static struct platform_driver i2c_imx_driver = {
                .name = DRIVER_NAME,
                .pm = &i2c_imx_pm_ops,
                .of_match_table = i2c_imx_dt_ids,
+               .acpi_match_table = i2c_imx_acpi_ids,
        },
        .id_table = imx_i2c_devtype,
 };
index 02d23ed..2f95e25 100644 (file)
@@ -781,8 +781,6 @@ static int ismt_dev_init(struct ismt_priv *priv)
        if (!priv->hw)
                return -ENOMEM;
 
-       memset(priv->hw, 0, (ISMT_DESC_ENTRIES * sizeof(struct ismt_desc)));
-
        priv->head = 0;
        init_completion(&priv->cmp);
 
index 29eae1b..2152ec5 100644 (file)
@@ -875,7 +875,7 @@ static irqreturn_t mtk_i2c_irq(int irqno, void *dev_id)
 
 static u32 mtk_i2c_functionality(struct i2c_adapter *adap)
 {
-       if (adap->quirks->flags & I2C_AQ_NO_ZERO_LEN)
+       if (i2c_check_quirks(adap, I2C_AQ_NO_ZERO_LEN))
                return I2C_FUNC_I2C |
                        (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
        else
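
i2c_check_quirks() differs from the open-coded test in one way that matters: it tolerates adap->quirks being NULL, which (as the bcm2835 change above shows) is a real possibility. Paraphrased from include/linux/i2c.h:

    static inline bool i2c_check_quirks(struct i2c_adapter *adap, u64 quirks)
    {
        if (!adap->quirks)
            return false;
        return (adap->quirks->flags & quirks) == quirks;
    }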
index 7d79317..8922491 100644 (file)
@@ -802,7 +802,6 @@ static int mxs_i2c_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct mxs_i2c_dev *i2c;
        struct i2c_adapter *adap;
-       struct resource *res;
        int err, irq;
 
        i2c = devm_kzalloc(dev, sizeof(*i2c), GFP_KERNEL);
@@ -814,8 +813,7 @@ static int mxs_i2c_probe(struct platform_device *pdev)
                i2c->dev_type = device_id->driver_data;
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       i2c->regs = devm_ioremap_resource(&pdev->dev, res);
+       i2c->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(i2c->regs))
                return PTR_ERR(i2c->regs);
 
index 4117f1a..ca8b3ec 100644 (file)
@@ -703,8 +703,9 @@ static int ocores_i2c_probe(struct platform_device *pdev)
        }
 
        if (ocores_algorithm.master_xfer != ocores_xfer_polling) {
-               ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
-                                      pdev->name, i2c);
+               ret = devm_request_any_context_irq(&pdev->dev, irq,
+                                                  ocores_isr, 0,
+                                                  pdev->name, i2c);
                if (ret) {
                        dev_err(&pdev->dev, "Cannot claim IRQ\n");
                        goto err_clk;
index cba325e..30ded64 100644 (file)
@@ -72,7 +72,8 @@
 #define PIIX4_BLOCK_DATA       0x14
 
 /* Multi-port constants */
-#define PIIX4_MAX_ADAPTERS 4
+#define PIIX4_MAX_ADAPTERS     4
+#define HUDSON2_MAIN_PORTS     2 /* HUDSON2, KERNCZ reserve ports 3, 4 */
 
 /* SB800 constants */
 #define SB800_PIIX4_SMB_IDX            0xcd6
@@ -806,10 +807,12 @@ MODULE_DEVICE_TABLE (pci, piix4_ids);
 
 static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
+static int piix4_adapter_count;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
                             bool sb800_main, u8 port, bool notify_imc,
-                            const char *name, struct i2c_adapter **padap)
+                            u8 hw_port_nr, const char *name,
+                            struct i2c_adapter **padap)
 {
        struct i2c_adapter *adap;
        struct i2c_piix4_adapdata *adapdata;
@@ -841,6 +844,12 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
        /* set up the sysfs linkage to our parent device */
        adap->dev.parent = &dev->dev;
 
+       if (has_acpi_companion(&dev->dev)) {
+               acpi_preset_companion(&adap->dev,
+                                     ACPI_COMPANION(&dev->dev),
+                                     hw_port_nr);
+       }
+
        snprintf(adap->name, sizeof(adap->name),
                "SMBus PIIX4 adapter%s at %04x", name, smba);
 
@@ -865,8 +874,19 @@ static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba,
        int port;
        int retval;
 
-       for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
+       if (dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS ||
+           (dev->device == PCI_DEVICE_ID_AMD_HUDSON2_SMBUS &&
+            dev->revision >= 0x1F)) {
+               piix4_adapter_count = HUDSON2_MAIN_PORTS;
+       } else {
+               piix4_adapter_count = PIIX4_MAX_ADAPTERS;
+       }
+
+       for (port = 0; port < piix4_adapter_count; port++) {
+               u8 hw_port_nr = port == 0 ? 0 : port + 1;
+
                retval = piix4_add_adapter(dev, smba, true, port, notify_imc,
+                                          hw_port_nr,
                                           piix4_main_port_names_sb800[port],
                                           &piix4_main_adapters[port]);
                if (retval < 0)
@@ -937,8 +957,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        return retval;
 
                /* Try to register main SMBus adapter, give up if we can't */
-               retval = piix4_add_adapter(dev, retval, false, 0, false, "",
-                                          &piix4_main_adapters[0]);
+               retval = piix4_add_adapter(dev, retval, false, 0, false, 0,
+                                          "", &piix4_main_adapters[0]);
                if (retval < 0)
                        return retval;
        }
@@ -964,7 +984,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (retval > 0) {
                /* Try to add the aux adapter if it exists,
                 * piix4_add_adapter will clean up if this fails */
-               piix4_add_adapter(dev, retval, false, 0, false,
+               piix4_add_adapter(dev, retval, false, 0, false, 1,
                                  is_sb800 ? piix4_aux_port_name_sb800 : "",
                                  &piix4_aux_adapter);
        }
@@ -987,7 +1007,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
 
 static void piix4_remove(struct pci_dev *dev)
 {
-       int port = PIIX4_MAX_ADAPTERS;
+       int port = piix4_adapter_count;
 
        while (--port >= 0) {
                if (piix4_main_adapters[port]) {
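
The hw_port_nr handed to acpi_preset_companion() maps the driver's 0-based main-port index onto the address space used for the ACPI companion lookup, skipping 1, which is reserved for the auxiliary adapter. In short (sketch of the mapping these hunks establish):

    /* main port 0 -> hw_port_nr 0, aux adapter -> 1, main ports 1..3 -> 2..4 */
    u8 hw_port_nr = port == 0 ? 0 : port + 1;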
index a89bfce..17abf60 100644 (file)
@@ -355,11 +355,13 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
 {
        dma_addr_t rx_dma;
        unsigned long time_left;
-       void *dma_buf;
+       void *dma_buf = NULL;
        struct geni_se *se = &gi2c->se;
        size_t len = msg->len;
 
-       dma_buf = i2c_get_dma_safe_msg_buf(msg, 32);
+       if (!of_machine_is_compatible("lenovo,yoga-c630"))
+               dma_buf = i2c_get_dma_safe_msg_buf(msg, 32);
+
        if (dma_buf)
                geni_se_select_mode(se, GENI_SE_DMA);
        else
@@ -394,11 +396,13 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
 {
        dma_addr_t tx_dma;
        unsigned long time_left;
-       void *dma_buf;
+       void *dma_buf = NULL;
        struct geni_se *se = &gi2c->se;
        size_t len = msg->len;
 
-       dma_buf = i2c_get_dma_safe_msg_buf(msg, 32);
+       if (!of_machine_is_compatible("lenovo,yoga-c630"))
+               dma_buf = i2c_get_dma_safe_msg_buf(msg, 32);
+
        if (dma_buf)
                geni_se_select_mode(se, GENI_SE_DMA);
        else
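
i2c_get_dma_safe_msg_buf() returns NULL for messages shorter than the threshold or when no bounce buffer can be set up, and the driver then falls back to FIFO mode; pre-setting dma_buf to NULL on the Yoga C630 simply reuses that fallback wholesale. The usual get/put pairing looks like this (sketch, error handling trimmed):

    #include <linux/i2c.h>

    static void demo_xfer_one(struct i2c_msg *msg)
    {
        /* NULL if msg->len < 32 or no DMA-safe buffer is available */
        u8 *buf = i2c_get_dma_safe_msg_buf(msg, 32);

        if (buf) {
            /* ... DMA to/from buf ... */
            i2c_put_dma_safe_msg_buf(buf, msg, true); /* true: transfer
                                                         done, reads are
                                                         copied back */
        } else {
            /* ... PIO/FIFO path using msg->buf directly ... */
        }
    }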
index f31413f..8004148 100644 (file)
@@ -202,6 +202,7 @@ static irqreturn_t riic_tend_isr(int irq, void *data)
        if (readb(riic->base + RIIC_ICSR2) & ICSR2_NACKF) {
                /* We got a NACKIE */
                readb(riic->base + RIIC_ICDRR); /* dummy read */
+               riic_clear_set_bit(riic, ICSR2_NACKF, 0, RIIC_ICSR2);
                riic->err = -ENXIO;
        } else if (riic->bytes_left) {
                return IRQ_NONE;
index 9611235..b432e75 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -465,9 +466,9 @@ static int sprd_i2c_clk_init(struct sprd_i2c *i2c_dev)
 
        i2c_dev->clk = devm_clk_get(i2c_dev->dev, "enable");
        if (IS_ERR(i2c_dev->clk)) {
-               dev_warn(i2c_dev->dev, "i2c%d can't get the enable clock\n",
-                        i2c_dev->adap.nr);
-               i2c_dev->clk = NULL;
+               dev_err(i2c_dev->dev, "i2c%d can't get the enable clock\n",
+                       i2c_dev->adap.nr);
+               return PTR_ERR(i2c_dev->clk);
        }
 
        return 0;
@@ -477,7 +478,6 @@ static int sprd_i2c_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct sprd_i2c *i2c_dev;
-       struct resource *res;
        u32 prop;
        int ret;
 
@@ -487,8 +487,7 @@ static int sprd_i2c_probe(struct platform_device *pdev)
        if (!i2c_dev)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       i2c_dev->base = devm_ioremap_resource(dev, res);
+       i2c_dev->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(i2c_dev->base))
                return PTR_ERR(i2c_dev->base);
 
@@ -520,7 +519,10 @@ static int sprd_i2c_probe(struct platform_device *pdev)
        if (i2c_dev->bus_freq != 100000 && i2c_dev->bus_freq != 400000)
                return -EINVAL;
 
-       sprd_i2c_clk_init(i2c_dev);
+       ret = sprd_i2c_clk_init(i2c_dev);
+       if (ret)
+               return ret;
+
        platform_set_drvdata(pdev, i2c_dev);
 
        ret = clk_prepare_enable(i2c_dev->clk);
@@ -644,8 +646,7 @@ static struct platform_driver sprd_i2c_driver = {
        },
 };
 
-static int sprd_i2c_init(void)
-{
-       return platform_driver_register(&sprd_i2c_driver);
-}
-arch_initcall_sync(sprd_i2c_init);
+module_platform_driver(sprd_i2c_driver);
+
+MODULE_DESCRIPTION("Spreadtrum I2C master controller driver");
+MODULE_LICENSE("GPL v2");
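
module_platform_driver() is what makes the new tristate Kconfig entry at the top of this series viable: it replaces the arch_initcall_sync() registration with ordinary module init/exit (for built-in configurations the init level becomes device_initcall). It expands to roughly:

    static int __init sprd_i2c_driver_init(void)
    {
        return platform_driver_register(&sprd_i2c_driver);
    }
    module_init(sprd_i2c_driver_init);

    static void __exit sprd_i2c_driver_exit(void)
    {
        platform_driver_unregister(&sprd_i2c_driver);
    }
    module_exit(sprd_i2c_driver_exit);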
index 266d1c2..b24e7b9 100644 (file)
@@ -305,7 +305,7 @@ struct stm32f7_i2c_dev {
        struct regmap *regmap;
 };
 
-/**
+/*
  * All these values are coming from I2C Specification, Version 6.0, 4th of
  * April 2014.
  *
@@ -1192,6 +1192,8 @@ static void stm32f7_i2c_slave_start(struct stm32f7_i2c_dev *i2c_dev)
                        STM32F7_I2C_CR1_TXIE;
                stm32f7_i2c_set_bits(base + STM32F7_I2C_CR1, mask);
 
+               /* Write 1st data byte */
+               writel_relaxed(value, base + STM32F7_I2C_TXDR);
        } else {
                /* Notify i2c slave that new write transfer is starting */
                i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value);
@@ -1501,7 +1503,7 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data)
        void __iomem *base = i2c_dev->base;
        struct device *dev = i2c_dev->dev;
        struct stm32_i2c_dma *dma = i2c_dev->dma;
-       u32 mask, status;
+       u32 status;
 
        status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR);
 
@@ -1526,12 +1528,15 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data)
                f7_msg->result = -EINVAL;
        }
 
-       /* Disable interrupts */
-       if (stm32f7_i2c_is_slave_registered(i2c_dev))
-               mask = STM32F7_I2C_XFER_IRQ_MASK;
-       else
-               mask = STM32F7_I2C_ALL_IRQ_MASK;
-       stm32f7_i2c_disable_irq(i2c_dev, mask);
+       if (!i2c_dev->slave_running) {
+               u32 mask;
+               /* Disable interrupts */
+               if (stm32f7_i2c_is_slave_registered(i2c_dev))
+                       mask = STM32F7_I2C_XFER_IRQ_MASK;
+               else
+                       mask = STM32F7_I2C_ALL_IRQ_MASK;
+               stm32f7_i2c_disable_irq(i2c_dev, mask);
+       }
 
        /* Disable dma */
        if (i2c_dev->use_dma) {
@@ -1809,7 +1814,7 @@ static u32 stm32f7_i2c_func(struct i2c_adapter *adap)
                I2C_FUNC_SMBUS_I2C_BLOCK;
 }
 
-static struct i2c_algorithm stm32f7_i2c_algo = {
+static const struct i2c_algorithm stm32f7_i2c_algo = {
        .master_xfer = stm32f7_i2c_xfer,
        .smbus_xfer = stm32f7_i2c_smbus_xfer,
        .functionality = stm32f7_i2c_func,
index f724c8e..39762f0 100644 (file)
@@ -526,7 +526,7 @@ static const struct i2c_algorithm synquacer_i2c_algo = {
        .functionality  = synquacer_i2c_functionality,
 };
 
-static struct i2c_adapter synquacer_i2c_ops = {
+static const struct i2c_adapter synquacer_i2c_ops = {
        .owner          = THIS_MODULE,
        .name           = "synquacer_i2c-adapter",
        .algo           = &synquacer_i2c_algo,
index 37347c9..0bff3f3 100644 (file)
@@ -39,7 +39,7 @@ struct taos_data {
 };
 
 /* TAOS TSL2550 EVM */
-static struct i2c_board_info tsl2550_info = {
+static const struct i2c_board_info tsl2550_info = {
        I2C_BOARD_INFO("tsl2550", 0x39),
 };
 
index 9fcb13b..c1683f9 100644 (file)
@@ -636,7 +636,7 @@ static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev)
        dvc_writel(i2c_dev, val, DVC_CTRL_REG1);
 }
 
-static int tegra_i2c_runtime_resume(struct device *dev)
+static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
        int ret;
@@ -665,7 +665,7 @@ static int tegra_i2c_runtime_resume(struct device *dev)
        return 0;
 }
 
-static int tegra_i2c_runtime_suspend(struct device *dev)
+static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
@@ -713,12 +713,6 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
        u32 tsu_thd;
        u8 tlow, thigh;
 
-       err = pm_runtime_get_sync(i2c_dev->dev);
-       if (err < 0) {
-               dev_err(i2c_dev->dev, "runtime resume failed %d\n", err);
-               return err;
-       }
-
        reset_control_assert(i2c_dev->rst);
        udelay(2);
        reset_control_deassert(i2c_dev->rst);
@@ -772,7 +766,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
                if (err) {
                        dev_err(i2c_dev->dev,
                                "failed changing clock rate: %d\n", err);
-                       goto err;
+                       return err;
                }
        }
 
@@ -787,23 +781,21 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
 
        err = tegra_i2c_flush_fifos(i2c_dev);
        if (err)
-               goto err;
+               return err;
 
        if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg)
                i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE);
 
        err = tegra_i2c_wait_for_config_load(i2c_dev);
        if (err)
-               goto err;
+               return err;
 
        if (i2c_dev->irq_disabled) {
                i2c_dev->irq_disabled = false;
                enable_irq(i2c_dev->irq);
        }
 
-err:
-       pm_runtime_put(i2c_dev->dev);
-       return err;
+       return 0;
 }
 
 static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev)
@@ -1616,12 +1608,14 @@ static int tegra_i2c_probe(struct platform_device *pdev)
        }
 
        pm_runtime_enable(&pdev->dev);
-       if (!pm_runtime_enabled(&pdev->dev)) {
+       if (!pm_runtime_enabled(&pdev->dev))
                ret = tegra_i2c_runtime_resume(&pdev->dev);
-               if (ret < 0) {
-                       dev_err(&pdev->dev, "runtime resume failed\n");
-                       goto unprepare_div_clk;
-               }
+       else
+               ret = pm_runtime_get_sync(i2c_dev->dev);
+
+       if (ret < 0) {
+               dev_err(&pdev->dev, "runtime resume failed\n");
+               goto unprepare_div_clk;
        }
 
        if (i2c_dev->is_multimaster_mode) {
@@ -1666,6 +1660,8 @@ static int tegra_i2c_probe(struct platform_device *pdev)
        if (ret)
                goto release_dma;
 
+       pm_runtime_put(&pdev->dev);
+
        return 0;
 
 release_dma:
@@ -1711,8 +1707,7 @@ static int tegra_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int tegra_i2c_suspend(struct device *dev)
+static int __maybe_unused tegra_i2c_suspend(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
@@ -1721,38 +1716,41 @@ static int tegra_i2c_suspend(struct device *dev)
        return 0;
 }
 
-static int tegra_i2c_resume(struct device *dev)
+static int __maybe_unused tegra_i2c_resume(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
        int err;
 
+       err = tegra_i2c_runtime_resume(dev);
+       if (err)
+               return err;
+
        err = tegra_i2c_init(i2c_dev, false);
        if (err)
                return err;
 
+       err = tegra_i2c_runtime_suspend(dev);
+       if (err)
+               return err;
+
        i2c_mark_adapter_resumed(&i2c_dev->adapter);
 
        return 0;
 }
 
 static const struct dev_pm_ops tegra_i2c_pm = {
-       SET_SYSTEM_SLEEP_PM_OPS(tegra_i2c_suspend, tegra_i2c_resume)
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_i2c_suspend, tegra_i2c_resume)
        SET_RUNTIME_PM_OPS(tegra_i2c_runtime_suspend, tegra_i2c_runtime_resume,
                           NULL)
 };
 
-#define TEGRA_I2C_PM   (&tegra_i2c_pm)
-#else
-#define TEGRA_I2C_PM   NULL
-#endif
-
 static struct platform_driver tegra_i2c_driver = {
        .probe   = tegra_i2c_probe,
        .remove  = tegra_i2c_remove,
        .driver  = {
                .name  = "tegra-i2c",
                .of_match_table = tegra_i2c_of_match,
-               .pm    = TEGRA_I2C_PM,
+               .pm    = &tegra_i2c_pm,
        },
 };
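
A note on the runtime-PM rework above: tegra_i2c_init() no longer takes its own pm_runtime reference; instead, probe powers the device up once (falling back to a manual tegra_i2c_runtime_resume() call when runtime PM is disabled) and drops the reference only after the adapter is registered. A minimal sketch of that probe-time pattern, with the error label as a hypothetical stand-in for the driver's real unwind path:

	/* Sketch only: keep the device powered across the whole init. */
	pm_runtime_enable(&pdev->dev);
	if (!pm_runtime_enabled(&pdev->dev))
		ret = tegra_i2c_runtime_resume(&pdev->dev); /* manual power-up */
	else
		ret = pm_runtime_get_sync(&pdev->dev);      /* take a PM ref */
	if (ret < 0)
		goto err_unwind;                            /* hypothetical label */

	/* ... hardware init, i2c_add_numbered_adapter(), ... */

	pm_runtime_put(&pdev->dev); /* allow runtime suspend once registered */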
 
index 7acca25..4241aac 100644 (file)
@@ -108,7 +108,6 @@ static void uniphier_fi2c_fill_txfifo(struct uniphier_fi2c_priv *priv,
                if (fifo_space-- <= 0)
                        break;
 
-               dev_dbg(&priv->adap.dev, "write data: %02x\n", *priv->buf);
                writel(*priv->buf++, priv->membase + UNIPHIER_FI2C_DTTX);
                priv->len--;
        }
@@ -124,7 +123,6 @@ static void uniphier_fi2c_drain_rxfifo(struct uniphier_fi2c_priv *priv)
                        break;
 
                *priv->buf++ = readl(priv->membase + UNIPHIER_FI2C_DTRX);
-               dev_dbg(&priv->adap.dev, "read data: %02x\n", priv->buf[-1]);
                priv->len--;
        }
 }
@@ -142,8 +140,6 @@ static void uniphier_fi2c_clear_irqs(struct uniphier_fi2c_priv *priv,
 
 static void uniphier_fi2c_stop(struct uniphier_fi2c_priv *priv)
 {
-       dev_dbg(&priv->adap.dev, "stop condition\n");
-
        priv->enabled_irqs |= UNIPHIER_FI2C_INT_STOP;
        uniphier_fi2c_set_irqs(priv);
        writel(UNIPHIER_FI2C_CR_MST | UNIPHIER_FI2C_CR_STO,
@@ -160,21 +156,15 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
        irq_status = readl(priv->membase + UNIPHIER_FI2C_INT);
        irq_status &= priv->enabled_irqs;
 
-       dev_dbg(&priv->adap.dev,
-               "interrupt: enabled_irqs=%04x, irq_status=%04x\n",
-               priv->enabled_irqs, irq_status);
-
        if (irq_status & UNIPHIER_FI2C_INT_STOP)
                goto complete;
 
        if (unlikely(irq_status & UNIPHIER_FI2C_INT_AL)) {
-               dev_dbg(&priv->adap.dev, "arbitration lost\n");
                priv->error = -EAGAIN;
                goto complete;
        }
 
        if (unlikely(irq_status & UNIPHIER_FI2C_INT_NA)) {
-               dev_dbg(&priv->adap.dev, "could not get ACK\n");
                priv->error = -ENXIO;
                if (priv->flags & UNIPHIER_FI2C_RD) {
                        /*
@@ -215,18 +205,14 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
                if (unlikely(priv->flags & UNIPHIER_FI2C_MANUAL_NACK)) {
                        if (priv->len <= UNIPHIER_FI2C_FIFO_SIZE &&
                            !(priv->flags & UNIPHIER_FI2C_BYTE_WISE)) {
-                               dev_dbg(&priv->adap.dev,
-                                       "enable read byte count IRQ\n");
                                priv->enabled_irqs |= UNIPHIER_FI2C_INT_RB;
                                uniphier_fi2c_set_irqs(priv);
                                priv->flags |= UNIPHIER_FI2C_BYTE_WISE;
                        }
-                       if (priv->len <= 1) {
-                               dev_dbg(&priv->adap.dev, "set NACK\n");
+                       if (priv->len <= 1)
                                writel(UNIPHIER_FI2C_CR_MST |
                                       UNIPHIER_FI2C_CR_NACK,
                                       priv->membase + UNIPHIER_FI2C_CR);
-                       }
                }
 
                goto handled;
@@ -334,10 +320,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
        bool is_read = msg->flags & I2C_M_RD;
        unsigned long time_left, flags;
 
-       dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, repeat=%d, stop=%d\n",
-               is_read ? "receive" : "transmit", msg->addr, msg->len,
-               repeat, stop);
-
        priv->len = msg->len;
        priv->buf = msg->buf;
        priv->enabled_irqs = UNIPHIER_FI2C_INT_FAULTS;
@@ -359,7 +341,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
        else
                uniphier_fi2c_tx_init(priv, msg->addr, repeat);
 
-       dev_dbg(&adap->dev, "start condition\n");
        /*
         * For a repeated START condition, writing a slave address to the FIFO
         * kicks the controller. So, the UNIPHIER_FI2C_CR register should be
@@ -383,7 +364,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
                uniphier_fi2c_recover(priv);
                return -ETIMEDOUT;
        }
-       dev_dbg(&adap->dev, "complete\n");
 
        if (unlikely(priv->flags & UNIPHIER_FI2C_DEFER_STOP_COMP)) {
                u32 status;
@@ -538,7 +518,6 @@ static int uniphier_fi2c_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct uniphier_fi2c_priv *priv;
-       struct resource *regs;
        u32 bus_speed;
        unsigned long clk_rate;
        int irq, ret;
@@ -547,8 +526,7 @@ static int uniphier_fi2c_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->membase = devm_ioremap_resource(dev, regs);
+       priv->membase = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->membase))
                return PTR_ERR(priv->membase);
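
The probe cleanup here (repeated in the uniphier-i2c hunk further down) folds the platform_get_resource()/devm_ioremap_resource() pair into the single devm_platform_ioremap_resource() helper. For reference, a sketch of the two equivalent forms:

	/* Before: look up the MEM resource, then map it. */
	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, regs);

	/* After: one helper does both the lookup and the mapping. */
	base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(base))
		return PTR_ERR(base);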
 
index 0173840..0270090 100644 (file)
@@ -71,7 +71,6 @@ static int uniphier_i2c_xfer_byte(struct i2c_adapter *adap, u32 txdata,
        reinit_completion(&priv->comp);
 
        txdata |= UNIPHIER_I2C_DTRM_IRQEN;
-       dev_dbg(&adap->dev, "write data: 0x%04x\n", txdata);
        writel(txdata, priv->membase + UNIPHIER_I2C_DTRM);
 
        time_left = wait_for_completion_timeout(&priv->comp, adap->timeout);
@@ -81,8 +80,6 @@ static int uniphier_i2c_xfer_byte(struct i2c_adapter *adap, u32 txdata,
        }
 
        rxdata = readl(priv->membase + UNIPHIER_I2C_DREC);
-       dev_dbg(&adap->dev, "read data: 0x%04x\n", rxdata);
-
        if (rxdatap)
                *rxdatap = rxdata;
 
@@ -98,14 +95,11 @@ static int uniphier_i2c_send_byte(struct i2c_adapter *adap, u32 txdata)
        if (ret)
                return ret;
 
-       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LAB)) {
-               dev_dbg(&adap->dev, "arbitration lost\n");
+       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LAB))
                return -EAGAIN;
-       }
-       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LRB)) {
-               dev_dbg(&adap->dev, "could not get ACK\n");
+
+       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LRB))
                return -ENXIO;
-       }
 
        return 0;
 }
@@ -115,7 +109,6 @@ static int uniphier_i2c_tx(struct i2c_adapter *adap, u16 addr, u16 len,
 {
        int ret;
 
-       dev_dbg(&adap->dev, "start condition\n");
        ret = uniphier_i2c_send_byte(adap, addr << 1 |
                                     UNIPHIER_I2C_DTRM_STA |
                                     UNIPHIER_I2C_DTRM_NACK);
@@ -137,7 +130,6 @@ static int uniphier_i2c_rx(struct i2c_adapter *adap, u16 addr, u16 len,
 {
        int ret;
 
-       dev_dbg(&adap->dev, "start condition\n");
        ret = uniphier_i2c_send_byte(adap, addr << 1 |
                                     UNIPHIER_I2C_DTRM_STA |
                                     UNIPHIER_I2C_DTRM_NACK |
@@ -161,7 +153,6 @@ static int uniphier_i2c_rx(struct i2c_adapter *adap, u16 addr, u16 len,
 
 static int uniphier_i2c_stop(struct i2c_adapter *adap)
 {
-       dev_dbg(&adap->dev, "stop condition\n");
        return uniphier_i2c_send_byte(adap, UNIPHIER_I2C_DTRM_STO |
                                      UNIPHIER_I2C_DTRM_NACK);
 }
@@ -173,9 +164,6 @@ static int uniphier_i2c_master_xfer_one(struct i2c_adapter *adap,
        bool recovery = false;
        int ret;
 
-       dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, stop=%d\n",
-               is_read ? "receive" : "transmit", msg->addr, msg->len, stop);
-
        if (is_read)
                ret = uniphier_i2c_rx(adap, msg->addr, msg->len, msg->buf);
        else
@@ -326,7 +314,6 @@ static int uniphier_i2c_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct uniphier_i2c_priv *priv;
-       struct resource *regs;
        u32 bus_speed;
        unsigned long clk_rate;
        int irq, ret;
@@ -335,8 +322,7 @@ static int uniphier_i2c_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->membase = devm_ioremap_resource(dev, regs);
+       priv->membase = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->membase))
                return PTR_ERR(priv->membase);
 
index 72b3001..5f6a498 100644 (file)
@@ -350,13 +350,11 @@ static int i2c_device_probe(struct device *dev)
                return -ENODEV;
 
        if (client->flags & I2C_CLIENT_WAKE) {
-               int wakeirq = -ENOENT;
+               int wakeirq;
 
-               if (dev->of_node) {
-                       wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
-                       if (wakeirq == -EPROBE_DEFER)
-                               return wakeirq;
-               }
+               wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
+               if (wakeirq == -EPROBE_DEFER)
+                       return wakeirq;
 
                device_init_wakeup(&client->dev, true);
 
@@ -966,7 +964,7 @@ struct i2c_client *devm_i2c_new_dummy_device(struct device *dev,
 EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
 
 /**
- * i2c_new_secondary_device - Helper to get the instantiated secondary address
+ * i2c_new_ancillary_device - Helper to get the instantiated secondary address
  * and create the associated device
  * @client: Handle to the primary client
  * @name: Handle to specify which secondary address to get
@@ -985,9 +983,9 @@ EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
  * cell whose "reg-names" value matches the slave name.
  *
  * This returns the new i2c client, which should be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
+ * i2c_unregister_device(); or an ERR_PTR to describe the error.
  */
-struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
+struct i2c_client *i2c_new_ancillary_device(struct i2c_client *client,
                                                const char *name,
                                                u16 default_addr)
 {
@@ -1002,9 +1000,9 @@ struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
        }
 
        dev_dbg(&client->adapter->dev, "Address for %s : 0x%x\n", name, addr);
-       return i2c_new_dummy(client->adapter, addr);
+       return i2c_new_dummy_device(client->adapter, addr);
 }
-EXPORT_SYMBOL_GPL(i2c_new_secondary_device);
+EXPORT_SYMBOL_GPL(i2c_new_ancillary_device);
 
 /* ------------------------------------------------------------------------- */
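
Callers of the renamed helper must also adapt to the new error convention: i2c_new_ancillary_device() reports failure through ERR_PTR() rather than NULL. A hedged caller-side sketch (the ancillary name "vcc" and address 0x60 are made-up examples):

	struct i2c_client *ancillary;

	ancillary = i2c_new_ancillary_device(client, "vcc", 0x60);
	if (IS_ERR(ancillary))
		return PTR_ERR(ancillary);  /* propagate the encoded errno */

	/* ... use it; i2c_unregister_device(ancillary) on teardown ... */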
 
index be65d38..db9763c 100644 (file)
  * pointer, yet implementation is deferred until the need actually arises.
  */
 
+/*
+ * FIXME: What to do if only 8 bits of a 16-bit address are sent?
+ * The ST-M24C64 sends only 0xff then. Needs verification with other
+ * EEPROMs, though. We currently use the 8-bit value as a valid address.
+ */
+
+#include <linux/bitfield.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
 struct eeprom_data {
        struct bin_attribute bin;
-       bool first_write;
        spinlock_t buffer_lock;
-       u8 buffer_idx;
+       u16 buffer_idx;
+       u16 address_mask;
+       u8 num_address_bytes;
+       u8 idx_write_cnt;
+       bool read_only;
        u8 buffer[];
 };
 
+#define I2C_SLAVE_BYTELEN GENMASK(15, 0)
+#define I2C_SLAVE_FLAG_ADDR16 BIT(16)
+#define I2C_SLAVE_FLAG_RO BIT(17)
+#define I2C_SLAVE_DEVICE_MAGIC(_len, _flags) ((_flags) | (_len))
+
 static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
                                     enum i2c_slave_event event, u8 *val)
 {
@@ -34,13 +49,17 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
 
        switch (event) {
        case I2C_SLAVE_WRITE_RECEIVED:
-               if (eeprom->first_write) {
-                       eeprom->buffer_idx = *val;
-                       eeprom->first_write = false;
+               if (eeprom->idx_write_cnt < eeprom->num_address_bytes) {
+                       if (eeprom->idx_write_cnt == 0)
+                               eeprom->buffer_idx = 0;
+                       eeprom->buffer_idx = *val | (eeprom->buffer_idx << 8);
+                       eeprom->idx_write_cnt++;
                } else {
-                       spin_lock(&eeprom->buffer_lock);
-                       eeprom->buffer[eeprom->buffer_idx++] = *val;
-                       spin_unlock(&eeprom->buffer_lock);
+                       if (!eeprom->read_only) {
+                               spin_lock(&eeprom->buffer_lock);
+                               eeprom->buffer[eeprom->buffer_idx++ & eeprom->address_mask] = *val;
+                               spin_unlock(&eeprom->buffer_lock);
+                       }
                }
                break;
 
@@ -50,7 +69,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
                /* fallthrough */
        case I2C_SLAVE_READ_REQUESTED:
                spin_lock(&eeprom->buffer_lock);
-               *val = eeprom->buffer[eeprom->buffer_idx];
+               *val = eeprom->buffer[eeprom->buffer_idx & eeprom->address_mask];
                spin_unlock(&eeprom->buffer_lock);
                /*
                 * Do not increment buffer_idx here, because we don't know if
@@ -61,7 +80,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
 
        case I2C_SLAVE_STOP:
        case I2C_SLAVE_WRITE_REQUESTED:
-               eeprom->first_write = true;
+               eeprom->idx_write_cnt = 0;
                break;
 
        default:
@@ -105,13 +124,17 @@ static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_de
 {
        struct eeprom_data *eeprom;
        int ret;
-       unsigned size = id->driver_data;
+       unsigned int size = FIELD_GET(I2C_SLAVE_BYTELEN, id->driver_data);
+       unsigned int flag_addr16 = FIELD_GET(I2C_SLAVE_FLAG_ADDR16, id->driver_data);
 
        eeprom = devm_kzalloc(&client->dev, sizeof(struct eeprom_data) + size, GFP_KERNEL);
        if (!eeprom)
                return -ENOMEM;
 
-       eeprom->first_write = true;
+       eeprom->idx_write_cnt = 0;
+       eeprom->num_address_bytes = flag_addr16 ? 2 : 1;
+       eeprom->address_mask = size - 1;
+       eeprom->read_only = FIELD_GET(I2C_SLAVE_FLAG_RO, id->driver_data);
        spin_lock_init(&eeprom->buffer_lock);
        i2c_set_clientdata(client, eeprom);
 
@@ -146,7 +169,12 @@ static int i2c_slave_eeprom_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id i2c_slave_eeprom_id[] = {
-       { "slave-24c02", 2048 / 8 },
+       { "slave-24c02", I2C_SLAVE_DEVICE_MAGIC(2048 / 8,  0) },
+       { "slave-24c02ro", I2C_SLAVE_DEVICE_MAGIC(2048 / 8,  I2C_SLAVE_FLAG_RO) },
+       { "slave-24c32", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16) },
+       { "slave-24c32ro", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16 | I2C_SLAVE_FLAG_RO) },
+       { "slave-24c64", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16) },
+       { "slave-24c64ro", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16 | I2C_SLAVE_FLAG_RO) },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, i2c_slave_eeprom_id);
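
The driver_data word now packs the EEPROM byte length into bits 15:0 and the 16-bit-address and read-only flags above it, while multi-byte register addresses are accumulated one byte at a time and wrapped with a power-of-two mask. A self-contained user-space model of that arithmetic (BIT/GENMASK re-created locally, assuming a 64-bit unsigned long):

#include <stdio.h>
#include <stdint.h>

#define BIT(n)        (1UL << (n))
#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (63 - (h))))

#define SLAVE_BYTELEN     GENMASK(15, 0)
#define SLAVE_FLAG_ADDR16 BIT(16)
#define DEVICE_MAGIC(len, flags) ((flags) | (len))

int main(void)
{
	unsigned long magic = DEVICE_MAGIC(65536 / 8, SLAVE_FLAG_ADDR16);
	unsigned int size = magic & SLAVE_BYTELEN;   /* 8192 bytes (24c64) */
	uint16_t idx = 0, mask = size - 1;           /* size is a power of two */

	/* a 16-bit register address arrives high byte first */
	idx = 0x12 | (uint16_t)(idx << 8);
	idx = 0x34 | (uint16_t)(idx << 8);
	printf("size=%u index=0x%04x masked=0x%04x\n",
	       size, (unsigned int)idx, (unsigned int)(idx & mask));
	return 0;
}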
index 055227c..67b8817 100644 (file)
@@ -474,12 +474,17 @@ static int adxl372_configure_fifo(struct adxl372_state *st)
        if (ret < 0)
                return ret;
 
-       fifo_samples = st->watermark & 0xFF;
+       /*
+        * watermark stores the number of sets; we need to write the FIFO
+        * registers with the number of samples
+        */
+       fifo_samples = (st->watermark * st->fifo_set_size);
        fifo_ctl = ADXL372_FIFO_CTL_FORMAT_MODE(st->fifo_format) |
                   ADXL372_FIFO_CTL_MODE_MODE(st->fifo_mode) |
-                  ADXL372_FIFO_CTL_SAMPLES_MODE(st->watermark);
+                  ADXL372_FIFO_CTL_SAMPLES_MODE(fifo_samples);
 
-       ret = regmap_write(st->regmap, ADXL372_FIFO_SAMPLES, fifo_samples);
+       ret = regmap_write(st->regmap,
+                          ADXL372_FIFO_SAMPLES, fifo_samples & 0xFF);
        if (ret < 0)
                return ret;
 
@@ -548,8 +553,7 @@ static irqreturn_t adxl372_trigger_handler(int irq, void  *p)
                        goto err;
 
                /* Each sample is 2 bytes */
-               for (i = 0; i < fifo_entries * sizeof(u16);
-                    i += st->fifo_set_size * sizeof(u16))
+               for (i = 0; i < fifo_entries; i += st->fifo_set_size)
                        iio_push_to_buffers(indio_dev, &st->fifo_buf[i]);
        }
 err:
@@ -571,6 +575,14 @@ static int adxl372_setup(struct adxl372_state *st)
                return -ENODEV;
        }
 
+       /*
+        * Perform a software reset to make sure the device is in a consistent
+        * state after start up.
+        */
+       ret = regmap_write(st->regmap, ADXL372_RESET, ADXL372_RESET_CODE);
+       if (ret < 0)
+               return ret;
+
        ret = adxl372_set_op_mode(st, ADXL372_STANDBY);
        if (ret < 0)
                return ret;
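
The watermark fix above hinges on a unit conversion: the user-visible watermark counts sample sets, while the FIFO_SAMPLES register and the drain loop work in individual samples. A small arithmetic model of the corrected bookkeeping:

#include <stdio.h>

int main(void)
{
	unsigned int watermark = 30;     /* sets requested by the user */
	unsigned int fifo_set_size = 3;  /* e.g. one X, Y, Z sample per set */
	unsigned int fifo_samples = watermark * fifo_set_size; /* 90 samples */
	unsigned int i;

	/* only the low byte goes into the 8-bit FIFO_SAMPLES register */
	printf("FIFO_SAMPLES = %u\n", fifo_samples & 0xFF);

	/* drain one set per iteration: the index is in samples, not bytes */
	for (i = 0; i < fifo_samples; i += fifo_set_size)
		; /* iio_push_to_buffers(indio_dev, &fifo_buf[i]) in the driver */
	printf("pushed %u sets\n", i / fifo_set_size);
	return 0;
}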
index cf6c0e3..121b4e8 100644 (file)
 #define BMC150_ACCEL_SLEEP_1_SEC               0x0F
 
 #define BMC150_ACCEL_REG_TEMP                  0x08
-#define BMC150_ACCEL_TEMP_CENTER_VAL           24
+#define BMC150_ACCEL_TEMP_CENTER_VAL           23
 
 #define BMC150_ACCEL_AXIS_TO_REG(axis) (BMC150_ACCEL_REG_XOUT_L + (axis * 2))
 #define BMC150_AUTO_SUSPEND_DELAY_MS           2000
index 5a3ca59..f658012 100644 (file)
@@ -810,10 +810,10 @@ static int ad799x_probe(struct i2c_client *client,
 
        ret = ad799x_write_config(st, st->chip_config->default_config);
        if (ret < 0)
-               goto error_disable_reg;
+               goto error_disable_vref;
        ret = ad799x_read_config(st);
        if (ret < 0)
-               goto error_disable_reg;
+               goto error_disable_vref;
        st->config = ret;
 
        ret = iio_triggered_buffer_setup(indio_dev, NULL,
index adc9cf7..8ea2aed 100644 (file)
@@ -7,6 +7,7 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#include <linux/dmi.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
 #define AXP288_ADC_EN_MASK                             0xF0
 #define AXP288_ADC_TS_ENABLE                           0x01
 
+#define AXP288_ADC_TS_BIAS_MASK                                GENMASK(5, 4)
+#define AXP288_ADC_TS_BIAS_20UA                                (0 << 4)
+#define AXP288_ADC_TS_BIAS_40UA                                (1 << 4)
+#define AXP288_ADC_TS_BIAS_60UA                                (2 << 4)
+#define AXP288_ADC_TS_BIAS_80UA                                (3 << 4)
 #define AXP288_ADC_TS_CURRENT_ON_OFF_MASK              GENMASK(1, 0)
 #define AXP288_ADC_TS_CURRENT_OFF                      (0 << 0)
 #define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING         (1 << 0)
@@ -177,10 +183,36 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
        return ret;
 }
 
+/*
+ * We rely on the machine's firmware to correctly set up the TS pin bias
+ * current at boot. This lists systems with broken fw where we need to set it
+ * ourselves.
+ */
+static const struct dmi_system_id axp288_adc_ts_bias_override[] = {
+       {
+               /* Lenovo Ideapad 100S (11 inch) */
+               .matches = {
+                 DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                 DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 100S-11IBY"),
+               },
+               .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA,
+       },
+       {}
+};
+
 static int axp288_adc_initialize(struct axp288_adc_info *info)
 {
+       const struct dmi_system_id *bias_override;
        int ret, adc_enable_val;
 
+       bias_override = dmi_first_match(axp288_adc_ts_bias_override);
+       if (bias_override) {
+               ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL,
+                                        AXP288_ADC_TS_BIAS_MASK,
+                                        (uintptr_t)bias_override->driver_data);
+               if (ret)
+                       return ret;
+       }
+
        /*
         * Determine if the TS pin is enabled and set the TS current-source
         * accordingly.
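
The quirk table above stores a small register value directly in the void *driver_data slot by round-tripping it through uintptr_t, avoiding a separate lookup table. A standalone model of that idiom:

#include <stdio.h>
#include <stdint.h>

struct quirk {
	const char *product;
	void *driver_data; /* small integer smuggled through a pointer */
};

static const struct quirk quirks[] = {
	{ "Lenovo ideapad 100S-11IBY", (void *)(uintptr_t)(3 << 4) }, /* 80 uA */
};

int main(void)
{
	unsigned int bias = (uintptr_t)quirks[0].driver_data;

	printf("TS bias field = 0x%02x\n", bias); /* prints 0x30 */
	return 0;
}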
index 88c7fe1..62e6c8b 100644 (file)
@@ -100,14 +100,14 @@ struct hx711_data {
 
 static int hx711_cycle(struct hx711_data *hx711_data)
 {
-       int val;
+       unsigned long flags;
 
        /*
         * if preempted for more than 60 us while PD_SCK is high,
         * the hx711 goes into reset
         * ==> the measurement is invalid
         */
-       preempt_disable();
+       local_irq_save(flags);
        gpiod_set_value(hx711_data->gpiod_pd_sck, 1);
 
        /*
@@ -117,7 +117,6 @@ static int hx711_cycle(struct hx711_data *hx711_data)
         */
        ndelay(hx711_data->data_ready_delay_ns);
 
-       val = gpiod_get_value(hx711_data->gpiod_dout);
        /*
         * here we are not waiting for 0.2 us as suggested by the datasheet,
         * because, in a test scenario, the oscilloscope showed the actual
         * timings to be well above that (e.g. 0.56 us for PD_SCK low on a
         * TI Sitara at 800 MHz)
         * and 0.56 us for PD_SCK low on TI Sitara with 800 MHz
         */
        gpiod_set_value(hx711_data->gpiod_pd_sck, 0);
-       preempt_enable();
+       local_irq_restore(flags);
 
        /*
         * make it a square wave for addressing cases with capacitance on
@@ -133,7 +132,8 @@ static int hx711_cycle(struct hx711_data *hx711_data)
         */
        ndelay(hx711_data->data_ready_delay_ns);
 
-       return val;
+       /* sample as late as possible */
+       return gpiod_get_value(hx711_data->gpiod_dout);
 }
 
 static int hx711_read(struct hx711_data *hx711_data)
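
The hx711 change replaces preempt_disable() with local_irq_save() so that not even an interrupt can stretch the PD_SCK-high window past the chip's 60 us reset threshold, and it samples DOUT only after the clock has gone low again. A stubbed user-space sketch of one clock cycle; gpio_set()/gpio_get()/delay_ns() are hypothetical stand-ins for gpiod_set_value()/gpiod_get_value()/ndelay():

static int dout_level = 1;                     /* pretend DOUT reads high */
static void gpio_set(int level) { (void)level; }
static int  gpio_get(void)      { return dout_level; }
static void delay_ns(unsigned long ns) { (void)ns; }

static int hx711_cycle_model(void)
{
	gpio_set(1);        /* PD_SCK high: must end within 60 us */
	delay_ns(100);
	gpio_set(0);        /* PD_SCK low: the critical window is over */
	delay_ns(100);
	return gpio_get();  /* sample as late as possible */
}

int main(void)
{
	int i, value = 0;

	for (i = 0; i < 24; i++)            /* 24 data bits, MSB first */
		value = (value << 1) | hx711_cycle_model();
	return value == 0xFFFFFF ? 0 : 1;
}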
index 7b28d04..7b27306 100644 (file)
@@ -1219,6 +1219,11 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
        if (IS_ERR(base))
                return PTR_ERR(base);
 
+       priv->regmap = devm_regmap_init_mmio(&pdev->dev, base,
+                                            priv->param->regmap_config);
+       if (IS_ERR(priv->regmap))
+               return PTR_ERR(priv->regmap);
+
        irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
        if (!irq)
                return -EINVAL;
@@ -1228,11 +1233,6 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       priv->regmap = devm_regmap_init_mmio(&pdev->dev, base,
-                                            priv->param->regmap_config);
-       if (IS_ERR(priv->regmap))
-               return PTR_ERR(priv->regmap);
-
        priv->clkin = devm_clk_get(&pdev->dev, "clkin");
        if (IS_ERR(priv->clkin)) {
                dev_err(&pdev->dev, "failed to get clkin\n");
index 9b85fef..93a096a 100644 (file)
 
 #include "stm32-adc-core.h"
 
-/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */
-#define STM32F4_ADC_CSR                        (STM32_ADCX_COMN_OFFSET + 0x00)
-#define STM32F4_ADC_CCR                        (STM32_ADCX_COMN_OFFSET + 0x04)
-
-/* STM32F4_ADC_CSR - bit fields */
-#define STM32F4_EOC3                   BIT(17)
-#define STM32F4_EOC2                   BIT(9)
-#define STM32F4_EOC1                   BIT(1)
-
-/* STM32F4_ADC_CCR - bit fields */
-#define STM32F4_ADC_ADCPRE_SHIFT       16
-#define STM32F4_ADC_ADCPRE_MASK                GENMASK(17, 16)
-
-/* STM32H7 - common registers for all ADC instances */
-#define STM32H7_ADC_CSR                        (STM32_ADCX_COMN_OFFSET + 0x00)
-#define STM32H7_ADC_CCR                        (STM32_ADCX_COMN_OFFSET + 0x08)
-
-/* STM32H7_ADC_CSR - bit fields */
-#define STM32H7_EOC_SLV                        BIT(18)
-#define STM32H7_EOC_MST                        BIT(2)
-
-/* STM32H7_ADC_CCR - bit fields */
-#define STM32H7_PRESC_SHIFT            18
-#define STM32H7_PRESC_MASK             GENMASK(21, 18)
-#define STM32H7_CKMODE_SHIFT           16
-#define STM32H7_CKMODE_MASK            GENMASK(17, 16)
-
 #define STM32_ADC_CORE_SLEEP_DELAY_MS  2000
 
 /* SYSCFG registers */
@@ -71,6 +44,8 @@
  * @eoc1:      adc1 end of conversion flag in @csr
  * @eoc2:      adc2 end of conversion flag in @csr
  * @eoc3:      adc3 end of conversion flag in @csr
+ * @ier:       interrupt enable register offset for each adc
+ * @eocie_msk: end of conversion interrupt enable mask in @ier
  */
 struct stm32_adc_common_regs {
        u32 csr;
@@ -78,6 +53,8 @@ struct stm32_adc_common_regs {
        u32 eoc1_msk;
        u32 eoc2_msk;
        u32 eoc3_msk;
+       u32 ier;
+       u32 eocie_msk;
 };
 
 struct stm32_adc_priv;
@@ -303,6 +280,8 @@ static const struct stm32_adc_common_regs stm32f4_adc_common_regs = {
        .eoc1_msk = STM32F4_EOC1,
        .eoc2_msk = STM32F4_EOC2,
        .eoc3_msk = STM32F4_EOC3,
+       .ier = STM32F4_ADC_CR1,
+       .eocie_msk = STM32F4_EOCIE,
 };
 
 /* STM32H7 common registers definitions */
@@ -311,8 +290,24 @@ static const struct stm32_adc_common_regs stm32h7_adc_common_regs = {
        .ccr = STM32H7_ADC_CCR,
        .eoc1_msk = STM32H7_EOC_MST,
        .eoc2_msk = STM32H7_EOC_SLV,
+       .ier = STM32H7_ADC_IER,
+       .eocie_msk = STM32H7_EOCIE,
+};
+
+static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = {
+       0, STM32_ADC_OFFSET, STM32_ADC_OFFSET * 2,
 };
 
+static unsigned int stm32_adc_eoc_enabled(struct stm32_adc_priv *priv,
+                                         unsigned int adc)
+{
+       u32 ier, offset = stm32_adc_offset[adc];
+
+       ier = readl_relaxed(priv->common.base + offset + priv->cfg->regs->ier);
+
+       return ier & priv->cfg->regs->eocie_msk;
+}
+
 /* ADC common interrupt for all instances */
 static void stm32_adc_irq_handler(struct irq_desc *desc)
 {
@@ -323,13 +318,28 @@ static void stm32_adc_irq_handler(struct irq_desc *desc)
        chained_irq_enter(chip, desc);
        status = readl_relaxed(priv->common.base + priv->cfg->regs->csr);
 
-       if (status & priv->cfg->regs->eoc1_msk)
+       /*
+        * End of conversion may be handled by using IRQ or DMA. There may be a
+        * race here when two conversions complete at the same time on several
+        * ADCs. EOC may be read 'set' for several ADCs, with:
+        * - an ADC configured to use DMA (EOC triggers the DMA request, and
+        *   is then automatically cleared by DR read in hardware)
+        * - an ADC configured to use IRQs (EOCIE bit is set. The handler must
+        *   be called in this case)
+        * So both the EOC status bit in CSR and the EOCIE control bit must be
+        * checked before invoking the interrupt handler (i.e. call the ISR
+        * only for IRQ-enabled ADCs).
+        */
+       if (status & priv->cfg->regs->eoc1_msk &&
+           stm32_adc_eoc_enabled(priv, 0))
                generic_handle_irq(irq_find_mapping(priv->domain, 0));
 
-       if (status & priv->cfg->regs->eoc2_msk)
+       if (status & priv->cfg->regs->eoc2_msk &&
+           stm32_adc_eoc_enabled(priv, 1))
                generic_handle_irq(irq_find_mapping(priv->domain, 1));
 
-       if (status & priv->cfg->regs->eoc3_msk)
+       if (status & priv->cfg->regs->eoc3_msk &&
+           stm32_adc_eoc_enabled(priv, 2))
                generic_handle_irq(irq_find_mapping(priv->domain, 2));
 
        chained_irq_exit(chip, desc);
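
The race description above boils down to a two-condition gate: an ADC's handler runs only when its EOC bit is set in the shared CSR *and* its own interrupt-enable register has EOCIE set. A standalone model with fabricated register values (bit positions taken from the STM32F4 definitions in this series):

#include <stdio.h>

#define EOC1  (1u << 1)  /* STM32F4_EOC1 in CSR */
#define EOC2  (1u << 9)  /* STM32F4_EOC2 in CSR */
#define EOCIE (1u << 5)  /* STM32F4_EOCIE in each ADC's CR1 */

int main(void)
{
	unsigned int csr = EOC1 | EOC2;       /* both conversions complete */
	unsigned int ier[2] = { EOCIE, 0 };   /* only ADC0 uses IRQs */
	unsigned int msk[2] = { EOC1, EOC2 };
	int adc;

	for (adc = 0; adc < 2; adc++) {
		if ((csr & msk[adc]) && (ier[adc] & EOCIE))
			printf("ADC%d: invoke the IRQ handler\n", adc);
		else
			printf("ADC%d: skip (DMA-driven or idle)\n", adc);
	}
	return 0;
}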
index 8af507b..2579d51 100644 (file)
  * --------------------------------------------------------
  */
 #define STM32_ADC_MAX_ADCS             3
+#define STM32_ADC_OFFSET               0x100
 #define STM32_ADCX_COMN_OFFSET         0x300
 
+/* STM32F4 - Registers for each ADC instance */
+#define STM32F4_ADC_SR                 0x00
+#define STM32F4_ADC_CR1                        0x04
+#define STM32F4_ADC_CR2                        0x08
+#define STM32F4_ADC_SMPR1              0x0C
+#define STM32F4_ADC_SMPR2              0x10
+#define STM32F4_ADC_HTR                        0x24
+#define STM32F4_ADC_LTR                        0x28
+#define STM32F4_ADC_SQR1               0x2C
+#define STM32F4_ADC_SQR2               0x30
+#define STM32F4_ADC_SQR3               0x34
+#define STM32F4_ADC_JSQR               0x38
+#define STM32F4_ADC_JDR1               0x3C
+#define STM32F4_ADC_JDR2               0x40
+#define STM32F4_ADC_JDR3               0x44
+#define STM32F4_ADC_JDR4               0x48
+#define STM32F4_ADC_DR                 0x4C
+
+/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */
+#define STM32F4_ADC_CSR                        (STM32_ADCX_COMN_OFFSET + 0x00)
+#define STM32F4_ADC_CCR                        (STM32_ADCX_COMN_OFFSET + 0x04)
+
+/* STM32F4_ADC_SR - bit fields */
+#define STM32F4_STRT                   BIT(4)
+#define STM32F4_EOC                    BIT(1)
+
+/* STM32F4_ADC_CR1 - bit fields */
+#define STM32F4_RES_SHIFT              24
+#define STM32F4_RES_MASK               GENMASK(25, 24)
+#define STM32F4_SCAN                   BIT(8)
+#define STM32F4_EOCIE                  BIT(5)
+
+/* STM32F4_ADC_CR2 - bit fields */
+#define STM32F4_SWSTART                        BIT(30)
+#define STM32F4_EXTEN_SHIFT            28
+#define STM32F4_EXTEN_MASK             GENMASK(29, 28)
+#define STM32F4_EXTSEL_SHIFT           24
+#define STM32F4_EXTSEL_MASK            GENMASK(27, 24)
+#define STM32F4_EOCS                   BIT(10)
+#define STM32F4_DDS                    BIT(9)
+#define STM32F4_DMA                    BIT(8)
+#define STM32F4_ADON                   BIT(0)
+
+/* STM32F4_ADC_CSR - bit fields */
+#define STM32F4_EOC3                   BIT(17)
+#define STM32F4_EOC2                   BIT(9)
+#define STM32F4_EOC1                   BIT(1)
+
+/* STM32F4_ADC_CCR - bit fields */
+#define STM32F4_ADC_ADCPRE_SHIFT       16
+#define STM32F4_ADC_ADCPRE_MASK                GENMASK(17, 16)
+
+/* STM32H7 - Registers for each ADC instance */
+#define STM32H7_ADC_ISR                        0x00
+#define STM32H7_ADC_IER                        0x04
+#define STM32H7_ADC_CR                 0x08
+#define STM32H7_ADC_CFGR               0x0C
+#define STM32H7_ADC_SMPR1              0x14
+#define STM32H7_ADC_SMPR2              0x18
+#define STM32H7_ADC_PCSEL              0x1C
+#define STM32H7_ADC_SQR1               0x30
+#define STM32H7_ADC_SQR2               0x34
+#define STM32H7_ADC_SQR3               0x38
+#define STM32H7_ADC_SQR4               0x3C
+#define STM32H7_ADC_DR                 0x40
+#define STM32H7_ADC_DIFSEL             0xC0
+#define STM32H7_ADC_CALFACT            0xC4
+#define STM32H7_ADC_CALFACT2           0xC8
+
+/* STM32H7 - common registers for all ADC instances */
+#define STM32H7_ADC_CSR                        (STM32_ADCX_COMN_OFFSET + 0x00)
+#define STM32H7_ADC_CCR                        (STM32_ADCX_COMN_OFFSET + 0x08)
+
+/* STM32H7_ADC_ISR - bit fields */
+#define STM32MP1_VREGREADY             BIT(12)
+#define STM32H7_EOC                    BIT(2)
+#define STM32H7_ADRDY                  BIT(0)
+
+/* STM32H7_ADC_IER - bit fields */
+#define STM32H7_EOCIE                  STM32H7_EOC
+
+/* STM32H7_ADC_CR - bit fields */
+#define STM32H7_ADCAL                  BIT(31)
+#define STM32H7_ADCALDIF               BIT(30)
+#define STM32H7_DEEPPWD                        BIT(29)
+#define STM32H7_ADVREGEN               BIT(28)
+#define STM32H7_LINCALRDYW6            BIT(27)
+#define STM32H7_LINCALRDYW5            BIT(26)
+#define STM32H7_LINCALRDYW4            BIT(25)
+#define STM32H7_LINCALRDYW3            BIT(24)
+#define STM32H7_LINCALRDYW2            BIT(23)
+#define STM32H7_LINCALRDYW1            BIT(22)
+#define STM32H7_ADCALLIN               BIT(16)
+#define STM32H7_BOOST                  BIT(8)
+#define STM32H7_ADSTP                  BIT(4)
+#define STM32H7_ADSTART                        BIT(2)
+#define STM32H7_ADDIS                  BIT(1)
+#define STM32H7_ADEN                   BIT(0)
+
+/* STM32H7_ADC_CFGR bit fields */
+#define STM32H7_EXTEN_SHIFT            10
+#define STM32H7_EXTEN_MASK             GENMASK(11, 10)
+#define STM32H7_EXTSEL_SHIFT           5
+#define STM32H7_EXTSEL_MASK            GENMASK(9, 5)
+#define STM32H7_RES_SHIFT              2
+#define STM32H7_RES_MASK               GENMASK(4, 2)
+#define STM32H7_DMNGT_SHIFT            0
+#define STM32H7_DMNGT_MASK             GENMASK(1, 0)
+
+enum stm32h7_adc_dmngt {
+       STM32H7_DMNGT_DR_ONLY,          /* Regular data in DR only */
+       STM32H7_DMNGT_DMA_ONESHOT,      /* DMA one shot mode */
+       STM32H7_DMNGT_DFSDM,            /* DFSDM mode */
+       STM32H7_DMNGT_DMA_CIRC,         /* DMA circular mode */
+};
+
+/* STM32H7_ADC_CALFACT - bit fields */
+#define STM32H7_CALFACT_D_SHIFT                16
+#define STM32H7_CALFACT_D_MASK         GENMASK(26, 16)
+#define STM32H7_CALFACT_S_SHIFT                0
+#define STM32H7_CALFACT_S_MASK         GENMASK(10, 0)
+
+/* STM32H7_ADC_CALFACT2 - bit fields */
+#define STM32H7_LINCALFACT_SHIFT       0
+#define STM32H7_LINCALFACT_MASK                GENMASK(29, 0)
+
+/* STM32H7_ADC_CSR - bit fields */
+#define STM32H7_EOC_SLV                        BIT(18)
+#define STM32H7_EOC_MST                        BIT(2)
+
+/* STM32H7_ADC_CCR - bit fields */
+#define STM32H7_PRESC_SHIFT            18
+#define STM32H7_PRESC_MASK             GENMASK(21, 18)
+#define STM32H7_CKMODE_SHIFT           16
+#define STM32H7_CKMODE_MASK            GENMASK(17, 16)
+
 /**
  * struct stm32_adc_common - stm32 ADC driver common data (for all instances)
  * @base:              control registers base cpu addr
index 6a7dd08..663f8a5 100644 (file)
 
 #include "stm32-adc-core.h"
 
-/* STM32F4 - Registers for each ADC instance */
-#define STM32F4_ADC_SR                 0x00
-#define STM32F4_ADC_CR1                        0x04
-#define STM32F4_ADC_CR2                        0x08
-#define STM32F4_ADC_SMPR1              0x0C
-#define STM32F4_ADC_SMPR2              0x10
-#define STM32F4_ADC_HTR                        0x24
-#define STM32F4_ADC_LTR                        0x28
-#define STM32F4_ADC_SQR1               0x2C
-#define STM32F4_ADC_SQR2               0x30
-#define STM32F4_ADC_SQR3               0x34
-#define STM32F4_ADC_JSQR               0x38
-#define STM32F4_ADC_JDR1               0x3C
-#define STM32F4_ADC_JDR2               0x40
-#define STM32F4_ADC_JDR3               0x44
-#define STM32F4_ADC_JDR4               0x48
-#define STM32F4_ADC_DR                 0x4C
-
-/* STM32F4_ADC_SR - bit fields */
-#define STM32F4_STRT                   BIT(4)
-#define STM32F4_EOC                    BIT(1)
-
-/* STM32F4_ADC_CR1 - bit fields */
-#define STM32F4_RES_SHIFT              24
-#define STM32F4_RES_MASK               GENMASK(25, 24)
-#define STM32F4_SCAN                   BIT(8)
-#define STM32F4_EOCIE                  BIT(5)
-
-/* STM32F4_ADC_CR2 - bit fields */
-#define STM32F4_SWSTART                        BIT(30)
-#define STM32F4_EXTEN_SHIFT            28
-#define STM32F4_EXTEN_MASK             GENMASK(29, 28)
-#define STM32F4_EXTSEL_SHIFT           24
-#define STM32F4_EXTSEL_MASK            GENMASK(27, 24)
-#define STM32F4_EOCS                   BIT(10)
-#define STM32F4_DDS                    BIT(9)
-#define STM32F4_DMA                    BIT(8)
-#define STM32F4_ADON                   BIT(0)
-
-/* STM32H7 - Registers for each ADC instance */
-#define STM32H7_ADC_ISR                        0x00
-#define STM32H7_ADC_IER                        0x04
-#define STM32H7_ADC_CR                 0x08
-#define STM32H7_ADC_CFGR               0x0C
-#define STM32H7_ADC_SMPR1              0x14
-#define STM32H7_ADC_SMPR2              0x18
-#define STM32H7_ADC_PCSEL              0x1C
-#define STM32H7_ADC_SQR1               0x30
-#define STM32H7_ADC_SQR2               0x34
-#define STM32H7_ADC_SQR3               0x38
-#define STM32H7_ADC_SQR4               0x3C
-#define STM32H7_ADC_DR                 0x40
-#define STM32H7_ADC_DIFSEL             0xC0
-#define STM32H7_ADC_CALFACT            0xC4
-#define STM32H7_ADC_CALFACT2           0xC8
-
-/* STM32H7_ADC_ISR - bit fields */
-#define STM32MP1_VREGREADY             BIT(12)
-#define STM32H7_EOC                    BIT(2)
-#define STM32H7_ADRDY                  BIT(0)
-
-/* STM32H7_ADC_IER - bit fields */
-#define STM32H7_EOCIE                  STM32H7_EOC
-
-/* STM32H7_ADC_CR - bit fields */
-#define STM32H7_ADCAL                  BIT(31)
-#define STM32H7_ADCALDIF               BIT(30)
-#define STM32H7_DEEPPWD                        BIT(29)
-#define STM32H7_ADVREGEN               BIT(28)
-#define STM32H7_LINCALRDYW6            BIT(27)
-#define STM32H7_LINCALRDYW5            BIT(26)
-#define STM32H7_LINCALRDYW4            BIT(25)
-#define STM32H7_LINCALRDYW3            BIT(24)
-#define STM32H7_LINCALRDYW2            BIT(23)
-#define STM32H7_LINCALRDYW1            BIT(22)
-#define STM32H7_ADCALLIN               BIT(16)
-#define STM32H7_BOOST                  BIT(8)
-#define STM32H7_ADSTP                  BIT(4)
-#define STM32H7_ADSTART                        BIT(2)
-#define STM32H7_ADDIS                  BIT(1)
-#define STM32H7_ADEN                   BIT(0)
-
-/* STM32H7_ADC_CFGR bit fields */
-#define STM32H7_EXTEN_SHIFT            10
-#define STM32H7_EXTEN_MASK             GENMASK(11, 10)
-#define STM32H7_EXTSEL_SHIFT           5
-#define STM32H7_EXTSEL_MASK            GENMASK(9, 5)
-#define STM32H7_RES_SHIFT              2
-#define STM32H7_RES_MASK               GENMASK(4, 2)
-#define STM32H7_DMNGT_SHIFT            0
-#define STM32H7_DMNGT_MASK             GENMASK(1, 0)
-
-enum stm32h7_adc_dmngt {
-       STM32H7_DMNGT_DR_ONLY,          /* Regular data in DR only */
-       STM32H7_DMNGT_DMA_ONESHOT,      /* DMA one shot mode */
-       STM32H7_DMNGT_DFSDM,            /* DFSDM mode */
-       STM32H7_DMNGT_DMA_CIRC,         /* DMA circular mode */
-};
-
-/* STM32H7_ADC_CALFACT - bit fields */
-#define STM32H7_CALFACT_D_SHIFT                16
-#define STM32H7_CALFACT_D_MASK         GENMASK(26, 16)
-#define STM32H7_CALFACT_S_SHIFT                0
-#define STM32H7_CALFACT_S_MASK         GENMASK(10, 0)
-
-/* STM32H7_ADC_CALFACT2 - bit fields */
-#define STM32H7_LINCALFACT_SHIFT       0
-#define STM32H7_LINCALFACT_MASK                GENMASK(29, 0)
-
 /* Number of linear calibration shadow registers / LINCALRDYW control bits */
 #define STM32H7_LINCALFACT_NUM         6
 
index 9ac8356..4998a89 100644 (file)
@@ -35,8 +35,11 @@ static int adis_update_scan_mode_burst(struct iio_dev *indio_dev,
                return -ENOMEM;
 
        adis->buffer = kzalloc(burst_length + sizeof(u16), GFP_KERNEL);
-       if (!adis->buffer)
+       if (!adis->buffer) {
+               kfree(adis->xfer);
+               adis->xfer = NULL;
                return -ENOMEM;
+       }
 
        tx = adis->buffer + burst_length;
        tx[0] = ADIS_READ_REG(adis->burst->reg_cmd);
@@ -78,8 +81,11 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
                return -ENOMEM;
 
        adis->buffer = kcalloc(indio_dev->scan_bytes, 2, GFP_KERNEL);
-       if (!adis->buffer)
+       if (!adis->buffer) {
+               kfree(adis->xfer);
+               adis->xfer = NULL;
                return -ENOMEM;
+       }
 
        rx = adis->buffer;
        tx = rx + scan_count;
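
Both adis hunks plug the same leak: when the second allocation fails, the first must be released and its pointer cleared so a later teardown cannot double-free it. A generic standalone model of the pattern (names are illustrative):

#include <stdlib.h>

struct ctx { void *xfer; void *buffer; };

static int ctx_alloc(struct ctx *c, size_t xfer_sz, size_t buf_sz)
{
	c->xfer = calloc(1, xfer_sz);
	if (!c->xfer)
		return -1;

	c->buffer = calloc(1, buf_sz);
	if (!c->buffer) {
		free(c->xfer);   /* undo the first allocation... */
		c->xfer = NULL;  /* ...and clear it for safe teardown */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct ctx c;

	if (ctx_alloc(&c, 64, 256))
		return 1;
	free(c.buffer);
	free(c.xfer);
	return 0;
}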
index 80e42c7..0fe6999 100644 (file)
@@ -99,7 +99,9 @@ struct st_lsm6dsx_fs {
 #define ST_LSM6DSX_FS_LIST_SIZE                4
 struct st_lsm6dsx_fs_table_entry {
        struct st_lsm6dsx_reg reg;
+
        struct st_lsm6dsx_fs fs_avl[ST_LSM6DSX_FS_LIST_SIZE];
+       int fs_len;
 };
 
 /**
index 2d34955..fd5ebe1 100644 (file)
@@ -145,6 +145,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
                                .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
                                .fs_avl[3] = { IIO_G_TO_M_S_2(732), 0x1 },
+                               .fs_len = 4,
                        },
                        [ST_LSM6DSX_ID_GYRO] = {
                                .reg = {
@@ -154,6 +155,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[0] = {  IIO_DEGREE_TO_RAD(245), 0x0 },
                                .fs_avl[1] = {  IIO_DEGREE_TO_RAD(500), 0x1 },
                                .fs_avl[2] = { IIO_DEGREE_TO_RAD(2000), 0x3 },
+                               .fs_len = 3,
                        },
                },
        },
@@ -215,6 +217,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
                                .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
                                .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+                               .fs_len = 4,
                        },
                        [ST_LSM6DSX_ID_GYRO] = {
                                .reg = {
@@ -225,6 +228,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
                                .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
                                .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+                               .fs_len = 4,
                        },
                },
                .decimator = {
@@ -327,6 +331,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
                                .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
                                .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+                               .fs_len = 4,
                        },
                        [ST_LSM6DSX_ID_GYRO] = {
                                .reg = {
@@ -337,6 +342,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
                                .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
                                .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+                               .fs_len = 4,
                        },
                },
                .decimator = {
@@ -448,6 +454,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
                                .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
                                .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+                               .fs_len = 4,
                        },
                        [ST_LSM6DSX_ID_GYRO] = {
                                .reg = {
@@ -458,6 +465,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
                                .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
                                .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+                               .fs_len = 4,
                        },
                },
                .decimator = {
@@ -563,6 +571,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
                                .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
                                .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+                               .fs_len = 4,
                        },
                        [ST_LSM6DSX_ID_GYRO] = {
                                .reg = {
@@ -573,6 +582,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
                                .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
                                .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+                               .fs_len = 4,
                        },
                },
                .batch = {
@@ -693,6 +703,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
                                .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
                                .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+                               .fs_len = 4,
                        },
                        [ST_LSM6DSX_ID_GYRO] = {
                                .reg = {
@@ -703,6 +714,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
                                .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
                                .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+                               .fs_len = 4,
                        },
                },
                .batch = {
@@ -800,6 +812,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
                                .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
                                .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+                               .fs_len = 4,
                        },
                        [ST_LSM6DSX_ID_GYRO] = {
                                .reg = {
@@ -810,6 +823,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
                                .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
                                .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
                                .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+                               .fs_len = 4,
                        },
                },
                .batch = {
@@ -933,11 +947,12 @@ static int st_lsm6dsx_set_full_scale(struct st_lsm6dsx_sensor *sensor,
        int i, err;
 
        fs_table = &sensor->hw->settings->fs_table[sensor->id];
-       for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++)
+       for (i = 0; i < fs_table->fs_len; i++) {
                if (fs_table->fs_avl[i].gain == gain)
                        break;
+       }
 
-       if (i == ST_LSM6DSX_FS_LIST_SIZE)
+       if (i == fs_table->fs_len)
                return -EINVAL;
 
        data = ST_LSM6DSX_SHIFT_VAL(fs_table->fs_avl[i].val,
@@ -1196,18 +1211,13 @@ static ssize_t st_lsm6dsx_sysfs_scale_avail(struct device *dev,
 {
        struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev));
        const struct st_lsm6dsx_fs_table_entry *fs_table;
-       enum st_lsm6dsx_sensor_id id = sensor->id;
        struct st_lsm6dsx_hw *hw = sensor->hw;
        int i, len = 0;
 
-       fs_table = &hw->settings->fs_table[id];
-       for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) {
-               if (!fs_table->fs_avl[i].gain)
-                       break;
-
+       fs_table = &hw->settings->fs_table[sensor->id];
+       for (i = 0; i < fs_table->fs_len; i++)
                len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
                                 fs_table->fs_avl[i].gain);
-       }
        buf[len - 1] = '\n';
 
        return len;
index 66fbcd9..ea472cf 100644 (file)
@@ -61,6 +61,7 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = {
                                .gain = 1500,
                                .val = 0x0,
                        }, /* 1500 uG/LSB */
+                       .fs_len = 1,
                },
                .temp_comp = {
                        .addr = 0x60,
@@ -92,9 +93,11 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = {
 static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw)
 {
        struct st_lsm6dsx_sensor *sensor;
+       u16 odr;
 
        sensor = iio_priv(hw->iio_devs[ST_LSM6DSX_ID_ACC]);
-       msleep((2000U / sensor->odr) + 1);
+       odr = (hw->enable_mask & BIT(ST_LSM6DSX_ID_ACC)) ? sensor->odr : 13;
+       msleep((2000U / odr) + 1);
 }
 
 /**
@@ -555,13 +558,9 @@ static ssize_t st_lsm6dsx_shub_scale_avail(struct device *dev,
        int i, len = 0;
 
        settings = sensor->ext_info.settings;
-       for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) {
-               u16 val = settings->fs_table.fs_avl[i].gain;
-
-               if (val > 0)
-                       len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
-                                        val);
-       }
+       for (i = 0; i < settings->fs_table.fs_len; i++)
+               len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
+                                settings->fs_table.fs_avl[i].gain);
        buf[len - 1] = '\n';
 
        return len;
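
Worked numbers for the st_lsm6dsx_shub_wait_complete() fix: with the accelerometer disabled, the code now assumes the minimum 13 Hz ODR, giving msleep(2000 / 13 + 1) = 154 ms; trusting a stale sensor->odr of, say, 416 Hz would have slept only 2000 / 416 + 1 = 5 ms, far less than two output-data periods at the real rate.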
index 08d7e1e..4a1a883 100644 (file)
@@ -314,6 +314,7 @@ config MAX44009
 config NOA1305
        tristate "ON Semiconductor NOA1305 ambient light sensor"
        depends on I2C
+       select REGMAP_I2C
        help
         Say Y here if you want to build support for the ON Semiconductor
         NOA1305 ambient light sensor.
index e666879..92004a2 100644 (file)
@@ -686,6 +686,7 @@ static irqreturn_t opt3001_irq(int irq, void *_iio)
        struct iio_dev *iio = _iio;
        struct opt3001 *opt = iio_priv(iio);
        int ret;
+       bool wake_result_ready_queue = false;
 
        if (!opt->ok_to_ignore_lock)
                mutex_lock(&opt->lock);
@@ -720,13 +721,16 @@ static irqreturn_t opt3001_irq(int irq, void *_iio)
                }
                opt->result = ret;
                opt->result_ready = true;
-               wake_up(&opt->result_ready_queue);
+               wake_result_ready_queue = true;
        }
 
 out:
        if (!opt->ok_to_ignore_lock)
                mutex_unlock(&opt->lock);
 
+       if (wake_result_ready_queue)
+               wake_up(&opt->result_ready_queue);
+
        return IRQ_HANDLED;
 }
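
The opt3001 fix records the wake-up decision in a local flag and performs the wake_up() only after the mutex is dropped, so the woken waiter never contends with the handler for the lock it is about to release. A hedged kernel-style sketch of that defer-the-wakeup pattern (result_is_ready() is a hypothetical helper):

	bool wake = false;

	mutex_lock(&opt->lock);
	if (result_is_ready(opt)) {  /* hypothetical helper */
		opt->result_ready = true;
		wake = true;         /* remember the decision, don't wake yet */
	}
	mutex_unlock(&opt->lock);

	if (wake)
		wake_up(&opt->result_ready_queue);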
 
index 51421ac..16dacea 100644 (file)
@@ -398,19 +398,23 @@ static int vcnl4000_probe(struct i2c_client *client,
 static const struct of_device_id vcnl_4000_of_match[] = {
        {
                .compatible = "vishay,vcnl4000",
-               .data = "VCNL4000",
+               .data = (void *)VCNL4000,
        },
        {
                .compatible = "vishay,vcnl4010",
-               .data = "VCNL4010",
+               .data = (void *)VCNL4010,
        },
        {
-               .compatible = "vishay,vcnl4010",
-               .data = "VCNL4020",
+               .compatible = "vishay,vcnl4020",
+               .data = (void *)VCNL4010,
+       },
+       {
+               .compatible = "vishay,vcnl4040",
+               .data = (void *)VCNL4040,
        },
        {
                .compatible = "vishay,vcnl4200",
-               .data = "VCNL4200",
+               .data = (void *)VCNL4200,
        },
        {},
 };
index 1dd467b..6d7ec37 100644 (file)
@@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
 
        if (family == AF_INET) {
                rt = container_of(dst, struct rtable, dst);
-               return rt->rt_gw_family == AF_INET;
+               return rt->rt_uses_gateway;
        }
 
        rt6 = container_of(dst, struct rt6_info, dst);
index da10e6c..5920c00 100644 (file)
@@ -4399,6 +4399,7 @@ error2:
 error1:
        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
+       kfree(port);
        while (--i) {
                if (!rdma_cap_ib_cm(ib_device, i))
                        continue;
@@ -4407,6 +4408,7 @@ error1:
                ib_modify_port(ib_device, port->port_num, 0, &port_modify);
                ib_unregister_mad_agent(port->mad_agent);
                cm_remove_port_fs(port);
+               kfree(port);
        }
 free:
        kfree(cm_dev);
@@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
                spin_unlock_irq(&cm.state_lock);
                ib_unregister_mad_agent(cur_mad_agent);
                cm_remove_port_fs(port);
+               kfree(port);
        }
 
        kfree(cm_dev);
index 0e3cf34..d78f676 100644 (file)
@@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                conn_id->cm_id.iw = NULL;
                cma_exch(conn_id, RDMA_CM_DESTROYING);
                mutex_unlock(&conn_id->handler_mutex);
+               mutex_unlock(&listen_id->handler_mutex);
                cma_deref_id(conn_id);
                rdma_destroy_id(&conn_id->id);
-               goto out;
+               return ret;
        }
 
        mutex_unlock(&conn_id->handler_mutex);
index 3a8b091..9d07378 100644 (file)
@@ -199,6 +199,7 @@ void ib_mad_cleanup(void);
 int ib_sa_init(void);
 void ib_sa_cleanup(void);
 
+void rdma_nl_init(void);
 void rdma_nl_exit(void);
 
 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
index 99c4a55..50a9244 100644 (file)
@@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device,
        if (!netdev)
                return -ENODEV;
 
-       dev_put(netdev);
-
        port_attr->max_mtu = IB_MTU_4096;
        port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
 
@@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device,
                port_attr->state = IB_PORT_DOWN;
                port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
        } else {
-               inetdev = in_dev_get(netdev);
+               rcu_read_lock();
+               inetdev = __in_dev_get_rcu(netdev);
 
                if (inetdev && inetdev->ifa_list) {
                        port_attr->state = IB_PORT_ACTIVE;
                        port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
-                       in_dev_put(inetdev);
                } else {
                        port_attr->state = IB_PORT_INIT;
                        port_attr->phys_state =
                                IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
                }
+
+               rcu_read_unlock();
        }
 
+       dev_put(netdev);
        err = device->ops.query_port(device, port_num, port_attr);
        if (err)
                return err;
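
The iw_query_port() fix addresses a reference dropped too early:
dev_put(netdev) now runs only after the last access to netdev, and the
in_device lookup moves under an RCU read-side section instead of
in_dev_get()/in_dev_put() refcounting. A user-space analog of the
hold-across-use rule with a plain atomic refcount (illustrative names):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct netdev_like {
            atomic_int refcnt;
            int mtu;
    };

    static struct netdev_like *get_ref(struct netdev_like *d)
    {
            atomic_fetch_add(&d->refcnt, 1);
            return d;
    }

    static void put_ref(struct netdev_like *d)
    {
            if (atomic_fetch_sub(&d->refcnt, 1) == 1)
                    free(d);        /* last reference: object goes away */
    }

    int main(void)
    {
            struct netdev_like *d = malloc(sizeof(*d));

            atomic_init(&d->refcnt, 1);
            d->mtu = 4096;

            get_ref(d);
            /* WRONG (old shape): put_ref(d) here, then read d->mtu */
            printf("mtu=%d\n", d->mtu); /* access while ref is held */
            put_ref(d);                 /* drop only after last use */

            put_ref(d);                 /* drop the initial reference */
            return 0;
    }
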
@@ -2715,6 +2716,8 @@ static int __init ib_core_init(void)
                goto err_comp_unbound;
        }
 
+       rdma_nl_init();
+
        ret = addr_init();
        if (ret) {
                pr_warn("Could't init IB address resolution\n");
index 72141c5..ade7182 100644 (file)
@@ -372,6 +372,7 @@ EXPORT_SYMBOL(iw_cm_disconnect);
 static void destroy_cm_id(struct iw_cm_id *cm_id)
 {
        struct iwcm_id_private *cm_id_priv;
+       struct ib_qp *qp;
        unsigned long flags;
 
        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
        set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
 
        spin_lock_irqsave(&cm_id_priv->lock, flags);
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;
+
        switch (cm_id_priv->state) {
        case IW_CM_STATE_LISTEN:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
@@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                /* Abrupt close of the connection */
-               (void)iwcm_modify_qp_err(cm_id_priv->qp);
+               (void)iwcm_modify_qp_err(qp);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_IDLE:
@@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
                BUG();
                break;
        }
-       if (cm_id_priv->qp) {
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
-               cm_id_priv->qp = NULL;
-       }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);
 
        if (cm_id->mapped) {
                iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
@@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
                BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
                cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
-               if (cm_id_priv->qp) {
-                       cm_id->device->ops.iw_rem_ref(qp);
-                       cm_id_priv->qp = NULL;
-               }
+               qp = cm_id_priv->qp;
+               cm_id_priv->qp = NULL;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               if (qp)
+                       cm_id->device->ops.iw_rem_ref(qp);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
        }
@@ -696,7 +698,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
        struct iwcm_id_private *cm_id_priv;
        int ret;
        unsigned long flags;
-       struct ib_qp *qp;
+       struct ib_qp *qp = NULL;
 
        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 
@@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
                return 0;       /* success */
 
        spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id_priv->qp) {
-               cm_id->device->ops.iw_rem_ref(qp);
-               cm_id_priv->qp = NULL;
-       }
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;
        cm_id_priv->state = IW_CM_STATE_IDLE;
 err:
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id->device->ops.iw_rem_ref(qp);
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        wake_up_all(&cm_id_priv->connect_wait);
        return ret;
@@ -878,6 +880,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
                               struct iw_cm_event *iw_event)
 {
+       struct ib_qp *qp = NULL;
        unsigned long flags;
        int ret;
 
@@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
                cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
        } else {
                /* REJECTED or RESET */
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
+               qp = cm_id_priv->qp;
                cm_id_priv->qp = NULL;
                cm_id_priv->state = IW_CM_STATE_IDLE;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
 
        if (iw_event->private_data_len)
@@ -942,21 +947,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
 static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
                                  struct iw_cm_event *iw_event)
 {
+       struct ib_qp *qp;
        unsigned long flags;
-       int ret = 0;
+       int ret = 0, notify_event = 0;
        spin_lock_irqsave(&cm_id_priv->lock, flags);
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;
 
-       if (cm_id_priv->qp) {
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
-               cm_id_priv->qp = NULL;
-       }
        switch (cm_id_priv->state) {
        case IW_CM_STATE_ESTABLISHED:
        case IW_CM_STATE_CLOSING:
                cm_id_priv->state = IW_CM_STATE_IDLE;
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-               ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
-               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               notify_event = 1;
                break;
        case IW_CM_STATE_DESTROYING:
                break;
@@ -965,6 +967,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);
+       if (notify_event)
+               ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
        return ret;
 }
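
Every iwcm.c hunk above applies one shape: snapshot and clear
cm_id_priv->qp under the spinlock, then drop the reference via
iw_rem_ref() only after unlocking, since the put may sleep or
re-enter. A minimal pthread sketch of the pattern (illustrative
names):

    #include <pthread.h>
    #include <stdlib.h>

    struct qp_like { int dummy; };

    struct conn {
            pthread_mutex_t lock;
            struct qp_like *qp;
    };

    static void qp_rem_ref(struct qp_like *qp)
    {
            free(qp);       /* may sleep/take locks in the real driver */
    }

    static void conn_teardown(struct conn *c)
    {
            struct qp_like *qp;

            pthread_mutex_lock(&c->lock);
            qp = c->qp;             /* snapshot under the lock */
            c->qp = NULL;           /* no one else can reach it now */
            pthread_mutex_unlock(&c->lock);

            if (qp)
                    qp_rem_ref(qp); /* reference dropped lock-free */
    }
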
 
index 81dbd5f..8cd31ef 100644 (file)
 #include <linux/module.h>
 #include "core_priv.h"
 
-static DEFINE_MUTEX(rdma_nl_mutex);
 static struct {
-       const struct rdma_nl_cbs   *cb_table;
+       const struct rdma_nl_cbs *cb_table;
+       /* Synchronizes between ongoing netlink commands and netlink client
+        * unregistration.
+        */
+       struct rw_semaphore sem;
 } rdma_nl_types[RDMA_NL_NUM_CLIENTS];
 
 bool rdma_nl_chk_listeners(unsigned int group)
@@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
        return (op < max_num_ops[type]) ? true : false;
 }
 
-static bool
-is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
+static const struct rdma_nl_cbs *
+get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
 {
        const struct rdma_nl_cbs *cb_table;
 
-       if (!is_nl_msg_valid(type, op))
-               return false;
-
        /*
         * Currently only the NLDEV client supports netlink commands in a
         * non-init_net network namespace.
         */
        if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
-               return false;
+               return NULL;
 
-       if (!rdma_nl_types[type].cb_table) {
-               mutex_unlock(&rdma_nl_mutex);
-               request_module("rdma-netlink-subsys-%d", type);
-               mutex_lock(&rdma_nl_mutex);
-       }
+       cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+       if (!cb_table) {
+               /*
+                * Didn't get a valid reference to the table; attempt to
+                * load the module once.
+                */
+               up_read(&rdma_nl_types[type].sem);
 
-       cb_table = rdma_nl_types[type].cb_table;
+               request_module("rdma-netlink-subsys-%d", type);
 
+               down_read(&rdma_nl_types[type].sem);
+               cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+       }
        if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
-               return false;
-       return true;
+               return NULL;
+       return cb_table;
 }
 
 void rdma_nl_register(unsigned int index,
                      const struct rdma_nl_cbs cb_table[])
 {
-       mutex_lock(&rdma_nl_mutex);
-       if (!is_nl_msg_valid(index, 0)) {
-               /*
-                * All clients are not interesting in success/failure of
-                * this call. They want to see the print to error log and
-                * continue their initialization. Print warning for them,
-                * because it is programmer's error to be here.
-                */
-               mutex_unlock(&rdma_nl_mutex);
-               WARN(true,
-                    "The not-valid %u index was supplied to RDMA netlink\n",
-                    index);
+       if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
+           WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
                return;
-       }
-
-       if (rdma_nl_types[index].cb_table) {
-               mutex_unlock(&rdma_nl_mutex);
-               WARN(true,
-                    "The %u index is already registered in RDMA netlink\n",
-                    index);
-               return;
-       }
 
-       rdma_nl_types[index].cb_table = cb_table;
-       mutex_unlock(&rdma_nl_mutex);
+       /* Pairs with the READ_ONCE in get_cb_table() */
+       smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
 }
 EXPORT_SYMBOL(rdma_nl_register);
 
 void rdma_nl_unregister(unsigned int index)
 {
-       mutex_lock(&rdma_nl_mutex);
+       down_write(&rdma_nl_types[index].sem);
        rdma_nl_types[index].cb_table = NULL;
-       mutex_unlock(&rdma_nl_mutex);
+       up_write(&rdma_nl_types[index].sem);
 }
 EXPORT_SYMBOL(rdma_nl_unregister);
 
@@ -170,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
        unsigned int index = RDMA_NL_GET_CLIENT(type);
        unsigned int op = RDMA_NL_GET_OP(type);
        const struct rdma_nl_cbs *cb_table;
+       int err = -EINVAL;
 
-       if (!is_nl_valid(skb, index, op))
+       if (!is_nl_msg_valid(index, op))
                return -EINVAL;
 
-       cb_table = rdma_nl_types[index].cb_table;
+       down_read(&rdma_nl_types[index].sem);
+       cb_table = get_cb_table(skb, index, op);
+       if (!cb_table)
+               goto done;
 
        if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
-           !netlink_capable(skb, CAP_NET_ADMIN))
-               return -EPERM;
+           !netlink_capable(skb, CAP_NET_ADMIN)) {
+               err = -EPERM;
+               goto done;
+       }
 
        /*
         * LS responses overload the 0x100 (NLM_F_ROOT) flag.  Don't
@@ -186,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
         */
        if (index == RDMA_NL_LS) {
                if (cb_table[op].doit)
-                       return cb_table[op].doit(skb, nlh, extack);
-               return -EINVAL;
+                       err = cb_table[op].doit(skb, nlh, extack);
+               goto done;
        }
        /* FIXME: Convert IWCM to properly handle doit callbacks */
        if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
@@ -195,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
                        .dump = cb_table[op].dump,
                };
                if (c.dump)
-                       return netlink_dump_start(skb->sk, skb, nlh, &c);
-               return -EINVAL;
+                       err = netlink_dump_start(skb->sk, skb, nlh, &c);
+               goto done;
        }
 
        if (cb_table[op].doit)
-               return cb_table[op].doit(skb, nlh, extack);
-
-       return 0;
+               err = cb_table[op].doit(skb, nlh, extack);
+done:
+       up_read(&rdma_nl_types[index].sem);
+       return err;
 }
 
 /*
@@ -263,9 +256,7 @@ skip:
 
 static void rdma_nl_rcv(struct sk_buff *skb)
 {
-       mutex_lock(&rdma_nl_mutex);
        rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
-       mutex_unlock(&rdma_nl_mutex);
 }
 
 int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
@@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(rdma_nl_multicast);
 
+void rdma_nl_init(void)
+{
+       int idx;
+
+       for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+               init_rwsem(&rdma_nl_types[idx].sem);
+}
+
 void rdma_nl_exit(void)
 {
        int idx;
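
The netlink.c rework replaces one global mutex with a per-client
rw_semaphore plus release/acquire publication of cb_table: command
dispatch runs under the read side, so rdma_nl_unregister() taking the
write side cannot return while a handler is still running. A
user-space analog with pthread rwlocks and C11 atomics (illustrative
names):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    #define NUM_CLIENTS 4

    struct cbs { int (*doit)(void); };

    static struct {
            _Atomic(const struct cbs *) cb_table;
            pthread_rwlock_t sem;
    } nl_types[NUM_CLIENTS];

    void nl_init(void)
    {
            for (int i = 0; i < NUM_CLIENTS; i++)
                    pthread_rwlock_init(&nl_types[i].sem, NULL);
    }

    void nl_register(unsigned int idx, const struct cbs *table)
    {
            /* release: pairs with the acquire load in nl_dispatch() */
            atomic_store_explicit(&nl_types[idx].cb_table, table,
                                  memory_order_release);
    }

    void nl_unregister(unsigned int idx)
    {
            pthread_rwlock_wrlock(&nl_types[idx].sem); /* waits out handlers */
            atomic_store_explicit(&nl_types[idx].cb_table, NULL,
                                  memory_order_relaxed);
            pthread_rwlock_unlock(&nl_types[idx].sem);
    }

    int nl_dispatch(unsigned int idx)
    {
            const struct cbs *t;
            int err = -1;

            pthread_rwlock_rdlock(&nl_types[idx].sem);
            t = atomic_load_explicit(&nl_types[idx].cb_table,
                                     memory_order_acquire);
            if (t && t->doit)
                    err = t->doit();
            pthread_rwlock_unlock(&nl_types[idx].sem);
            return err;
    }
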
index 7a74740..c03af08 100644 (file)
@@ -778,7 +778,7 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
                container_of(res, struct rdma_counter, res);
 
        if (port && port != counter->port)
-               return 0;
+               return -EAGAIN;
 
        /* Dump it even if the query failed */
        rdma_counter_query_stats(counter);
@@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
-               goto err;
+               goto err_get;
        }
 
        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
@@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
        qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
-       ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
-       if (ret)
-               goto err_unbind;
-
        if (fill_nldev_handle(msg, device) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
@@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                goto err_fill;
        }
 
+       ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
+       if (ret)
+               goto err_fill;
+
        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_fill:
-       rdma_counter_bind_qpn(device, port, qpn, cntn);
-err_unbind:
        nlmsg_free(msg);
 err:
        ib_device_put(device);
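
nldev_stat_del_doit() is reordered so that every fallible step that
merely builds the reply runs before the destructive
rdma_counter_unbind_qpn(); on a formatting failure there is no longer
a compensating re-bind to attempt. A sketch of the ordering with stub
functions (hypothetical names):

    struct ctx { int dummy; };

    static int  fill_reply(struct ctx *c)     { (void)c; return 0; }
    static int  unbind_counter(struct ctx *c) { (void)c; return 0; }
    static int  send_reply(struct ctx *c)     { (void)c; return 0; }
    static void free_msg(struct ctx *c)       { (void)c; }

    int stat_del(struct ctx *c)
    {
            int ret;

            ret = fill_reply(c);        /* can fail; nothing to undo yet */
            if (ret)
                    goto err_free_msg;

            ret = unbind_counter(c);    /* destructive step goes last */
            if (ret)
                    goto err_free_msg;

            return send_reply(c);

    err_free_msg:
            free_msg(c);
            return ret;
    }
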
index dce0610..5337393 100644 (file)
@@ -583,8 +583,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
                break;
        }
 
-       /* P2PDMA contexts do not need to be unmapped */
-       if (!is_pci_p2pdma_page(sg_page(sg)))
+       if (is_pci_p2pdma_page(sg_page(sg)))
+               pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg,
+                                   sg_cnt, dir);
+       else
                ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 }
 EXPORT_SYMBOL(rdma_rw_ctx_destroy);
index 1ab423b..6eb6d27 100644 (file)
@@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
        int ret;
 
        rdma_for_each_port (dev, i) {
-               is_ib = rdma_protocol_ib(dev, i++);
+               is_ib = rdma_protocol_ib(dev, i);
                if (is_ib)
                        break;
        }
index 41f9e26..24244a2 100644 (file)
@@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
        for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
                page = sg_page_iter_page(&sg_iter);
-               if (umem->writable && dirty)
-                       put_user_pages_dirty_lock(&page, 1);
-               else
-                       put_user_page(page);
+               put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
        }
 
        sg_free_table(&umem->sg_head);
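
This hunk, like the later ones in hfi1, qib, usnic and siw, folds the
two-branch dirty/non-dirty release into a single
put_user_pages_dirty_lock() call that takes the dirty decision as a
parameter. A sketch of the consolidated helper shape with stand-in
types:

    #include <stdbool.h>
    #include <stddef.h>

    struct page_like { bool dirty; int refcnt; };

    static void release_pages(struct page_like **p, size_t n,
                              bool make_dirty)
    {
            for (size_t i = 0; i < n; i++) {
                    if (make_dirty)
                            p[i]->dirty = true; /* mark before the put */
                    p[i]->refcnt--;
            }
    }

    /* before: if (dirty) release_dirty(p, n); else release(p, n);
     * after:  release_pages(p, n, dirty);                          */
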
index f67a30f..163ff7b 100644 (file)
@@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
         * that the hardware will not attempt to access the MR any more.
         */
        if (!umem_odp->is_implicit_odp) {
+               mutex_lock(&umem_odp->umem_mutex);
                ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
                                            ib_umem_end(umem_odp));
+               mutex_unlock(&umem_odp->umem_mutex);
                kvfree(umem_odp->dma_list);
                kvfree(umem_odp->page_list);
        }
@@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
        u64 addr;
        struct ib_device *dev = umem_odp->umem.ibdev;
 
+       lockdep_assert_held(&umem_odp->umem_mutex);
+
        virt = max_t(u64, virt, ib_umem_start(umem_odp));
        bound = min_t(u64, bound, ib_umem_end(umem_odp));
        /* Note that during the run of this function, the
@@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
         * faults from completion. We might be racing with other
         * invalidations, so we must make sure we free each page only
         * once. */
-       mutex_lock(&umem_odp->umem_mutex);
        for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
                idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
                if (umem_odp->page_list[idx]) {
@@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
                        umem_odp->npages--;
                }
        }
-       mutex_unlock(&umem_odp->umem_mutex);
 }
 EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
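
The umem_odp hunks hoist umem_mutex out of
ib_umem_odp_unmap_dma_pages() into its callers and add
lockdep_assert_held(), so the locking requirement is checked rather
than implied. A pthread sketch of the caller-holds-the-lock contract;
the manual owner field is an assumption standing in for lockdep:

    #include <assert.h>
    #include <pthread.h>

    struct odp_like {
            pthread_mutex_t umem_mutex;
            pthread_t owner;        /* tracked by hand for the assert */
    };

    static void unmap_pages(struct odp_like *o)
    {
            /* caller must already hold o->umem_mutex */
            assert(pthread_equal(o->owner, pthread_self()));
            /* ... tear down page mappings ... */
    }

    static void release(struct odp_like *o)
    {
            pthread_mutex_lock(&o->umem_mutex);
            o->owner = pthread_self();
            unmap_pages(o);         /* runs inside the caller's section */
            pthread_mutex_unlock(&o->umem_mutex);
    }
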
 
index 1e5aeb3..63f7f7d 100644 (file)
@@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
 
 struct ib_uverbs_device {
        atomic_t                                refcount;
-       int                                     num_comp_vectors;
+       u32                                     num_comp_vectors;
        struct completion                       comp;
        struct device                           dev;
        /* First group for device attributes, NULL terminated array */
index f974b68..35c2841 100644 (file)
@@ -662,16 +662,17 @@ static bool find_gid_index(const union ib_gid *gid,
                           void *context)
 {
        struct find_gid_index_context *ctx = context;
+       u16 vlan_id = 0xffff;
+       int ret;
 
        if (ctx->gid_type != gid_attr->gid_type)
                return false;
 
-       if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
-           (is_vlan_dev(gid_attr->ndev) &&
-            vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+       ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+       if (ret)
                return false;
 
-       return true;
+       return ctx->vlan_id == vlan_id;
 }
 
 static const struct ib_gid_attr *
index e87fc04..347dc24 100644 (file)
@@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 
        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
        release_ep_resources(ep);
-       kfree_skb(skb);
        return 0;
 }
 
@@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
        c4iw_put_ep(&ep->parent_ep->com);
        release_ep_resources(ep);
-       kfree_skb(skb);
        return 0;
 }
 
@@ -2424,20 +2422,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 
        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
-
-       skb_get(skb);
-       rpl = cplhdr(skb);
-       if (!is_t4(adapter_type)) {
-               skb_trim(skb, roundup(sizeof(*rpl5), 16));
-               rpl5 = (void *)rpl;
-               INIT_TP_WR(rpl5, ep->hwtid);
-       } else {
-               skb_trim(skb, sizeof(*rpl));
-               INIT_TP_WR(rpl, ep->hwtid);
-       }
-       OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
-                                                   ep->hwtid));
-
        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
                      enable_tcp_timestamps && req->tcpopt.tstamp,
                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
@@ -2483,6 +2467,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
                if (tcph->ece && tcph->cwr)
                        opt2 |= CCTRL_ECN_V(1);
        }
+
+       skb_get(skb);
+       rpl = cplhdr(skb);
+       if (!is_t4(adapter_type)) {
+               skb_trim(skb, roundup(sizeof(*rpl5), 16));
+               rpl5 = (void *)rpl;
+               INIT_TP_WR(rpl5, ep->hwtid);
+       } else {
+               skb_trim(skb, sizeof(*rpl));
+               INIT_TP_WR(rpl, ep->hwtid);
+       }
+       OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+                                                   ep->hwtid));
+
        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
                u32 isn = (prandom_u32() & ~7UL) - 1;
                opt2 |= T5_OPT_2_VALID_F;
index a8b9548..599340c 100644 (file)
@@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
        }
 }
 
-static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd)
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+                  struct c4iw_debugfs_data *qpd)
 {
        int space;
        int cc;
+       if (id != qp->wq.sq.qid)
+               return 0;
 
        space = qpd->bufsize - qpd->pos - 1;
        if (space == 0)
@@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file)
 
        xa_lock_irq(&qpd->devp->qps);
        xa_for_each(&qpd->devp->qps, index, qp)
-               dump_qp(qp, qpd);
+               dump_qp(index, qp, qpd);
        xa_unlock_irq(&qpd->devp->qps);
 
        qpd->buf[qpd->pos++] = 0;
index aa772ee..35c284a 100644 (file)
@@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
                           struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
 {
        int err;
-       struct fw_ri_tpte tpt;
+       struct fw_ri_tpte *tpt;
        u32 stag_idx;
        static atomic_t key;
 
        if (c4iw_fatal_error(rdev))
                return -EIO;
 
+       tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
+       if (!tpt)
+               return -ENOMEM;
+
        stag_state = stag_state > 0;
        stag_idx = (*stag) >> 8;
 
@@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
                        mutex_lock(&rdev->stats.lock);
                        rdev->stats.stag.fail++;
                        mutex_unlock(&rdev->stats.lock);
+                       kfree(tpt);
                        return -ENOMEM;
                }
                mutex_lock(&rdev->stats.lock);
@@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
 
        /* write TPT entry */
        if (reset_tpt_entry)
-               memset(&tpt, 0, sizeof(tpt));
+               memset(tpt, 0, sizeof(*tpt));
        else {
-               tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+               tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
                        FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
                        FW_RI_TPTE_STAGSTATE_V(stag_state) |
                        FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
-               tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
+               tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
                        (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
                        FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
                                                      FW_RI_VA_BASED_TO))|
                        FW_RI_TPTE_PS_V(page_size));
-               tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+               tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
                        FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
-               tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
-               tpt.va_hi = cpu_to_be32((u32)(to >> 32));
-               tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
-               tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
-               tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+               tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+               tpt->va_hi = cpu_to_be32((u32)(to >> 32));
+               tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+               tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
+               tpt->len_hi = cpu_to_be32((u32)(len >> 32));
        }
        err = write_adapter_mem(rdev, stag_idx +
                                (rdev->lldi.vr->stag.start >> 5),
-                               sizeof(tpt), &tpt, skb, wr_waitp);
+                               sizeof(*tpt), tpt, skb, wr_waitp);
 
        if (reset_tpt_entry) {
                c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
                rdev->stats.stag.cur -= 32;
                mutex_unlock(&rdev->stats.lock);
        }
+       kfree(tpt);
        return err;
 }
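
write_tpt_entry() previously kept the fw_ri_tpte on the kernel stack;
the hunk moves it to kmalloc(), with a matching kfree() on every exit
path. A user-space sketch of the same stack-to-heap conversion,
assuming an oversized illustrative struct:

    #include <stdlib.h>
    #include <string.h>

    struct big_entry { unsigned char payload[512]; };

    static int write_entry(int reset)
    {
            /* was: struct big_entry e; on the stack */
            struct big_entry *e = malloc(sizeof(*e));
            int err = 0;

            if (!e)
                    return -1;

            if (reset)
                    memset(e, 0, sizeof(*e));
            else
                    e->payload[0] = 1;  /* fill the real fields here */

            /* ... hand e to the device ... */
            free(e);                    /* freed on every path */
            return err;
    }
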
 
index eb9368b..bbcac53 100644 (file)
@@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
        if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
                srq->flags = T4_SRQ_LIMIT_SUPPORT;
 
-       ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL);
-       if (ret)
-               goto err_free_queue;
-
        if (udata) {
                srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
                if (!srq_key_mm) {
                        ret = -ENOMEM;
-                       goto err_remove_handle;
+                       goto err_free_queue;
                }
                srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
                if (!srq_db_key_mm) {
@@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm:
        kfree(srq_db_key_mm);
 err_free_srq_key_mm:
        kfree(srq_key_mm);
-err_remove_handle:
-       xa_erase_irq(&rhp->qps, srq->wq.qid);
 err_free_queue:
        free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
                       srq->wr_waitp);
@@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
        rhp = srq->rhp;
 
        pr_debug("%s id %d\n", __func__, srq->wq.qid);
-
-       xa_erase_irq(&rhp->qps, srq->wq.qid);
        ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
                                             ibucontext);
        free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
index 2395fd4..c61b602 100644 (file)
@@ -65,6 +65,7 @@
 #define SDMA_DESCQ_CNT 2048
 #define SDMA_DESC_INTR 64
 #define INVALID_TAIL 0xffff
+#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
 
 static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
 module_param(sdma_descq_cnt, uint, S_IRUGO);
@@ -1296,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
        struct sdma_engine *sde;
 
        if (dd->sdma_pad_dma) {
-               dma_free_coherent(&dd->pcidev->dev, 4,
+               dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
                                  (void *)dd->sdma_pad_dma,
                                  dd->sdma_pad_phys);
                dd->sdma_pad_dma = NULL;
@@ -1491,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
        }
 
        /* Allocate memory for pad */
-       dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32),
+       dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
                                              &dd->sdma_pad_phys, GFP_KERNEL);
        if (!dd->sdma_pad_dma) {
                dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
@@ -1526,8 +1527,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
        }
 
        ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
-       if (ret < 0)
+       if (ret < 0) {
+               kfree(tmp_sdma_rht);
                goto bail;
+       }
+
        dd->sdma_rht = tmp_sdma_rht;
 
        dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
index b4dcc4d..f21fca3 100644 (file)
@@ -2736,11 +2736,6 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                                diff = cmp_psn(psn,
                                               flow->flow_state.r_next_psn);
                                if (diff > 0) {
-                                       if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
-                                               restart_tid_rdma_read_req(rcd,
-                                                                         qp,
-                                                                         wqe);
-
                                        /* Drop the packet. */
                                        goto s_unlock;
                                } else if (diff < 0) {
index b89a9b9..469acb9 100644 (file)
@@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
                             size_t npages, bool dirty)
 {
-       if (dirty)
-               put_user_pages_dirty_lock(p, npages);
-       else
-               put_user_pages(p, npages);
+       put_user_pages_dirty_lock(p, npages, dirty);
 
        if (mm) { /* during close after signal, mm can be NULL */
                atomic64_sub(npages, &mm->pinned_vm);
index 9f53f63..089e201 100644 (file)
@@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp,
 /* Length of buffer to create verbs txreq cache name */
 #define TXREQ_NAME_LEN 24
 
-/* 16B trailing buffer */
-static const u8 trail_buf[MAX_16B_PADDING];
-
 static uint wss_threshold = 80;
 module_param(wss_threshold, uint, S_IRUGO);
 MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
@@ -820,8 +817,8 @@ static int build_verbs_tx_desc(
 
        /* add icrc, lt byte, and padding to flit */
        if (extra_bytes)
-               ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
-                                       (void *)trail_buf, extra_bytes);
+               ret = sdma_txadd_daddr(sde->dd, &tx->txreq,
+                                      sde->dd->sdma_pad_phys, extra_bytes);
 
 bail_txadd:
        return ret;
@@ -1041,7 +1038,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
        if (cb)
                iowait_pio_inc(&priv->s_iowait);
        pbuf = sc_buffer_alloc(sc, plen, cb, qp);
-       if (unlikely(IS_ERR_OR_NULL(pbuf))) {
+       if (IS_ERR_OR_NULL(pbuf)) {
                if (cb)
                        verbs_pio_complete(qp, 0);
                if (IS_ERR(pbuf)) {
@@ -1089,7 +1086,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
                }
                /* add icrc, lt byte, and padding to flit */
                if (extra_bytes)
-                       seg_pio_copy_mid(pbuf, trail_buf, extra_bytes);
+                       seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
+                                        extra_bytes);
 
                seg_pio_copy_end(pbuf);
        }
index 7a89d66..e82567f 100644 (file)
@@ -5389,9 +5389,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
                return;
        }
 
-       if (eq->buf_list)
-               dma_free_coherent(hr_dev->dev, buf_chk_sz,
-                                 eq->buf_list->buf, eq->buf_list->map);
+       dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
+                         eq->buf_list->map);
+       kfree(eq->buf_list);
 }
 
 static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
index 8056930..cd9ee16 100644 (file)
@@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
                return -ENOMEM;
        iwibdev = iwdev->iwibdev;
        rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
+       ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1);
+       if (ret)
+               goto error;
+
        ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
        if (ret)
                goto error;
index 59022b7..d609f46 100644 (file)
@@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
        return 0;
 }
 
-static void devx_free_indirect_mkey(struct rcu_head *rcu)
-{
-       kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
-}
-
-/* This function to delete from the radix tree needs to be called before
- * destroying the underlying mkey. Otherwise a race might occur in case that
- * other thread will get the same mkey before this one will be deleted,
- * in that case it will fail via inserting to the tree its own data.
- *
- * Note:
- * An error in the destroy is not expected unless there is some other indirect
- * mkey which points to this one. In a kernel cleanup flow it will be just
- * destroyed in the iterative destruction call. In a user flow, in case
- * the application didn't close in the expected order it's its own problem,
- * the mkey won't be part of the tree, in both cases the kernel is safe.
- */
-static void devx_cleanup_mkey(struct devx_obj *obj)
-{
-       xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
-                mlx5_base_mkey(obj->devx_mr.mmkey.key));
-}
-
 static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
                                      struct devx_event_subscription *sub)
 {
@@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
        int ret;
 
        dev = mlx5_udata_to_mdev(&attrs->driver_udata);
-       if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
-               devx_cleanup_mkey(obj);
+       if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+               /*
+                * pagefault_single_data_segment() issues commands against
+                * the mmkey; we must wait for those to stop before freeing
+                * the mkey, as another allocation could get the same mkey
+                * number.
+                */
+               xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
+                        mlx5_base_mkey(obj->devx_mr.mmkey.key));
+               synchronize_srcu(&dev->mr_srcu);
+       }
 
        if (obj->flags & DEVX_OBJ_FLAGS_DCT)
                ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
@@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
                devx_cleanup_subscription(dev, sub_entry);
        mutex_unlock(&devx_event_table->event_xa_lock);
 
-       if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
-               call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
-                         devx_free_indirect_mkey);
-               return ret;
-       }
-
        kfree(obj);
        return ret;
 }
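
The devx cleanup now enforces unpublish, then grace period, then free:
the mkey is erased from the xarray first, synchronize_srcu() waits out
any pagefault handler still holding it, and only then may the memory
be reused. A sketch of the ordering with a pthread rwlock standing in
for SRCU (much coarser than real SRCU; illustrative names):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    static pthread_rwlock_t srcu_like = PTHREAD_RWLOCK_INITIALIZER;
    static _Atomic(void *) lookup_slot;      /* the "xarray" */

    static void reader(void)
    {
            pthread_rwlock_rdlock(&srcu_like);   /* srcu_read_lock()   */
            void *obj = atomic_load(&lookup_slot);
            if (obj) { /* ... issue commands against obj ... */ }
            pthread_rwlock_unlock(&srcu_like);   /* srcu_read_unlock() */
    }

    static void destroy(void)
    {
            void *obj = atomic_exchange(&lookup_slot, NULL); /* 1. unpublish */

            pthread_rwlock_wrlock(&srcu_like);   /* 2. wait for readers   */
            pthread_rwlock_unlock(&srcu_like);   /*    (synchronize_srcu) */

            free(obj);                           /* 3. now safe to free   */
    }
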
@@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
                                   &obj_id);
        WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
 
-       if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
-               err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
-               if (err)
-                       goto obj_destroy;
-       }
-
        err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
        if (err)
-               goto err_copy;
+               goto obj_destroy;
 
        if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
                obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
-
        obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
 
+       if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+               err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+               if (err)
+                       goto obj_destroy;
+       }
        return 0;
 
-err_copy:
-       if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
-               devx_cleanup_mkey(obj);
 obj_destroy:
        if (obj->flags & DEVX_OBJ_FLAGS_DCT)
                mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
index 2ceaef3..1a98ee2 100644 (file)
@@ -606,7 +606,7 @@ struct mlx5_ib_mr {
        struct mlx5_ib_dev     *dev;
        u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
        struct mlx5_core_sig_ctx    *sig;
-       int                     live;
+       unsigned int            live;
        void                    *descs_alloc;
        int                     access_flags; /* Needed for rereg MR */
 
@@ -639,7 +639,6 @@ struct mlx5_ib_mw {
 struct mlx5_ib_devx_mr {
        struct mlx5_core_mkey   mmkey;
        int                     ndescs;
-       struct rcu_head         rcu;
 };
 
 struct mlx5_ib_umr_context {
index 1eff031..7019c12 100644 (file)
@@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
                length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
 }
 
-static void update_odp_mr(struct mlx5_ib_mr *mr)
-{
-       if (is_odp_mr(mr)) {
-               /*
-                * This barrier prevents the compiler from moving the
-                * setting of umem->odp_data->private to point to our
-                * MR, before reg_umr finished, to ensure that the MR
-                * initialization have finished before starting to
-                * handle invalidations.
-                */
-               smp_wmb();
-               to_ib_umem_odp(mr->umem)->private = mr;
-               /*
-                * Make sure we will see the new
-                * umem->odp_data->private value in the invalidation
-                * routines, before we can get page faults on the
-                * MR. Page faults can happen once we put the MR in
-                * the tree, below this line. Without the barrier,
-                * there can be a fault handling and an invalidation
-                * before umem->odp_data->private == mr is visible to
-                * the invalidation handler.
-                */
-               smp_wmb();
-       }
-}
-
 static void reg_mr_callback(int status, struct mlx5_async_work *context)
 {
        struct mlx5_ib_mr *mr =
@@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        mr->umem = umem;
        set_mr_fields(dev, mr, npages, length, access_flags);
 
-       update_odp_mr(mr);
-
        if (use_umr) {
                int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
 
@@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                }
        }
 
-       if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
-               mr->live = 1;
+       if (is_odp_mr(mr)) {
+               to_ib_umem_odp(mr->umem)->private = mr;
                atomic_set(&mr->num_pending_prefetch, 0);
        }
+       if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+               smp_store_release(&mr->live, 1);
 
        return &mr->ibmr;
 error:
@@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
        if (!mr->umem)
                return -EINVAL;
 
+       if (is_odp_mr(mr))
+               return -EOPNOTSUPP;
+
        if (flags & IB_MR_REREG_TRANS) {
                addr = virt_addr;
                len = length;
@@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                }
 
                mr->allocated_from_cache = 0;
-               if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
-                       mr->live = 1;
        } else {
                /*
                 * Send a UMR WQE
@@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 
        set_mr_fields(dev, mr, npages, len, access_flags);
 
-       update_odp_mr(mr);
        return 0;
 
 err:
@@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                /* Prevent new page faults and
                 * prefetch requests from succeeding
                 */
-               mr->live = 0;
+               WRITE_ONCE(mr->live, 0);
+
+               /* Wait for all running page-fault handlers to finish. */
+               synchronize_srcu(&dev->mr_srcu);
 
                /* dequeue pending prefetch requests for the mr */
                if (atomic_read(&mr->num_pending_prefetch))
                        flush_workqueue(system_unbound_wq);
                WARN_ON(atomic_read(&mr->num_pending_prefetch));
 
-               /* Wait for all running page-fault handlers to finish. */
-               synchronize_srcu(&dev->mr_srcu);
                /* Destroy all page mappings */
                if (!umem_odp->is_implicit_odp)
                        mlx5_ib_invalidate_range(umem_odp,
@@ -1987,14 +1962,25 @@ free:
 
 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
 {
+       struct mlx5_ib_dev *dev = to_mdev(mw->device);
        struct mlx5_ib_mw *mmw = to_mmw(mw);
        int err;
 
-       err =  mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
-                                     &mmw->mmkey);
-       if (!err)
-               kfree(mmw);
-       return err;
+       if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+               xa_erase_irq(&dev->mdev->priv.mkey_table,
+                            mlx5_base_mkey(mmw->mmkey.key));
+               /*
+                * pagefault_single_data_segment() may be accessing mmw under
+                * SRCU if the user bound an ODP MR to this MW.
+                */
+               synchronize_srcu(&dev->mr_srcu);
+       }
+
+       err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
+       if (err)
+               return err;
+       kfree(mmw);
+       return 0;
 }
 
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
index 2e9b430..3f9478d 100644 (file)
@@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
                return;
        }
 
+       /*
+        * The locking here is pretty subtle. Ideally the implicit children
+        * list would be protected by the umem_mutex; however, that is not
+        * possible. Instead this uses a weaker update-then-lock pattern:
+        *
+        *  srcu_read_lock()
+        *    <change children list>
+        *    mutex_lock(umem_mutex)
+        *     mlx5_ib_update_xlt()
+        *    mutex_unlock(umem_mutex)
+        *    destroy lkey
+        *
+        * i.e. any change to the children list must be followed by the locked
+        * update_xlt before destroying.
+        *
+        * The umem_mutex provides the acquire/release semantic needed to make
+        * the children list visible to a racing thread. While SRCU is not
+        * technically required, using it gives consistent use of the SRCU
+        * locking around the children list.
+        */
+       lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex);
+       lockdep_assert_held(&mr->dev->mr_srcu);
+
        odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
                         nentries * MLX5_IMR_MTT_SIZE, mr);
 
@@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work)
        struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
        int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
        struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+       struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+       int srcu_key;
 
        mr->parent = NULL;
        synchronize_srcu(&mr->dev->mr_srcu);
 
-       ib_umem_odp_release(odp);
-       if (imr->live)
+       if (smp_load_acquire(&imr->live)) {
+               srcu_key = srcu_read_lock(&mr->dev->mr_srcu);
+               mutex_lock(&odp_imr->umem_mutex);
                mlx5_ib_update_xlt(imr, idx, 1, 0,
                                   MLX5_IB_UPD_XLT_INDIRECT |
                                   MLX5_IB_UPD_XLT_ATOMIC);
+               mutex_unlock(&odp_imr->umem_mutex);
+               srcu_read_unlock(&mr->dev->mr_srcu, srcu_key);
+       }
+       ib_umem_odp_release(odp);
        mlx5_mr_cache_free(mr->dev, mr);
 
        if (atomic_dec_and_test(&imr->num_leaf_free))
@@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
                                   idx - blk_start_idx + 1, 0,
                                   MLX5_IB_UPD_XLT_ZAP |
                                   MLX5_IB_UPD_XLT_ATOMIC);
-       mutex_unlock(&umem_odp->umem_mutex);
        /*
         * We are now sure that the device will not access the
         * memory. We can safely unmap it, and mark it as dirty if
@@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 
        if (unlikely(!umem_odp->npages && mr->parent &&
                     !umem_odp->dying)) {
-               WRITE_ONCE(umem_odp->dying, 1);
+               WRITE_ONCE(mr->live, 0);
+               umem_odp->dying = 1;
                atomic_inc(&mr->parent->num_leaf_free);
                schedule_work(&umem_odp->work);
        }
+       mutex_unlock(&umem_odp->umem_mutex);
 }
 
 void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
        mr->ibmr.lkey = mr->mmkey.key;
        mr->ibmr.rkey = mr->mmkey.key;
 
-       mr->live = 1;
-
        mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
                    mr->mmkey.key, dev->mdev, mr);
 
@@ -484,6 +513,8 @@ next_mr:
                mtt->parent = mr;
                INIT_WORK(&odp->work, mr_leaf_free_action);
 
+               smp_store_release(&mtt->live, 1);
+
                if (!nentries)
                        start_idx = addr >> MLX5_IMR_MTT_SHIFT;
                nentries++;
@@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
        init_waitqueue_head(&imr->q_leaf_free);
        atomic_set(&imr->num_leaf_free, 0);
        atomic_set(&imr->num_pending_prefetch, 0);
+       smp_store_release(&imr->live, 1);
 
        return imr;
 }
@@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
                if (mr->parent != imr)
                        continue;
 
+               mutex_lock(&umem_odp->umem_mutex);
                ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
                                            ib_umem_end(umem_odp));
 
-               if (umem_odp->dying)
+               if (umem_odp->dying) {
+                       mutex_unlock(&umem_odp->umem_mutex);
                        continue;
+               }
 
-               WRITE_ONCE(umem_odp->dying, 1);
+               umem_odp->dying = 1;
                atomic_inc(&imr->num_leaf_free);
                schedule_work(&umem_odp->work);
+               mutex_unlock(&umem_odp->umem_mutex);
        }
        up_read(&per_mm->umem_rwsem);
 
@@ -773,7 +809,7 @@ next_mr:
        switch (mmkey->type) {
        case MLX5_MKEY_MR:
                mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
-               if (!mr->live || !mr->ibmr.pd) {
+               if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) {
                        mlx5_ib_dbg(dev, "got dead MR\n");
                        ret = -EFAULT;
                        goto srcu_unlock;
@@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
 
                mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
 
-               if (mr->ibmr.pd != pd) {
+               if (!smp_load_acquire(&mr->live)) {
                        ret = false;
                        break;
                }
 
-               if (!mr->live) {
+               if (mr->ibmr.pd != pd) {
                        ret = false;
                        break;
                }
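
The mr.c and odp.c changes turn mr->live into a release/acquire
handshake: the flag is set with smp_store_release() only once the MR
is fully initialized, and the page-fault side tests it with
smp_load_acquire() before trusting any other field (note the reordered
checks above now test live before the pd). A C11-atomics sketch of the
handshake:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct mr_like {
            int pd;                 /* ... the real state ... */
            atomic_uint live;       /* 0 = dead, 1 = usable */
    };

    static void mr_publish(struct mr_like *mr, int pd)
    {
            mr->pd = pd;                                 /* init first   */
            atomic_store_explicit(&mr->live, 1,
                                  memory_order_release); /* then publish */
    }

    static bool mr_usable(struct mr_like *mr)
    {
            /* acquire pairs with the release above: seeing live == 1
             * guarantees every field written before it is visible */
            return atomic_load_explicit(&mr->live,
                                        memory_order_acquire) == 1;
    }
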
index 8937d72..5fd071c 100644 (file)
@@ -3249,10 +3249,12 @@ static int modify_raw_packet_qp_sq(
        }
 
        /* Only remove the old rate after new rate was set */
-       if ((old_rl.rate &&
-            !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
-           (new_state != MLX5_SQC_STATE_RDY))
+       if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
+           (new_state != MLX5_SQC_STATE_RDY)) {
                mlx5_rl_remove_rate(dev, &old_rl);
+               if (new_state != MLX5_SQC_STATE_RDY)
+                       memset(&new_rl, 0, sizeof(new_rl));
+       }
 
        ibqp->rl = new_rl;
        sq->state = new_state;
index 5136b83..dc71b6e 100644 (file)
@@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str)
        struct qedr_dev *qedr = get_qedr_dev(ibdev);
        u32 fw_ver = (u32)qedr->attr.fw_ver;
 
-       snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d",
+       snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
                 (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF,
                 (fw_ver >> 8) & 0xFF, fw_ver & 0xFF);
 }
index bfbfbb7..6bf764e 100644 (file)
 static void __qib_release_user_pages(struct page **p, size_t num_pages,
                                     int dirty)
 {
-       if (dirty)
-               put_user_pages_dirty_lock(p, num_pages);
-       else
-               put_user_pages(p, num_pages);
+       put_user_pages_dirty_lock(p, num_pages, dirty);
 }
 
 /**
index 0b0237d..62e6ffa 100644 (file)
@@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
                for_each_sg(chunk->page_list, sg, chunk->nents, i) {
                        page = sg_page(sg);
                        pa = sg_phys(sg);
-                       if (dirty)
-                               put_user_pages_dirty_lock(&page, 1);
-                       else
-                               put_user_page(page);
+                       put_user_pages_dirty_lock(&page, 1, dirty);
                        usnic_dbg("pa: %pa\n", &pa);
                }
                kfree(chunk);
index 6cac0c8..36cdfbd 100644 (file)
@@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
 
        pvrdma_page_dir_cleanup(dev, &srq->pdir);
 
-       kfree(srq);
-
        atomic_dec(&dev->num_srqs);
 }
 
index 87a5603..e99983f 100644 (file)
@@ -63,15 +63,7 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
 static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
                           bool dirty)
 {
-       struct page **p = chunk->plist;
-
-       while (num_pages--) {
-               if (!PageDirty(*p) && dirty)
-                       put_user_pages_dirty_lock(p, 1);
-               else
-                       put_user_page(*p);
-               p++;
-       }
+       put_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
 }
 
 void siw_umem_release(struct siw_umem *umem, bool dirty)
index 430314c..b431748 100644 (file)
@@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp)
  */
 void siw_qp_llp_write_space(struct sock *sk)
 {
-       struct siw_cep *cep = sk_to_cep(sk);
+       struct siw_cep *cep;
 
-       cep->sk_write_space(sk);
+       read_lock(&sk->sk_callback_lock);
+
+       cep = sk_to_cep(sk);
+       if (cep) {
+               cep->sk_write_space(sk);
 
-       if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
-               (void)siw_sq_start(cep->qp);
+               if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+                       (void)siw_sq_start(cep->qp);
+       }
+
+       read_unlock(&sk->sk_callback_lock);
 }
 
 static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
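
The write_space fix makes the socket callback robust against
concurrent teardown: the cep pointer is only dereferenced under
sk_callback_lock and may legitimately be NULL. A pthread sketch of the
same guard (hypothetical names):

    #include <pthread.h>
    #include <stddef.h>

    struct sock_like;

    struct cep_like {
            void (*write_space)(struct sock_like *);
    };

    struct sock_like {
            pthread_rwlock_t callback_lock;
            struct cep_like *user_data;
    };

    static void write_space_cb(struct sock_like *sk)
    {
            pthread_rwlock_rdlock(&sk->callback_lock);

            struct cep_like *cep = sk->user_data;

            if (cep)                    /* may already be detached */
                    cep->write_space(sk);

            pthread_rwlock_unlock(&sk->callback_lock);
    }
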
@@ -1305,6 +1312,7 @@ int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
 void siw_free_qp(struct kref *ref)
 {
        struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
+       struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp);
        struct siw_device *sdev = qp->sdev;
        unsigned long flags;
 
@@ -1327,4 +1335,5 @@ void siw_free_qp(struct kref *ref)
        atomic_dec(&sdev->num_qp);
        siw_dbg_qp(qp, "free QP\n");
        kfree_rcu(qp, rcu);
+       kfree(siw_base_qp);
 }
index 869e02b..b18a677 100644 (file)
@@ -604,7 +604,6 @@ out:
 int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
 {
        struct siw_qp *qp = to_siw_qp(base_qp);
-       struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp);
        struct siw_ucontext *uctx =
                rdma_udata_to_drv_context(udata, struct siw_ucontext,
                                          base_ucontext);
@@ -641,7 +640,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
        qp->scq = qp->rcq = NULL;
 
        siw_qp_put(qp);
-       kfree(siw_base_qp);
 
        return 0;
 }
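
Moving kfree(siw_base_qp) from siw_destroy_qp() into siw_free_qp()
ties the containing allocation's lifetime to the final kref put
instead of to destroy time, when other reference holders may remain.
A sketch of the free-in-release-function rule with an atomic refcount:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct qp_like {
            atomic_int ref;
            /* ... */
    };

    static void qp_put(struct qp_like *qp)
    {
            if (atomic_fetch_sub(&qp->ref, 1) == 1)
                    free(qp);   /* release function: last put frees */
    }

    static void qp_destroy(struct qp_like *qp)
    {
            /* detach from lists, quiesce hardware, etc., but do NOT
             * free here: other holders may still call qp_put() later */
            qp_put(qp);
    }
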
index dace857..7985192 100644 (file)
@@ -232,10 +232,7 @@ static int da9063_onkey_probe(struct platform_device *pdev)
        onkey->input->phys = onkey->phys;
        onkey->input->dev.parent = &pdev->dev;
 
-       if (onkey->key_power)
-               input_set_capability(onkey->input, EV_KEY, KEY_POWER);
-
-       input_set_capability(onkey->input, EV_KEY, KEY_SLEEP);
+       input_set_capability(onkey->input, EV_KEY, KEY_POWER);
 
        INIT_DELAYED_WORK(&onkey->work, da9063_poll_on);
 
index 97e3639..08520b3 100644 (file)
@@ -92,11 +92,18 @@ soc_button_device_create(struct platform_device *pdev,
                        continue;
 
                gpio = soc_button_lookup_gpio(&pdev->dev, info->acpi_index);
-               if (gpio < 0 && gpio != -ENOENT) {
-                       error = gpio;
-                       goto err_free_mem;
-               } else if (!gpio_is_valid(gpio)) {
-                       /* Skip GPIO if not present */
+               if (!gpio_is_valid(gpio)) {
+                       /*
+                        * Skip the GPIO if not present. Note we deliberately
+                        * ignore -EPROBE_DEFER errors here. On some devices
+                        * Intel uses so-called virtual GPIOs, which are not
+                        * GPIOs at all but a way for AML code to check some
+                        * random status bits without needing a custom
+                        * opregion. In some cases the resources table we
+                        * parse points to such a virtual GPIO; since these
+                        * are not real GPIOs we have no driver for them, so
+                        * they will never show up, and we therefore ignore
+                        * -EPROBE_DEFER.
+                        */
                        continue;
                }
 
index 04fe434..2d8434b 100644 (file)
@@ -1827,31 +1827,6 @@ static int elantech_create_smbus(struct psmouse *psmouse,
                                  leave_breadcrumbs);
 }
 
-static bool elantech_use_host_notify(struct psmouse *psmouse,
-                                    struct elantech_device_info *info)
-{
-       if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
-               return true;
-
-       switch (info->bus) {
-       case ETP_BUS_PS2_ONLY:
-               /* expected case */
-               break;
-       case ETP_BUS_SMB_HST_NTFY_ONLY:
-       case ETP_BUS_PS2_SMB_HST_NTFY:
-               /* SMbus implementation is stable since 2018 */
-               if (dmi_get_bios_year() >= 2018)
-                       return true;
-               /* fall through */
-       default:
-               psmouse_dbg(psmouse,
-                           "Ignoring SMBus bus provider %d\n", info->bus);
-               break;
-       }
-
-       return false;
-}
-
 /**
  * elantech_setup_smbus - called once the PS/2 devices are enumerated
  * and decides to instantiate a SMBus InterTouch device.
@@ -1871,7 +1846,7 @@ static int elantech_setup_smbus(struct psmouse *psmouse,
                 * i2c_blacklist_pnp_ids.
                 * Old ICs are up to the user to decide.
                 */
-               if (!elantech_use_host_notify(psmouse, info) ||
+               if (!ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version) ||
                    psmouse_matches_pnp_id(psmouse, i2c_blacklist_pnp_ids))
                        return -ENXIO;
        }
@@ -1891,6 +1866,34 @@ static int elantech_setup_smbus(struct psmouse *psmouse,
        return 0;
 }
 
+static bool elantech_use_host_notify(struct psmouse *psmouse,
+                                    struct elantech_device_info *info)
+{
+       if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
+               return true;
+
+       switch (info->bus) {
+       case ETP_BUS_PS2_ONLY:
+               /* expected case */
+               break;
+       case ETP_BUS_SMB_ALERT_ONLY:
+               /* fall-through */
+       case ETP_BUS_PS2_SMB_ALERT:
+               psmouse_dbg(psmouse, "Ignoring SMBus provider through alert protocol.\n");
+               break;
+       case ETP_BUS_SMB_HST_NTFY_ONLY:
+               /* fall-through */
+       case ETP_BUS_PS2_SMB_HST_NTFY:
+               return true;
+       default:
+               psmouse_dbg(psmouse,
+                           "Ignoring SMBus bus provider %d.\n",
+                           info->bus);
+       }
+
+       return false;
+}
+
 int elantech_init_smbus(struct psmouse *psmouse)
 {
        struct elantech_device_info info;
index 772493b..190b997 100644 (file)
@@ -146,7 +146,7 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
        }
 
        mutex_lock(&data->irq_mutex);
-       bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask,
+       bitmap_and(data->irq_status, data->irq_status, data->fn_irq_bits,
               data->irq_count);
        /*
         * At this point, irq_status has all bits that are set in the
@@ -385,6 +385,8 @@ static int rmi_driver_set_irq_bits(struct rmi_device *rmi_dev,
        bitmap_copy(data->current_irq_mask, data->new_irq_mask,
                    data->num_of_irq_regs);
 
+       bitmap_or(data->fn_irq_bits, data->fn_irq_bits, mask, data->irq_count);
+
 error_unlock:
        mutex_unlock(&data->irq_mutex);
        return error;
@@ -398,6 +400,8 @@ static int rmi_driver_clear_irq_bits(struct rmi_device *rmi_dev,
        struct device *dev = &rmi_dev->dev;
 
        mutex_lock(&data->irq_mutex);
+       bitmap_andnot(data->fn_irq_bits,
+                     data->fn_irq_bits, mask, data->irq_count);
        bitmap_andnot(data->new_irq_mask,
                  data->current_irq_mask, mask, data->irq_count);
 
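The new fn_irq_bits bitmap tracks which IRQ bits belong to registered functions: rmi_driver_set_irq_bits() ORs a function's mask in, rmi_driver_clear_irq_bits() ANDNOTs it out, and the interrupt path now ANDs irq_status against it so bits from unclaimed functions are discarded. A self-contained sketch of the same three bitmap operations, with demo names and an arbitrary size:

#include <linux/bitmap.h>

#define DEMO_IRQ_COUNT 16

static DECLARE_BITMAP(demo_fn_irq_bits, DEMO_IRQ_COUNT);

static void demo_set_bits(const unsigned long *mask)
{
	bitmap_or(demo_fn_irq_bits, demo_fn_irq_bits, mask, DEMO_IRQ_COUNT);
}

static void demo_clear_bits(const unsigned long *mask)
{
	bitmap_andnot(demo_fn_irq_bits, demo_fn_irq_bits, mask, DEMO_IRQ_COUNT);
}

static void demo_filter_status(unsigned long *irq_status)
{
	/* drop status bits that no registered function owns */
	bitmap_and(irq_status, irq_status, demo_fn_irq_bits, DEMO_IRQ_COUNT);
}
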
index 5178ea8..fb43aa7 100644 (file)
@@ -53,6 +53,7 @@ struct goodix_ts_data {
        const char *cfg_name;
        struct completion firmware_loading_complete;
        unsigned long irq_flags;
+       unsigned int contact_size;
 };
 
 #define GOODIX_GPIO_INT_NAME           "irq"
@@ -62,6 +63,7 @@ struct goodix_ts_data {
 #define GOODIX_MAX_WIDTH               4096
 #define GOODIX_INT_TRIGGER             1
 #define GOODIX_CONTACT_SIZE            8
+#define GOODIX_MAX_CONTACT_SIZE                9
 #define GOODIX_MAX_CONTACTS            10
 
 #define GOODIX_CONFIG_MAX_LENGTH       240
@@ -144,6 +146,19 @@ static const struct dmi_system_id rotated_screen[] = {
        {}
 };
 
+static const struct dmi_system_id nine_bytes_report[] = {
+#if defined(CONFIG_DMI) && defined(CONFIG_X86)
+       {
+               .ident = "Lenovo YogaBook",
+               /* YB1-X91L/F and YB1-X90L/F */
+               .matches = {
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9")
+               }
+       },
+#endif
+       {}
+};
+
 /**
  * goodix_i2c_read - read data from a register of the i2c slave device.
  *
@@ -249,7 +264,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
        max_timeout = jiffies + msecs_to_jiffies(GOODIX_BUFFER_STATUS_TIMEOUT);
        do {
                error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR,
-                                       data, GOODIX_CONTACT_SIZE + 1);
+                                       data, ts->contact_size + 1);
                if (error) {
                        dev_err(&ts->client->dev, "I2C transfer error: %d\n",
                                        error);
@@ -262,12 +277,12 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
                                return -EPROTO;
 
                        if (touch_num > 1) {
-                               data += 1 + GOODIX_CONTACT_SIZE;
+                               data += 1 + ts->contact_size;
                                error = goodix_i2c_read(ts->client,
                                                GOODIX_READ_COOR_ADDR +
-                                                       1 + GOODIX_CONTACT_SIZE,
+                                                       1 + ts->contact_size,
                                                data,
-                                               GOODIX_CONTACT_SIZE *
+                                               ts->contact_size *
                                                        (touch_num - 1));
                                if (error)
                                        return error;
@@ -286,7 +301,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
        return 0;
 }
 
-static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
+static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data)
 {
        int id = coor_data[0] & 0x0F;
        int input_x = get_unaligned_le16(&coor_data[1]);
@@ -301,6 +316,21 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
        input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
 }
 
+static void goodix_ts_report_touch_9b(struct goodix_ts_data *ts, u8 *coor_data)
+{
+       int id = coor_data[1] & 0x0F;
+       int input_x = get_unaligned_le16(&coor_data[3]);
+       int input_y = get_unaligned_le16(&coor_data[5]);
+       int input_w = get_unaligned_le16(&coor_data[7]);
+
+       input_mt_slot(ts->input_dev, id);
+       input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true);
+       touchscreen_report_pos(ts->input_dev, &ts->prop,
+                              input_x, input_y, true);
+       input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w);
+       input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
+}
+
 /**
  * goodix_process_events - Process incoming events
  *
@@ -311,7 +341,7 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
  */
 static void goodix_process_events(struct goodix_ts_data *ts)
 {
-       u8  point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS];
+       u8  point_data[1 + GOODIX_MAX_CONTACT_SIZE * GOODIX_MAX_CONTACTS];
        int touch_num;
        int i;
 
@@ -326,8 +356,12 @@ static void goodix_process_events(struct goodix_ts_data *ts)
        input_report_key(ts->input_dev, KEY_LEFTMETA, point_data[0] & BIT(4));
 
        for (i = 0; i < touch_num; i++)
-               goodix_ts_report_touch(ts,
-                               &point_data[1 + GOODIX_CONTACT_SIZE * i]);
+               if (ts->contact_size == 9)
+                       goodix_ts_report_touch_9b(ts,
+                               &point_data[1 + ts->contact_size * i]);
+               else
+                       goodix_ts_report_touch_8b(ts,
+                               &point_data[1 + ts->contact_size * i]);
 
        input_mt_sync_frame(ts->input_dev);
        input_sync(ts->input_dev);
@@ -730,6 +764,13 @@ static int goodix_configure_dev(struct goodix_ts_data *ts)
                        "Applying '180 degrees rotated screen' quirk\n");
        }
 
+       if (dmi_check_system(nine_bytes_report)) {
+               ts->contact_size = 9;
+
+               dev_dbg(&ts->client->dev,
+                       "Non-standard 9-byte report format quirk\n");
+       }
+
        error = input_mt_init_slots(ts->input_dev, ts->max_touch_num,
                                    INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
        if (error) {
@@ -810,6 +851,7 @@ static int goodix_ts_probe(struct i2c_client *client,
        ts->client = client;
        i2c_set_clientdata(client, ts);
        init_completion(&ts->firmware_loading_complete);
+       ts->contact_size = GOODIX_CONTACT_SIZE;
 
        error = goodix_get_gpio_config(ts);
        if (error)
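
The Goodix changes above boil down to a one-byte shift between the two report layouts: the default 8-byte record keeps the slot ID in byte 0 with X/Y/W at offsets 1/3/5, while the Yoga Book's 9-byte record carries the ID in byte 1 with X/Y/W at offsets 3/5/7. A standalone sketch of the two decoders, with offsets taken from the hunks above and everything else illustrative:

#include <stdint.h>

/* little-endian 16-bit load, like the kernel's get_unaligned_le16() */
static uint16_t le16(const uint8_t *p)
{
	return (uint16_t)p[0] | ((uint16_t)p[1] << 8);
}

struct demo_touch { int id, x, y, w; };

/* 8-byte record: [id][x][x][y][y][w][w][..] */
static struct demo_touch parse_8b(const uint8_t *d)
{
	return (struct demo_touch){ d[0] & 0x0f, le16(d + 1), le16(d + 3), le16(d + 5) };
}

/* 9-byte record: one extra leading byte shifts the same fields up */
static struct demo_touch parse_9b(const uint8_t *d)
{
	return (struct demo_touch){ d[1] & 0x0f, le16(d + 3), le16(d + 5), le16(d + 7) };
}
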
index 3492339..1139714 100644 (file)
@@ -81,8 +81,10 @@ static int st1232_ts_read_data(struct st1232_ts_data *ts)
        for (i = 0, y = 0; i < ts->chip_info->max_fingers; i++, y += 3) {
                finger[i].is_valid = buf[i + y] >> 7;
                if (finger[i].is_valid) {
-                       finger[i].x = ((buf[i + y] & 0x0070) << 4) | buf[i + 1];
-                       finger[i].y = ((buf[i + y] & 0x0007) << 8) | buf[i + 2];
+                       finger[i].x = ((buf[i + y] & 0x0070) << 4) |
+                                       buf[i + y + 1];
+                       finger[i].y = ((buf[i + y] & 0x0007) << 8) |
+                                       buf[i + y + 2];
 
                        /* st1232 includes a z-axis / touch strength */
                        if (ts->chip_info->have_z)
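
The st1232 fix restores the missing + y in the low-byte indices: each finger record starts at buf[i + y], where the header byte carries the valid bit and the high bits of X and Y, and the low bytes follow at +1 and +2. A standalone sketch of the corrected decode, with a demo helper name:

#include <stdint.h>

/*
 * Sketch: the header byte at "base" packs bit 7 = valid, bits 6:4 = X high,
 * bits 2:0 = Y high; the X and Y low bytes follow it.
 */
static void demo_decode_finger(const uint8_t *buf, int base, int *x, int *y)
{
	*x = ((buf[base] & 0x70) << 4) | buf[base + 1];
	*y = ((buf[base] & 0x07) << 8) | buf[base + 2];
}
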
index 97975bb..dd55507 100644 (file)
@@ -70,7 +70,6 @@
  */
 #define AMD_IOMMU_PGSIZES      ((~0xFFFUL) & ~(2ULL << 38))
 
-static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
 static DEFINE_SPINLOCK(pd_bitmap_lock);
 
 /* List of all available dev_data structures */
@@ -202,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
        if (!dev_data)
                return NULL;
 
+       spin_lock_init(&dev_data->lock);
        dev_data->devid = devid;
        ratelimit_default_init(&dev_data->rs);
 
@@ -501,6 +501,29 @@ static void iommu_uninit_device(struct device *dev)
         */
 }
 
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+                        unsigned long *count)
+{
+       unsigned long pte_mask, pg_size, cnt;
+       u64 *fpte;
+
+       pg_size  = PTE_PAGE_SIZE(*pte);
+       cnt      = PAGE_SIZE_PTE_COUNT(pg_size);
+       pte_mask = ~((cnt << 3) - 1);
+       fpte     = (u64 *)(((unsigned long)pte) & pte_mask);
+
+       if (page_size)
+               *page_size = pg_size;
+
+       if (count)
+               *count = cnt;
+
+       return fpte;
+}
+
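first_pte_l7() leans on the fact that a PAGE_MODE_7_LEVEL mapping is a naturally aligned run of identical PTEs, so masking any PTE's address with ~((cnt << 3) - 1), cnt entries of 8 bytes each, rounds down to the first entry of the run. A standalone worked example of the arithmetic, with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cnt = 512;	/* e.g. a 2 MiB mapping replicated over 512 PTEs */
	uint64_t pte_mask = ~((cnt << 3) - 1);		/* ~(512 * 8 - 1) = ~0xfff */
	uint64_t pte_addr = 0xffff888012345ab0ULL;	/* any PTE inside the run */

	/* rounds down to the naturally aligned first PTE: ...45000 */
	printf("%#llx\n", (unsigned long long)(pte_addr & pte_mask));
	return 0;
}
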
 /****************************************************************************
  *
  * Interrupt handling functions
@@ -560,7 +583,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 retry:
        type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
        devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
-       pasid   = PPR_PASID(*(u64 *)&event[0]);
+       pasid   = (event[0] & EVENT_DOMID_MASK_HI) |
+                 (event[1] & EVENT_DOMID_MASK_LO);
        flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
        address = (u64)(((u64)event[3]) << 32) | event[2];
 
@@ -593,7 +617,7 @@ retry:
                        address, flags);
                break;
        case EVENT_TYPE_PAGE_TAB_ERR:
-               dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+               dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
                        PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
                        pasid, address, flags);
                break;
@@ -1311,8 +1335,12 @@ static void domain_flush_np_cache(struct protection_domain *domain,
                dma_addr_t iova, size_t size)
 {
        if (unlikely(amd_iommu_np_cache)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&domain->lock, flags);
                domain_flush_pages(domain, iova, size);
                domain_flush_complete(domain);
+               spin_unlock_irqrestore(&domain->lock, flags);
        }
 }
 
@@ -1425,7 +1453,7 @@ static void free_pagetable(struct protection_domain *domain)
        BUG_ON(domain->mode < PAGE_MODE_NONE ||
               domain->mode > PAGE_MODE_6_LEVEL);
 
-       free_sub_pt(root, domain->mode, freelist);
+       freelist = free_sub_pt(root, domain->mode, freelist);
 
        free_page_list(freelist);
 }
@@ -1435,16 +1463,18 @@ static void free_pagetable(struct protection_domain *domain)
  * another level increases the size of the address space by 9 bits to a size up
  * to 64 bits.
  */
-static void increase_address_space(struct protection_domain *domain,
+static bool increase_address_space(struct protection_domain *domain,
+                                  unsigned long address,
                                   gfp_t gfp)
 {
        unsigned long flags;
+       bool ret = false;
        u64 *pte;
 
        spin_lock_irqsave(&domain->lock, flags);
 
-       if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
-               /* address space already 64 bit large */
+       if (address <= PM_LEVEL_SIZE(domain->mode) ||
+           WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
                goto out;
 
        pte = (void *)get_zeroed_page(gfp);
@@ -1455,19 +1485,21 @@ static void increase_address_space(struct protection_domain *domain,
                                        iommu_virt_to_phys(domain->pt_root));
        domain->pt_root  = pte;
        domain->mode    += 1;
-       domain->updated  = true;
+
+       ret = true;
 
 out:
        spin_unlock_irqrestore(&domain->lock, flags);
 
-       return;
+       return ret;
 }
 
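Note the operand order in the caller below: alloc_pte() writes "*updated = increase_address_space(...) || *updated;", keeping the function call on the left so short-circuit evaluation can never skip it. A minimal illustration of why that matters, with demo names:

#include <stdbool.h>

static bool demo_grow(void)
{
	/* pretend this has the side effect of extending the page table */
	return true;
}

static void demo_accumulate(bool *updated)
{
	/*
	 * Correct: demo_grow() always runs, then its result is ORed in.
	 * "*updated = *updated || demo_grow();" would skip the call
	 * entirely once *updated is already true.
	 */
	*updated = demo_grow() || *updated;
}
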
 static u64 *alloc_pte(struct protection_domain *domain,
                      unsigned long address,
                      unsigned long page_size,
                      u64 **pte_page,
-                     gfp_t gfp)
+                     gfp_t gfp,
+                     bool *updated)
 {
        int level, end_lvl;
        u64 *pte, *page;
@@ -1475,7 +1507,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
        BUG_ON(!is_power_of_2(page_size));
 
        while (address > PM_LEVEL_SIZE(domain->mode))
-               increase_address_space(domain, gfp);
+               *updated = increase_address_space(domain, address, gfp) || *updated;
 
        level   = domain->mode - 1;
        pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
@@ -1489,9 +1521,32 @@ static u64 *alloc_pte(struct protection_domain *domain,
                __pte     = *pte;
                pte_level = PM_PTE_LEVEL(__pte);
 
-               if (!IOMMU_PTE_PRESENT(__pte) ||
+               /*
+                * If we replace a series of large PTEs, we need
+                * to tear down all of them.
+                */
+               if (IOMMU_PTE_PRESENT(__pte) &&
                    pte_level == PAGE_MODE_7_LEVEL) {
+                       unsigned long count, i;
+                       u64 *lpte;
+
+                       lpte = first_pte_l7(pte, NULL, &count);
+
+                       /*
+                        * Unmap the replicated PTEs that still match the
+                        * original large mapping
+                        */
+                       for (i = 0; i < count; ++i)
+                               cmpxchg64(&lpte[i], __pte, 0ULL);
+
+                       *updated = true;
+                       continue;
+               }
+
+               if (!IOMMU_PTE_PRESENT(__pte) ||
+                   pte_level == PAGE_MODE_NONE) {
                        page = (u64 *)get_zeroed_page(gfp);
+
                        if (!page)
                                return NULL;
 
@@ -1500,8 +1555,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
                        /* pte could have been changed somewhere. */
                        if (cmpxchg64(pte, __pte, __npte) != __pte)
                                free_page((unsigned long)page);
-                       else if (pte_level == PAGE_MODE_7_LEVEL)
-                               domain->updated = true;
+                       else if (IOMMU_PTE_PRESENT(__pte))
+                               *updated = true;
 
                        continue;
                }
@@ -1566,17 +1621,12 @@ static u64 *fetch_pte(struct protection_domain *domain,
                *page_size = PTE_LEVEL_PAGE_SIZE(level);
        }
 
-       if (PM_PTE_LEVEL(*pte) == 0x07) {
-               unsigned long pte_mask;
-
-               /*
-                * If we have a series of large PTEs, make
-                * sure to return a pointer to the first one.
-                */
-               *page_size = pte_mask = PTE_PAGE_SIZE(*pte);
-               pte_mask   = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
-               pte        = (u64 *)(((unsigned long)pte) & pte_mask);
-       }
+       /*
+        * If we have a series of large PTEs, make
+        * sure to return a pointer to the first one.
+        */
+       if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+               pte = first_pte_l7(pte, page_size, NULL);
 
        return pte;
 }
@@ -1615,26 +1665,29 @@ static int iommu_map_page(struct protection_domain *dom,
                          gfp_t gfp)
 {
        struct page *freelist = NULL;
+       bool updated = false;
        u64 __pte, *pte;
-       int i, count;
+       int ret, i, count;
 
        BUG_ON(!IS_ALIGNED(bus_addr, page_size));
        BUG_ON(!IS_ALIGNED(phys_addr, page_size));
 
+       ret = -EINVAL;
        if (!(prot & IOMMU_PROT_MASK))
-               return -EINVAL;
+               goto out;
 
        count = PAGE_SIZE_PTE_COUNT(page_size);
-       pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
+       pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
 
+       ret = -ENOMEM;
        if (!pte)
-               return -ENOMEM;
+               goto out;
 
        for (i = 0; i < count; ++i)
                freelist = free_clear_pte(&pte[i], pte[i], freelist);
 
        if (freelist != NULL)
-               dom->updated = true;
+               updated = true;
 
        if (count > 1) {
                __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1650,12 +1703,21 @@ static int iommu_map_page(struct protection_domain *dom,
        for (i = 0; i < count; ++i)
                pte[i] = __pte;
 
-       update_domain(dom);
+       ret = 0;
+
+out:
+       if (updated) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&dom->lock, flags);
+               update_domain(dom);
+               spin_unlock_irqrestore(&dom->lock, flags);
+       }
 
        /* Everything flushed out, free pages now */
        free_page_list(freelist);
 
-       return 0;
+       return ret;
 }
 
 static unsigned long iommu_unmap_page(struct protection_domain *dom,
@@ -1806,8 +1868,12 @@ static void free_gcr3_table(struct protection_domain *domain)
 
 static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&dom->domain.lock, flags);
        domain_flush_tlb(&dom->domain);
        domain_flush_complete(&dom->domain);
+       spin_unlock_irqrestore(&dom->domain.lock, flags);
 }
 
 static void iova_domain_flush_tlb(struct iova_domain *iovad)
@@ -2022,36 +2088,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
        domain->dev_cnt                 -= 1;
 }
 
-/*
- * If a device is not yet associated with a domain, this function makes the
- * device visible in the domain
- */
-static int __attach_device(struct iommu_dev_data *dev_data,
-                          struct protection_domain *domain)
-{
-       int ret;
-
-       /* lock domain */
-       spin_lock(&domain->lock);
-
-       ret = -EBUSY;
-       if (dev_data->domain != NULL)
-               goto out_unlock;
-
-       /* Attach alias group root */
-       do_attach(dev_data, domain);
-
-       ret = 0;
-
-out_unlock:
-
-       /* ready */
-       spin_unlock(&domain->lock);
-
-       return ret;
-}
-
-
 static void pdev_iommuv2_disable(struct pci_dev *pdev)
 {
        pci_disable_ats(pdev);
@@ -2133,19 +2169,28 @@ static int attach_device(struct device *dev,
        unsigned long flags;
        int ret;
 
+       spin_lock_irqsave(&domain->lock, flags);
+
        dev_data = get_dev_data(dev);
 
+       spin_lock(&dev_data->lock);
+
+       ret = -EBUSY;
+       if (dev_data->domain != NULL)
+               goto out;
+
        if (!dev_is_pci(dev))
                goto skip_ats_check;
 
        pdev = to_pci_dev(dev);
        if (domain->flags & PD_IOMMUV2_MASK) {
+               ret = -EINVAL;
                if (!dev_data->passthrough)
-                       return -EINVAL;
+                       goto out;
 
                if (dev_data->iommu_v2) {
                        if (pdev_iommuv2_enable(pdev) != 0)
-                               return -EINVAL;
+                               goto out;
 
                        dev_data->ats.enabled = true;
                        dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
@@ -2158,9 +2203,9 @@ static int attach_device(struct device *dev,
        }
 
 skip_ats_check:
-       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
-       ret = __attach_device(dev_data, domain);
-       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       ret = 0;
+
+       do_attach(dev_data, domain);
 
        /*
         * We might boot into a crash-kernel here. The crashed kernel
@@ -2169,23 +2214,14 @@ skip_ats_check:
         */
        domain_flush_tlb_pde(domain);
 
-       return ret;
-}
-
-/*
- * Removes a device from a protection domain (unlocked)
- */
-static void __detach_device(struct iommu_dev_data *dev_data)
-{
-       struct protection_domain *domain;
-
-       domain = dev_data->domain;
+       domain_flush_complete(domain);
 
-       spin_lock(&domain->lock);
+out:
+       spin_unlock(&dev_data->lock);
 
-       do_detach(dev_data);
+       spin_unlock_irqrestore(&domain->lock, flags);
 
-       spin_unlock(&domain->lock);
+       return ret;
 }
 
 /*
@@ -2200,6 +2236,10 @@ static void detach_device(struct device *dev)
        dev_data = get_dev_data(dev);
        domain   = dev_data->domain;
 
+       spin_lock_irqsave(&domain->lock, flags);
+
+       spin_lock(&dev_data->lock);
+
        /*
         * First check if the device is still attached. It might already
         * be detached from its domain because the generic
@@ -2207,15 +2247,12 @@ static void detach_device(struct device *dev)
         * our alias handling.
         */
        if (WARN_ON(!dev_data->domain))
-               return;
+               goto out;
 
-       /* lock device table */
-       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
-       __detach_device(dev_data);
-       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       do_detach(dev_data);
 
        if (!dev_is_pci(dev))
-               return;
+               goto out;
 
        if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
                pdev_iommuv2_disable(to_pci_dev(dev));
@@ -2223,6 +2260,11 @@ static void detach_device(struct device *dev)
                pci_disable_ats(to_pci_dev(dev));
 
        dev_data->ats.enabled = false;
+
+out:
+       spin_unlock(&dev_data->lock);
+
+       spin_unlock_irqrestore(&domain->lock, flags);
 }
 
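With amd_iommu_devtable_lock gone, attach_device() and detach_device() both take the IRQ-safe domain lock first and nest the new per-device lock inside it, so the two paths always agree on lock order. A sketch of that nesting, with the field names from the hunks above and everything else illustrative:

#include <linux/spinlock.h>

struct demo_domain   { spinlock_t lock; };
struct demo_dev_data { spinlock_t lock; };

/* Sketch: one lock order everywhere - domain outermost, device nested */
static void demo_attach_or_detach(struct demo_domain *dom,
				  struct demo_dev_data *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&dom->lock, flags);
	spin_lock(&dd->lock);

	/* ... manipulate the device/domain binding here ... */

	spin_unlock(&dd->lock);
	spin_unlock_irqrestore(&dom->lock, flags);
}
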
 static int amd_iommu_add_device(struct device *dev)
@@ -2354,15 +2396,10 @@ static void update_device_table(struct protection_domain *domain)
 
 static void update_domain(struct protection_domain *domain)
 {
-       if (!domain->updated)
-               return;
-
        update_device_table(domain);
 
        domain_flush_devices(domain);
        domain_flush_tlb_pde(domain);
-
-       domain->updated = false;
 }
 
 static int dir2prot(enum dma_data_direction direction)
@@ -2392,6 +2429,7 @@ static dma_addr_t __map_single(struct device *dev,
 {
        dma_addr_t offset = paddr & ~PAGE_MASK;
        dma_addr_t address, start, ret;
+       unsigned long flags;
        unsigned int pages;
        int prot = 0;
        int i;
@@ -2429,8 +2467,10 @@ out_unmap:
                iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
        }
 
+       spin_lock_irqsave(&dma_dom->domain.lock, flags);
        domain_flush_tlb(&dma_dom->domain);
        domain_flush_complete(&dma_dom->domain);
+       spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
 
        dma_ops_free_iova(dma_dom, address, pages);
 
@@ -2459,8 +2499,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
        }
 
        if (amd_iommu_unmap_flush) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&dma_dom->domain.lock, flags);
                domain_flush_tlb(&dma_dom->domain);
                domain_flush_complete(&dma_dom->domain);
+               spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
                dma_ops_free_iova(dma_dom, dma_addr, pages);
        } else {
                pages = __roundup_pow_of_two(pages);
@@ -2866,16 +2910,16 @@ static void cleanup_domain(struct protection_domain *domain)
        struct iommu_dev_data *entry;
        unsigned long flags;
 
-       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock_irqsave(&domain->lock, flags);
 
        while (!list_empty(&domain->dev_list)) {
                entry = list_first_entry(&domain->dev_list,
                                         struct iommu_dev_data, list);
                BUG_ON(!entry->domain);
-               __detach_device(entry);
+               do_detach(entry);
        }
 
-       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 static void protection_domain_free(struct protection_domain *domain)
@@ -3226,9 +3270,12 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
 static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
 {
        struct protection_domain *dom = to_pdomain(domain);
+       unsigned long flags;
 
+       spin_lock_irqsave(&dom->lock, flags);
        domain_flush_tlb_pde(dom);
        domain_flush_complete(dom);
+       spin_unlock_irqrestore(&dom->lock, flags);
 }
 
 static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
@@ -3290,7 +3337,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 
        /* Update data structure */
        domain->mode    = PAGE_MODE_NONE;
-       domain->updated = true;
 
        /* Make changes visible to IOMMUs */
        update_domain(domain);
@@ -3336,7 +3382,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
 
        domain->glx      = levels;
        domain->flags   |= PD_IOMMUV2_MASK;
-       domain->updated  = true;
 
        update_domain(domain);
 
index c235f79..5120ce4 100644 (file)
@@ -73,6 +73,19 @@ static const struct dmi_system_id ivrs_quirks[] __initconst = {
                },
                .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495],
        },
+       {
+               /*
+                * Acer Aspire A315-41 requires the very same workaround as
+                * Dell Latitude 5495
+                */
+               .callback = ivrs_ioapic_quirk_cb,
+               .ident = "Acer Aspire A315-41",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-41"),
+               },
+               .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495],
+       },
        {
                .callback = ivrs_ioapic_quirk_cb,
                .ident = "Lenovo ideapad 330S-15ARR",
index 9ac229e..17bd5a3 100644 (file)
 #define EVENT_TYPE_INV_PPR_REQ 0x9
 #define EVENT_DEVID_MASK       0xffff
 #define EVENT_DEVID_SHIFT      0
-#define EVENT_DOMID_MASK       0xffff
-#define EVENT_DOMID_SHIFT      0
+#define EVENT_DOMID_MASK_LO    0xffff
+#define EVENT_DOMID_MASK_HI    0xf0000
 #define EVENT_FLAGS_MASK       0xfff
 #define EVENT_FLAGS_SHIFT      0x10
 
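These masks pair with the iommu_print_event() hunk earlier: the 20-bit domain ID is split across the event words, with bits 19:16 already in place in event[0] and bits 15:0 in event[1], so reassembly is two masks and an OR. A standalone worked example:

#include <stdint.h>

#define DEMO_DOMID_MASK_LO 0xffffU	/* bits 15:0, taken from event[1] */
#define DEMO_DOMID_MASK_HI 0xf0000U	/* bits 19:16, taken from event[0] */

static uint32_t demo_domid(uint32_t ev0, uint32_t ev1)
{
	/* no shifting needed: the HI nibble already sits at bits 19:16 */
	return (ev0 & DEMO_DOMID_MASK_HI) | (ev1 & DEMO_DOMID_MASK_LO);
}
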
@@ -475,7 +475,6 @@ struct protection_domain {
        int glx;                /* Number of levels for GCR3 table */
        u64 *gcr3_tbl;          /* Guest CR3 table */
        unsigned long flags;    /* flags to find out type of domain */
-       bool updated;           /* complete domain flush required */
        unsigned dev_cnt;       /* devices assigned to this domain */
        unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
@@ -634,6 +633,9 @@ struct devid_map {
  * This struct contains device specific data for the IOMMU
  */
 struct iommu_dev_data {
+       /* Protect against attach/detach races */
+       spinlock_t lock;
+
        struct list_head list;            /* For domain->dev_list */
        struct llist_node dev_data_list;  /* For global dev_data_list */
        struct protection_domain *domain; /* Domain the device is bound to */
index b18aac4..7c503a6 100644 (file)
@@ -812,6 +812,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
        return 0;
 
 out_clear_smmu:
+       __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
        smmu_domain->smmu = NULL;
 out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
index 3f97491..6db6d96 100644 (file)
@@ -2794,7 +2794,7 @@ static int identity_mapping(struct device *dev)
        struct device_domain_info *info;
 
        info = dev->archdata.iommu;
-       if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+       if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
                return (info->domain == si_domain);
 
        return 0;
@@ -3471,7 +3471,7 @@ static bool iommu_need_mapping(struct device *dev)
                if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
                        dma_mask = dev->coherent_dma_mask;
 
-               if (dma_mask >= dma_get_required_mask(dev))
+               if (dma_mask >= dma_direct_get_required_mask(dev))
                        return false;
 
                /*
@@ -3775,6 +3775,13 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
        return nelems;
 }
 
+static u64 intel_get_required_mask(struct device *dev)
+{
+       if (!iommu_need_mapping(dev))
+               return dma_direct_get_required_mask(dev);
+       return DMA_BIT_MASK(32);
+}
+
 static const struct dma_map_ops intel_dma_ops = {
        .alloc = intel_alloc_coherent,
        .free = intel_free_coherent,
@@ -3787,6 +3794,7 @@ static const struct dma_map_ops intel_dma_ops = {
        .dma_supported = dma_direct_supported,
        .mmap = dma_common_mmap,
        .get_sgtable = dma_common_get_sgtable,
+       .get_required_mask = intel_get_required_mask,
 };
 
 static void
index 4c91359..ca51036 100644 (file)
 #define ARM_MALI_LPAE_TTBR_READ_INNER  BIT(2)
 #define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4)
 
+#define ARM_MALI_LPAE_MEMATTR_IMP_DEF  0x88ULL
+#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
+
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
@@ -1015,27 +1018,56 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 static struct io_pgtable *
 arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 {
-       struct io_pgtable *iop;
+       struct arm_lpae_io_pgtable *data;
 
-       if (cfg->ias != 48 || cfg->oas > 40)
+       /* No quirks for Mali (hopefully) */
+       if (cfg->quirks)
+               return NULL;
+
+       if (cfg->ias > 48 || cfg->oas > 40)
                return NULL;
 
        cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
-       iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
-       if (iop) {
-               u64 mair, ttbr;
 
-               /* Copy values as union fields overlap */
-               mair = cfg->arm_lpae_s1_cfg.mair[0];
-               ttbr = cfg->arm_lpae_s1_cfg.ttbr[0];
+       data = arm_lpae_alloc_pgtable(cfg);
+       if (!data)
+               return NULL;
 
-               cfg->arm_mali_lpae_cfg.memattr = mair;
-               cfg->arm_mali_lpae_cfg.transtab = ttbr |
-                       ARM_MALI_LPAE_TTBR_READ_INNER |
-                       ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+       /* Mali seems to need a full 4-level table regardless of IAS */
+       if (data->levels < ARM_LPAE_MAX_LEVELS) {
+               data->levels = ARM_LPAE_MAX_LEVELS;
+               data->pgd_size = sizeof(arm_lpae_iopte);
        }
+       /*
+        * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
+        * best we can do is mimic the out-of-tree driver and hope that the
+        * "implementation-defined caching policy" is good enough. Similarly,
+        * we'll use it for the sake of a valid attribute for our 'device'
+        * index, although callers should never request that in practice.
+        */
+       cfg->arm_mali_lpae_cfg.memattr =
+               (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+                << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
+               (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
+                << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
+               (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+                << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
 
-       return iop;
+       data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+       if (!data->pgd)
+               goto out_free_data;
+
+       /* Ensure the empty pgd is visible before TRANSTAB can be written */
+       wmb();
+
+       cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
+                                         ARM_MALI_LPAE_TTBR_READ_INNER |
+                                         ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+       return &data->iop;
+
+out_free_data:
+       kfree(data);
+       return NULL;
 }
 
 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
index 9da8309..2639fc7 100644 (file)
@@ -1086,8 +1086,6 @@ static int ipmmu_probe(struct platform_device *pdev)
 
        mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts);
 
-       irq = platform_get_irq(pdev, 0);
-
        /*
         * Determine if this IPMMU instance is a root device by checking for
         * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property.
@@ -1106,10 +1104,9 @@ static int ipmmu_probe(struct platform_device *pdev)
 
        /* Root devices have mandatory IRQs */
        if (ipmmu_is_root(mmu)) {
-               if (irq < 0) {
-                       dev_err(&pdev->dev, "no IRQ found\n");
+               irq = platform_get_irq(pdev, 0);
+               if (irq < 0)
                        return irq;
-               }
 
                ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0,
                                       dev_name(&pdev->dev), mmu);
index 26290f3..4dcbf68 100644 (file)
@@ -100,6 +100,7 @@ struct rk_iommu {
        struct device *dev;
        void __iomem **bases;
        int num_mmu;
+       int num_irq;
        struct clk_bulk_data *clocks;
        int num_clocks;
        bool reset_disabled;
@@ -1136,7 +1137,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
        struct rk_iommu *iommu;
        struct resource *res;
        int num_res = pdev->num_resources;
-       int err, i, irq;
+       int err, i;
 
        iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
@@ -1163,6 +1164,10 @@ static int rk_iommu_probe(struct platform_device *pdev)
        if (iommu->num_mmu == 0)
                return PTR_ERR(iommu->bases[0]);
 
+       iommu->num_irq = platform_irq_count(pdev);
+       if (iommu->num_irq < 0)
+               return iommu->num_irq;
+
        iommu->reset_disabled = device_property_read_bool(dev,
                                        "rockchip,disable-mmu-reset");
 
@@ -1219,8 +1224,9 @@ static int rk_iommu_probe(struct platform_device *pdev)
 
        pm_runtime_enable(dev);
 
-       i = 0;
-       while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+       for (i = 0; i < iommu->num_irq; i++) {
+               int irq = platform_get_irq(pdev, i);
+
                if (irq < 0)
                        return irq;
 
@@ -1245,10 +1251,13 @@ err_unprepare_clocks:
 static void rk_iommu_shutdown(struct platform_device *pdev)
 {
        struct rk_iommu *iommu = platform_get_drvdata(pdev);
-       int i = 0, irq;
+       int i;
+
+       for (i = 0; i < iommu->num_irq; i++) {
+               int irq = platform_get_irq(pdev, i);
 
-       while ((irq = platform_get_irq(pdev, i++)) != -ENXIO)
                devm_free_irq(iommu->dev, irq, iommu);
+       }
 
        pm_runtime_force_suspend(&pdev->dev);
 }
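
Counting the interrupts once with platform_irq_count() and then indexing them replaces the old idiom of looping until platform_get_irq() returns -ENXIO, and it also propagates -EPROBE_DEFER cleanly from probe. A hedged sketch of the pattern, with a demo probe function:

#include <linux/platform_device.h>

static int demo_probe_irqs(struct platform_device *pdev)
{
	int i, irq, num_irq;

	num_irq = platform_irq_count(pdev);
	if (num_irq < 0)
		return num_irq;		/* may be -EPROBE_DEFER */

	for (i = 0; i < num_irq; i++) {
		irq = platform_get_irq(pdev, i);
		if (irq < 0)
			return irq;
		/* ... devm_request_irq(&pdev->dev, irq, ...) here ... */
	}
	return 0;
}
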
index 1a57cee..0b0a737 100644 (file)
@@ -15,6 +15,7 @@
 
 /* FIC Registers */
 #define AL_FIC_CAUSE           0x00
+#define AL_FIC_SET_CAUSE       0x08
 #define AL_FIC_MASK            0x10
 #define AL_FIC_CONTROL         0x28
 
@@ -126,6 +127,16 @@ static void al_fic_irq_handler(struct irq_desc *desc)
        chained_irq_exit(irqchip, desc);
 }
 
+static int al_fic_irq_retrigger(struct irq_data *data)
+{
+       struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+       struct al_fic *fic = gc->private;
+
+       writel_relaxed(BIT(data->hwirq), fic->base + AL_FIC_SET_CAUSE);
+
+       return 1;
+}
+
 static int al_fic_register(struct device_node *node,
                           struct al_fic *fic)
 {
@@ -159,6 +170,7 @@ static int al_fic_register(struct device_node *node,
        gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit;
        gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit;
        gc->chip_types->chip.irq_set_type = al_fic_irq_set_type;
+       gc->chip_types->chip.irq_retrigger = al_fic_irq_retrigger;
        gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE;
        gc->private = fic;
 
index 6acad2e..2933349 100644 (file)
@@ -313,6 +313,7 @@ static void __init sama5d3_aic_irq_fixup(void)
 static const struct of_device_id aic5_irq_fixups[] __initconst = {
        { .compatible = "atmel,sama5d3", .data = sama5d3_aic_irq_fixup },
        { .compatible = "atmel,sama5d4", .data = sama5d3_aic_irq_fixup },
+       { .compatible = "microchip,sam9x60", .data = sama5d3_aic_irq_fixup },
        { /* sentinel */ },
 };
 
@@ -390,3 +391,12 @@ static int __init sama5d4_aic5_of_init(struct device_node *node,
        return aic5_of_init(node, parent, NR_SAMA5D4_IRQS);
 }
 IRQCHIP_DECLARE(sama5d4_aic5, "atmel,sama5d4-aic", sama5d4_aic5_of_init);
+
+#define NR_SAM9X60_IRQS                50
+
+static int __init sam9x60_aic5_of_init(struct device_node *node,
+                                      struct device_node *parent)
+{
+       return aic5_of_init(node, parent, NR_SAM9X60_IRQS);
+}
+IRQCHIP_DECLARE(sam9x60_aic5, "microchip,sam9x60-aic", sam9x60_aic5_of_init);
index 62e54f1..787e8ee 100644 (file)
@@ -175,6 +175,22 @@ static DEFINE_IDA(its_vpeid_ida);
 #define gic_data_rdist_rd_base()       (gic_data_rdist()->rd_base)
 #define gic_data_rdist_vlpi_base()     (gic_data_rdist_rd_base() + SZ_128K)
 
+static u16 get_its_list(struct its_vm *vm)
+{
+       struct its_node *its;
+       unsigned long its_list = 0;
+
+       list_for_each_entry(its, &its_nodes, entry) {
+               if (!its->is_v4)
+                       continue;
+
+               if (vm->vlpi_count[its->list_nr])
+                       __set_bit(its->list_nr, &its_list);
+       }
+
+       return (u16)its_list;
+}
+
 static struct its_collection *dev_event_to_col(struct its_device *its_dev,
                                               u32 event)
 {
@@ -976,17 +992,15 @@ static void its_send_vmapp(struct its_node *its,
 
 static void its_send_vmovp(struct its_vpe *vpe)
 {
-       struct its_cmd_desc desc;
+       struct its_cmd_desc desc = {};
        struct its_node *its;
        unsigned long flags;
        int col_id = vpe->col_idx;
 
        desc.its_vmovp_cmd.vpe = vpe;
-       desc.its_vmovp_cmd.its_list = (u16)its_list_map;
 
        if (!its_list_map) {
                its = list_first_entry(&its_nodes, struct its_node, entry);
-               desc.its_vmovp_cmd.seq_num = 0;
                desc.its_vmovp_cmd.col = &its->collections[col_id];
                its_send_single_vcommand(its, its_build_vmovp_cmd, &desc);
                return;
@@ -1003,6 +1017,7 @@ static void its_send_vmovp(struct its_vpe *vpe)
        raw_spin_lock_irqsave(&vmovp_lock, flags);
 
        desc.its_vmovp_cmd.seq_num = vmovp_seq_num++;
+       desc.its_vmovp_cmd.its_list = get_its_list(vpe->its_vm);
 
        /* Emit VMOVPs */
        list_for_each_entry(its, &its_nodes, entry) {
index 422664a..1edc993 100644 (file)
@@ -59,7 +59,7 @@ static struct gic_chip_data gic_data __read_mostly;
 static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
 
 #define GIC_ID_NR      (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
-#define GIC_LINE_NR    max(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
+#define GIC_LINE_NR    min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
 #define GIC_ESPI_NR    GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
 
 /*
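
The max() → min() swap above is the entire fix: GIC_LINE_NR should clamp the hardware-reported SPI count to the architectural ceiling of 1020, and max() instead forced it to 1020 whenever the hardware exposed fewer lines. A trivial standalone illustration with a made-up register value:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	unsigned int typer_spis = 988;	/* hypothetical GICD_TYPER-derived count */

	printf("%u\n", MIN(typer_spis, 1020U));	/* 988: the correct clamp */
	printf("%u\n", MAX(typer_spis, 1020U));	/* 1020: the old bug */
	return 0;
}
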
index c72c036..7d0a12f 100644 (file)
@@ -97,7 +97,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
        }
 }
 
-static void plic_irq_enable(struct irq_data *d)
+static void plic_irq_unmask(struct irq_data *d)
 {
        unsigned int cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
                                           cpu_online_mask);
@@ -106,7 +106,7 @@ static void plic_irq_enable(struct irq_data *d)
        plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
 }
 
-static void plic_irq_disable(struct irq_data *d)
+static void plic_irq_mask(struct irq_data *d)
 {
        plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
 }
@@ -125,10 +125,8 @@ static int plic_set_affinity(struct irq_data *d,
        if (cpu >= nr_cpu_ids)
                return -EINVAL;
 
-       if (!irqd_irq_disabled(d)) {
-               plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
-               plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
-       }
+       plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
+       plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
 
        irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
@@ -136,14 +134,18 @@ static int plic_set_affinity(struct irq_data *d,
 }
 #endif
 
+static void plic_irq_eoi(struct irq_data *d)
+{
+       struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+
+       writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+}
+
 static struct irq_chip plic_chip = {
        .name           = "SiFive PLIC",
-       /*
-        * There is no need to mask/unmask PLIC interrupts.  They are "masked"
-        * by reading claim and "unmasked" when writing it back.
-        */
-       .irq_enable     = plic_irq_enable,
-       .irq_disable    = plic_irq_disable,
+       .irq_mask       = plic_irq_mask,
+       .irq_unmask     = plic_irq_unmask,
+       .irq_eoi        = plic_irq_eoi,
 #ifdef CONFIG_SMP
        .irq_set_affinity = plic_set_affinity,
 #endif
@@ -152,7 +154,7 @@ static struct irq_chip plic_chip = {
 static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
                              irq_hw_number_t hwirq)
 {
-       irq_set_chip_and_handler(irq, &plic_chip, handle_simple_irq);
+       irq_set_chip_and_handler(irq, &plic_chip, handle_fasteoi_irq);
        irq_set_chip_data(irq, NULL);
        irq_set_noprobe(irq);
        return 0;
@@ -188,7 +190,6 @@ static void plic_handle_irq(struct pt_regs *regs)
                                        hwirq);
                else
                        generic_handle_irq(irq);
-               writel(hwirq, claim);
        }
        csr_set(sie, SIE_SEIE);
 }
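
The PLIC conversion above moves the claim/complete write out of the flow handler and into an irq_eoi callback, switching from handle_simple_irq to the fasteoi flow in which the hardware keeps the source gated between claim and completion. A hedged sketch of the resulting wiring; demo_claim_reg and the mask/unmask bodies are placeholders, not the driver's code:

#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>

static void __iomem *demo_claim_reg;	/* hypothetical, mapped at probe */

static void demo_irq_mask(struct irq_data *d)   { /* gate the source */ }
static void demo_irq_unmask(struct irq_data *d) { /* ungate the source */ }

/* the completion write doubles as EOI in the fasteoi flow */
static void demo_irq_eoi(struct irq_data *d)
{
	writel(d->hwirq, demo_claim_reg);
}

static struct irq_chip demo_chip = {
	.name		= "demo-plic",
	.irq_mask	= demo_irq_mask,
	.irq_unmask	= demo_irq_unmask,
	.irq_eoi	= demo_irq_eoi,
};

static int demo_map(struct irq_domain *dom, unsigned int irq,
		    irq_hw_number_t hwirq)
{
	irq_set_chip_and_handler(irq, &demo_chip, handle_fasteoi_irq);
	return 0;
}
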
@@ -251,8 +252,8 @@ static int __init plic_init(struct device_node *node,
                        continue;
                }
 
-               /* skip context holes */
-               if (parent.args[0] == -1)
+               /* skip contexts other than supervisor external interrupt */
+               if (parent.args[0] != IRQ_S_EXT)
                        continue;
 
                hartid = plic_find_hart_id(parent.np);
index c92b405..ba86195 100644 (file)
@@ -744,7 +744,7 @@ capi_poll(struct file *file, poll_table *wait)
 
        poll_wait(file, &(cdev->recvwait), wait);
        mask = EPOLLOUT | EPOLLWRNORM;
-       if (!skb_queue_empty(&cdev->recvqueue))
+       if (!skb_queue_empty_lockless(&cdev->recvqueue))
                mask |= EPOLLIN | EPOLLRDNORM;
        return mask;
 }
index c6ba37d..dff4132 100644 (file)
@@ -754,6 +754,8 @@ base_sock_create(struct net *net, struct socket *sock, int protocol, int kern)
 
        if (sock->type != SOCK_RAW)
                return -ESOCKTNOSUPPORT;
+       if (!capable(CAP_NET_RAW))
+               return -EPERM;
 
        sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern);
        if (!sk)
index 705c620..7b726f0 100644 (file)
@@ -18,7 +18,7 @@
 
 static int clamped;
 static struct wf_control *clamp_control;
-static struct dev_pm_qos_request qos_req;
+static struct freq_qos_request qos_req;
 static unsigned int min_freq, max_freq;
 
 static int clamp_set(struct wf_control *ct, s32 value)
@@ -35,7 +35,7 @@ static int clamp_set(struct wf_control *ct, s32 value)
        }
        clamped = value;
 
-       return dev_pm_qos_update_request(&qos_req, freq);
+       return freq_qos_update_request(&qos_req, freq);
 }
 
 static int clamp_get(struct wf_control *ct, s32 *value)
@@ -77,38 +77,44 @@ static int __init wf_cpufreq_clamp_init(void)
 
        min_freq = policy->cpuinfo.min_freq;
        max_freq = policy->cpuinfo.max_freq;
+
+       ret = freq_qos_add_request(&policy->constraints, &qos_req, FREQ_QOS_MAX,
+                                  max_freq);
+
        cpufreq_cpu_put(policy);
 
+       if (ret < 0) {
+               pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
+                      ret);
+               return ret;
+       }
+
        dev = get_cpu_device(0);
        if (unlikely(!dev)) {
                pr_warn("%s: No cpu device for cpu0\n", __func__);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto fail;
        }
 
        clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL);
-       if (clamp == NULL)
-               return -ENOMEM;
-
-       ret = dev_pm_qos_add_request(dev, &qos_req, DEV_PM_QOS_MAX_FREQUENCY,
-                                    max_freq);
-       if (ret < 0) {
-               pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
-                      ret);
-               goto free;
+       if (clamp == NULL) {
+               ret = -ENOMEM;
+               goto fail;
        }
 
        clamp->ops = &clamp_ops;
        clamp->name = "cpufreq-clamp";
        ret = wf_register_control(clamp);
        if (ret)
-               goto fail;
+               goto free;
+
        clamp_control = clamp;
        return 0;
- fail:
-       dev_pm_qos_remove_request(&qos_req);
 
  free:
        kfree(clamp);
+ fail:
+       freq_qos_remove_request(&qos_req);
        return ret;
 }
 
@@ -116,7 +122,7 @@ static void __exit wf_cpufreq_clamp_exit(void)
 {
        if (clamp_control) {
                wf_unregister_control(clamp_control);
-               dev_pm_qos_remove_request(&qos_req);
+               freq_qos_remove_request(&qos_req);
        }
 }
 
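The windfarm clamp is converted from the removed DEV_PM_QOS_MAX_FREQUENCY device-PM-QoS type to the freq_qos API, where a request attaches to a cpufreq policy's constraints rather than to a CPU device. A hedged sketch of the add/update/remove lifecycle, with demo names and error handling trimmed:

#include <linux/cpufreq.h>
#include <linux/pm_qos.h>

static struct freq_qos_request demo_req;

/* cap CPU0's frequency through the policy's constraints */
static int demo_add_cap(s32 khz)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(0);
	int ret;

	if (!policy)
		return -ENODEV;
	ret = freq_qos_add_request(&policy->constraints, &demo_req,
				   FREQ_QOS_MAX, khz);
	cpufreq_cpu_put(policy);
	return ret;
}

static int demo_update_cap(s32 khz)
{
	return freq_qos_update_request(&demo_req, khz);
}

static void demo_drop_cap(void)
{
	freq_qos_remove_request(&demo_req);
}
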
index d249cf8..8346e6d 100644 (file)
@@ -542,7 +542,7 @@ static void wake_migration_worker(struct cache *cache)
 
 static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
 {
-       return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
+       return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
 }
 
 static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
@@ -554,9 +554,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache)
 {
        struct dm_cache_migration *mg;
 
-       mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
-       if (!mg)
-               return NULL;
+       mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
 
        memset(mg, 0, sizeof(*mg));
 
@@ -664,10 +662,6 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi
        struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
 
        cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
-       if (!cell_prealloc) {
-               defer_bio(cache, bio);
-               return false;
-       }
 
        build_key(oblock, end, &key);
        r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
@@ -1493,11 +1487,6 @@ static int mg_lock_writes(struct dm_cache_migration *mg)
        struct dm_bio_prison_cell_v2 *prealloc;
 
        prealloc = alloc_prison_cell(cache);
-       if (!prealloc) {
-               DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
-               mg_complete(mg, false);
-               return -ENOMEM;
-       }
 
        /*
         * Prevent writes to the block, but allow reads to continue.
@@ -1535,11 +1524,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio
        }
 
        mg = alloc_migration(cache);
-       if (!mg) {
-               policy_complete_background_work(cache->policy, op, false);
-               background_work_end(cache);
-               return -ENOMEM;
-       }
 
        mg->op = op;
        mg->overwrite_bio = bio;
@@ -1628,10 +1612,6 @@ static int invalidate_lock(struct dm_cache_migration *mg)
        struct dm_bio_prison_cell_v2 *prealloc;
 
        prealloc = alloc_prison_cell(cache);
-       if (!prealloc) {
-               invalidate_complete(mg, false);
-               return -ENOMEM;
-       }
 
        build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
        r = dm_cell_lock_v2(cache->prison, &key,
@@ -1669,10 +1649,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
                return -EPERM;
 
        mg = alloc_migration(cache);
-       if (!mg) {
-               background_work_end(cache);
-               return -ENOMEM;
-       }
 
        mg->overwrite_bio = bio;
        mg->invalidate_cblock = cblock;
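
Every removed NULL check in this file follows from the gfp change at the top: mempool_alloc() may return NULL with GFP_NOWAIT, but with a sleeping mask such as GFP_NOIO it waits for a returned element and cannot fail. A one-function sketch of the contract, demo pool assumed:

#include <linux/mempool.h>

static void *demo_alloc(mempool_t *pool)
{
	/*
	 * GFP_NOWAIT may return NULL under memory pressure; GFP_NOIO
	 * sleeps until an element is freed back, so callers need no
	 * NULL check (at the cost of possibly blocking).
	 */
	return mempool_alloc(pool, GFP_NOIO);
}
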
index cd6f9e9..4ca8f19 100644 (file)
@@ -591,8 +591,8 @@ static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone,
  *
  * NOTE: Must be called with the bucket lock held
  */
-struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
-                                             unsigned long region_nr)
+static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
+                                                    unsigned long region_nr)
 {
        struct dm_clone_region_hydration *hd;
 
index 9118ab8..dab4446 100644 (file)
@@ -345,6 +345,14 @@ static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
 #define DEBUG_bytes(bytes, len, msg, ...)      do { } while (0)
 #endif
 
+static void dm_integrity_prepare(struct request *rq)
+{
+}
+
+static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
 /*
  * DM Integrity profile, protection is performed layer above (dm-crypt)
  */
@@ -352,6 +360,8 @@ static const struct blk_integrity_profile dm_integrity_profile = {
        .name                   = "DM-DIF-EXT-TAG",
        .generate_fn            = NULL,
        .verify_fn              = NULL,
+       .prepare_fn             = dm_integrity_prepare,
+       .complete_fn            = dm_integrity_complete,
 };
 
 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
index f150f5c..4fb1a40 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/vmalloc.h>
 #include <linux/log2.h>
 #include <linux/dm-kcopyd.h>
-#include <linux/semaphore.h>
 
 #include "dm.h"
 
@@ -107,8 +106,8 @@ struct dm_snapshot {
        /* The on disk metadata handler */
        struct dm_exception_store *store;
 
-       /* Maximum number of in-flight COW jobs. */
-       struct semaphore cow_count;
+       unsigned in_progress;
+       struct wait_queue_head in_progress_wait;
 
        struct dm_kcopyd_client *kcopyd_client;
 
@@ -162,8 +161,8 @@ struct dm_snapshot {
  */
 #define DEFAULT_COW_THRESHOLD 2048
 
-static int cow_threshold = DEFAULT_COW_THRESHOLD;
-module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
 MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
 
 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
@@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad_hash_tables;
        }
 
-       sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+       init_waitqueue_head(&s->in_progress_wait);
 
        s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
        if (IS_ERR(s->kcopyd_client)) {
@@ -1509,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti)
 
        dm_put_device(ti, s->origin);
 
+       WARN_ON(s->in_progress);
+
        kfree(s);
 }
 
+static void account_start_copy(struct dm_snapshot *s)
+{
+       spin_lock(&s->in_progress_wait.lock);
+       s->in_progress++;
+       spin_unlock(&s->in_progress_wait.lock);
+}
+
+static void account_end_copy(struct dm_snapshot *s)
+{
+       spin_lock(&s->in_progress_wait.lock);
+       BUG_ON(!s->in_progress);
+       s->in_progress--;
+       if (likely(s->in_progress <= cow_threshold) &&
+           unlikely(waitqueue_active(&s->in_progress_wait)))
+               wake_up_locked(&s->in_progress_wait);
+       spin_unlock(&s->in_progress_wait.lock);
+}
+
+static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
+{
+       if (unlikely(s->in_progress > cow_threshold)) {
+               spin_lock(&s->in_progress_wait.lock);
+               if (likely(s->in_progress > cow_threshold)) {
+                       /*
+                        * NOTE: this throttle doesn't account for whether
+                        * the caller is servicing an IO that will trigger a COW
+                        * so excess throttling may result for chunks not required
+                        * to be COW'd.  But if cow_threshold was reached, extra
+                        * throttling is unlikely to negatively impact performance.
+                        */
+                       DECLARE_WAITQUEUE(wait, current);
+                       __add_wait_queue(&s->in_progress_wait, &wait);
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       spin_unlock(&s->in_progress_wait.lock);
+                       if (unlock_origins)
+                               up_read(&_origins_lock);
+                       io_schedule();
+                       remove_wait_queue(&s->in_progress_wait, &wait);
+                       return false;
+               }
+               spin_unlock(&s->in_progress_wait.lock);
+       }
+       return true;
+}
+
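
The open-coded counter replaces the semaphore so that cow_threshold can change at runtime: starts bump in_progress under the waitqueue's own lock, completions decrement it and wake waiters once the count is back at the threshold, and writers sleep while it is above. A condensed sketch of the same throttle, with demo names and the simpler wait_event() form instead of the driver's open-coded wait:

#include <linux/wait.h>

static unsigned int demo_in_progress;
static unsigned int demo_threshold = 2048;
static DECLARE_WAIT_QUEUE_HEAD(demo_wait);

static void demo_start_copy(void)
{
	spin_lock(&demo_wait.lock);
	demo_in_progress++;
	spin_unlock(&demo_wait.lock);
}

static void demo_end_copy(void)
{
	spin_lock(&demo_wait.lock);
	if (--demo_in_progress <= demo_threshold)
		wake_up_locked(&demo_wait);
	spin_unlock(&demo_wait.lock);
}

static void demo_throttle(void)
{
	/* sleep while too many copies are in flight */
	wait_event(demo_wait, demo_in_progress <= demo_threshold);
}
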
 /*
  * Flush a list of buffers.
  */
@@ -1527,7 +1573,7 @@ static void flush_bios(struct bio *bio)
        }
 }
 
-static int do_origin(struct dm_dev *origin, struct bio *bio);
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
 
 /*
  * Flush a list of buffers.
@@ -1540,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
        while (bio) {
                n = bio->bi_next;
                bio->bi_next = NULL;
-               r = do_origin(s->origin, bio);
+               r = do_origin(s->origin, bio, false);
                if (r == DM_MAPIO_REMAPPED)
                        generic_make_request(bio);
                bio = n;
@@ -1732,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
                rb_link_node(&pe->out_of_order_node, parent, p);
                rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
        }
-       up(&s->cow_count);
+       account_end_copy(s);
 }
 
 /*
@@ -1756,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
        dest.count = src.count;
 
        /* Hand over to kcopyd */
-       down(&s->cow_count);
+       account_start_copy(s);
        dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
 }
 
@@ -1776,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
        pe->full_bio = bio;
        pe->full_bio_end_io = bio->bi_end_io;
 
-       down(&s->cow_count);
+       account_start_copy(s);
        callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
                                                   copy_callback, pe);
 
@@ -1866,7 +1912,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context)
        struct bio *bio = context;
        struct dm_snapshot *s = bio->bi_private;
 
-       up(&s->cow_count);
+       account_end_copy(s);
        bio->bi_status = write_err ? BLK_STS_IOERR : 0;
        bio_endio(bio);
 }
@@ -1880,7 +1926,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
        dest.sector = bio->bi_iter.bi_sector;
        dest.count = s->store->chunk_size;
 
-       down(&s->cow_count);
+       account_start_copy(s);
        WARN_ON_ONCE(bio->bi_private);
        bio->bi_private = s;
        dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
@@ -1916,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
        if (!s->valid)
                return DM_MAPIO_KILL;
 
+       if (bio_data_dir(bio) == WRITE) {
+               while (unlikely(!wait_for_in_progress(s, false)))
+                       ; /* wait_for_in_progress() has slept */
+       }
+
        down_read(&s->lock);
        dm_exception_table_lock(&lock);
 
@@ -2112,7 +2163,7 @@ redirect_to_origin:
 
        if (bio_data_dir(bio) == WRITE) {
                up_write(&s->lock);
-               return do_origin(s->origin, bio);
+               return do_origin(s->origin, bio, false);
        }
 
 out_unlock:
@@ -2487,15 +2538,24 @@ next_snapshot:
 /*
  * Called on a write from the origin driver.
  */
-static int do_origin(struct dm_dev *origin, struct bio *bio)
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
 {
        struct origin *o;
        int r = DM_MAPIO_REMAPPED;
 
+again:
        down_read(&_origins_lock);
        o = __lookup_origin(origin->bdev);
-       if (o)
+       if (o) {
+               if (limit) {
+                       struct dm_snapshot *s;
+                       list_for_each_entry(s, &o->snapshots, list)
+                               if (unlikely(!wait_for_in_progress(s, true)))
+                                       goto again;
+               }
+
                r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
+       }
        up_read(&_origins_lock);
 
        return r;
@@ -2608,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
                dm_accept_partial_bio(bio, available_sectors);
 
        /* Only tell snapshots if this is a write */
-       return do_origin(o->dev, bio);
+       return do_origin(o->dev, bio, true);
 }
 
 /*
index f61693e..1e77228 100644 (file)
@@ -154,7 +154,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
        } else {
                pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
                       mdname(mddev));
-               pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
+               pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
                err = -ENOTSUPP;
                goto abort;
        }
index 0a47d47..23e02ff 100644 (file)
@@ -183,14 +183,14 @@ static int adv748x_initialise_clients(struct adv748x_state *state)
        int ret;
 
        for (i = ADV748X_PAGE_DPLL; i < ADV748X_PAGE_MAX; ++i) {
-               state->i2c_clients[i] = i2c_new_secondary_device(
+               state->i2c_clients[i] = i2c_new_ancillary_device(
                                state->client,
                                adv748x_default_addresses[i].name,
                                adv748x_default_addresses[i].default_addr);
 
-               if (state->i2c_clients[i] == NULL) {
+               if (IS_ERR(state->i2c_clients[i])) {
                        adv_err(state, "failed to create i2c client %u\n", i);
-                       return -ENOMEM;
+                       return PTR_ERR(state->i2c_clients[i]);
                }
 
                ret = adv748x_configure_regmap(state, i);
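The same NULL-to-ERR_PTR conversion recurs throughout this series: i2c_new_dummy() and i2c_new_secondary_device() returned NULL on failure, while i2c_new_dummy_device() and i2c_new_ancillary_device() return an ERR_PTR carrying the actual reason. A minimal sketch of the new convention (the 0x51 address is made up):

	struct i2c_client *cl;

	cl = i2c_new_dummy_device(adapter, 0x51);
	if (IS_ERR(cl))
		return PTR_ERR(cl);	/* propagate the real error, not a blanket -ENOMEM/-ENODEV */
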
index 28a84bf..2dedd6e 100644 (file)
@@ -2862,10 +2862,8 @@ static void adv76xx_unregister_clients(struct adv76xx_state *state)
 {
        unsigned int i;
 
-       for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i) {
-               if (state->i2c_clients[i])
-                       i2c_unregister_device(state->i2c_clients[i]);
-       }
+       for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i)
+               i2c_unregister_device(state->i2c_clients[i]);
 }
 
 static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
@@ -2878,14 +2876,14 @@ static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
        struct i2c_client *new_client;
 
        if (pdata && pdata->i2c_addresses[page])
-               new_client = i2c_new_dummy(client->adapter,
+               new_client = i2c_new_dummy_device(client->adapter,
                                           pdata->i2c_addresses[page]);
        else
-               new_client = i2c_new_secondary_device(client,
+               new_client = i2c_new_ancillary_device(client,
                                adv76xx_default_addresses[page].name,
                                adv76xx_default_addresses[page].default_addr);
 
-       if (new_client)
+       if (!IS_ERR(new_client))
                io_write(sd, io_reg, new_client->addr << 1);
 
        return new_client;
@@ -3516,15 +3514,19 @@ static int adv76xx_probe(struct i2c_client *client,
        }
 
        for (i = 1; i < ADV76XX_PAGE_MAX; ++i) {
+               struct i2c_client *dummy_client;
+
                if (!(BIT(i) & state->info->page_mask))
                        continue;
 
-               state->i2c_clients[i] = adv76xx_dummy_client(sd, i);
-               if (!state->i2c_clients[i]) {
-                       err = -EINVAL;
+               dummy_client = adv76xx_dummy_client(sd, i);
+               if (IS_ERR(dummy_client)) {
+                       err = PTR_ERR(dummy_client);
                        v4l2_err(sd, "failed to create i2c client %u\n", i);
                        goto err_i2c;
                }
+
+               state->i2c_clients[i] = dummy_client;
        }
 
        INIT_DELAYED_WORK(&state->delayed_work_enable_hotplug,
index cfca3c7..21f90a8 100644 (file)
@@ -643,8 +643,7 @@ static int v4l_stk_release(struct file *fp)
                dev->owner = NULL;
        }
 
-       if (is_present(dev))
-               usb_autopm_put_interface(dev->interface);
+       usb_autopm_put_interface(dev->interface);
        mutex_unlock(&dev->lock);
        return v4l2_fh_release(fp);
 }
index 76b4ac7..aeb2f49 100644 (file)
@@ -157,6 +157,7 @@ static void videobuf_dma_contig_user_put(struct videobuf_dma_contig_memory *mem)
 static int videobuf_dma_contig_user_get(struct videobuf_dma_contig_memory *mem,
                                        struct videobuf_buffer *vb)
 {
+       unsigned long untagged_baddr = untagged_addr(vb->baddr);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long prev_pfn, this_pfn;
@@ -164,22 +165,22 @@ static int videobuf_dma_contig_user_get(struct videobuf_dma_contig_memory *mem,
        unsigned int offset;
        int ret;
 
-       offset = vb->baddr & ~PAGE_MASK;
+       offset = untagged_baddr & ~PAGE_MASK;
        mem->size = PAGE_ALIGN(vb->size + offset);
        ret = -EINVAL;
 
        down_read(&mm->mmap_sem);
 
-       vma = find_vma(mm, vb->baddr);
+       vma = find_vma(mm, untagged_baddr);
        if (!vma)
                goto out_up;
 
-       if ((vb->baddr + mem->size) > vma->vm_end)
+       if ((untagged_baddr + mem->size) > vma->vm_end)
                goto out_up;
 
        pages_done = 0;
        prev_pfn = 0; /* kill warning */
-       user_address = vb->baddr;
+       user_address = untagged_baddr;
 
        while (pages_done < (mem->size >> PAGE_SHIFT)) {
                ret = follow_pfn(vma, user_address, &this_pfn);
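untagged_addr() strips any architecture pointer tag (e.g. arm64 top-byte-ignore tags) so the value can safely be used for VMA lookups and range arithmetic; the pattern in miniature, reusing the vb->baddr from above:

	unsigned long uaddr = untagged_addr(vb->baddr);		/* drop tag bits, keep the address */
	struct vm_area_struct *vma = find_vma(mm, uaddr);	/* mm lookups expect untagged addresses */
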
index 3274742..64fff6a 100644 (file)
@@ -941,7 +941,7 @@ static int jmb38x_ms_probe(struct pci_dev *pdev,
        if (!cnt) {
                rc = -ENODEV;
                pci_dev_busy = 1;
-               goto err_out;
+               goto err_out_int;
        }
 
        jm = kzalloc(sizeof(struct jmb38x_ms)
index f2d9fb4..4e8d0d6 100644 (file)
@@ -425,10 +425,10 @@ static int pm800_pages_init(struct pm80x_chip *chip)
                return -ENODEV;
 
        /* PM800 block power page */
-       subchip->power_page = i2c_new_dummy(client->adapter,
+       subchip->power_page = i2c_new_dummy_device(client->adapter,
                                            subchip->power_page_addr);
-       if (subchip->power_page == NULL) {
-               ret = -ENODEV;
+       if (IS_ERR(subchip->power_page)) {
+               ret = PTR_ERR(subchip->power_page);
                goto out;
        }
 
@@ -444,10 +444,10 @@ static int pm800_pages_init(struct pm80x_chip *chip)
        i2c_set_clientdata(subchip->power_page, chip);
 
        /* PM800 block GPADC */
-       subchip->gpadc_page = i2c_new_dummy(client->adapter,
+       subchip->gpadc_page = i2c_new_dummy_device(client->adapter,
                                            subchip->gpadc_page_addr);
-       if (subchip->gpadc_page == NULL) {
-               ret = -ENODEV;
+       if (IS_ERR(subchip->gpadc_page)) {
+               ret = PTR_ERR(subchip->gpadc_page);
                goto out;
        }
 
index 9e0bd13..c9bae71 100644 (file)
@@ -1178,12 +1178,12 @@ static int pm860x_probe(struct i2c_client *client)
         */
        if (pdata->companion_addr && (pdata->companion_addr != client->addr)) {
                chip->companion_addr = pdata->companion_addr;
-               chip->companion = i2c_new_dummy(chip->client->adapter,
+               chip->companion = i2c_new_dummy_device(chip->client->adapter,
                                                chip->companion_addr);
-               if (!chip->companion) {
+               if (IS_ERR(chip->companion)) {
                        dev_err(&client->dev,
                                "Failed to allocate I2C companion device\n");
-                       return -ENODEV;
+                       return PTR_ERR(chip->companion);
                }
                chip->regmap_companion = regmap_init_i2c(chip->companion,
                                                        &pm860x_regmap_config);
index c9c49da..ae24d3e 100644 (file)
@@ -589,6 +589,17 @@ config INTEL_SOC_PMIC_CHTDC_TI
          Select this option for supporting Dollar Cove (TI version) PMIC
          device that is found on some Intel Cherry Trail systems.
 
+config INTEL_SOC_PMIC_MRFLD
+       tristate "Support for Intel Merrifield Basin Cove PMIC"
+       depends on GPIOLIB
+       depends on ACPI
+       depends on INTEL_SCU_IPC
+       select MFD_CORE
+       select REGMAP_IRQ
+       help
+         Select this option for supporting the Basin Cove PMIC
+         device that is found on Intel Merrifield systems.
+
 config MFD_INTEL_LPSS
        tristate
        select COMMON_CLK
@@ -641,15 +652,6 @@ config MFD_JANZ_CMODIO
          host many different types of MODULbus daughterboards, including
          CAN and GPIO controllers.
 
-config MFD_JZ4740_ADC
-       bool "Janz JZ4740 ADC core"
-       select MFD_CORE
-       select GENERIC_IRQ_CHIP
-       depends on MACH_JZ4740
-       help
-         Say yes here if you want support for the ADC unit in the JZ4740 SoC.
-         This driver is necessary for jz4740-battery and jz4740-hwmon driver.
-
 config MFD_KEMPLD
        tristate "Kontron module PLD device"
        select MFD_CORE
index 0c0a848..c1067ea 100644 (file)
@@ -189,7 +189,6 @@ obj-$(CONFIG_LPC_SCH)               += lpc_sch.o
 obj-$(CONFIG_LPC_ICH)          += lpc_ich.o
 obj-$(CONFIG_MFD_RDC321X)      += rdc321x-southbridge.o
 obj-$(CONFIG_MFD_JANZ_CMODIO)  += janz-cmodio.o
-obj-$(CONFIG_MFD_JZ4740_ADC)   += jz4740-adc.o
 obj-$(CONFIG_MFD_TPS6586X)     += tps6586x.o
 obj-$(CONFIG_MFD_VX855)                += vx855.o
 obj-$(CONFIG_MFD_WL1273_CORE)  += wl1273-core.o
@@ -239,7 +238,9 @@ obj-$(CONFIG_INTEL_SOC_PMIC)        += intel-soc-pmic.o
 obj-$(CONFIG_INTEL_SOC_PMIC_BXTWC)     += intel_soc_pmic_bxtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTWC)     += intel_soc_pmic_chtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI)  += intel_soc_pmic_chtdc_ti.o
-obj-$(CONFIG_MFD_MT6397)       += mt6397-core.o
+mt6397-objs    := mt6397-core.o mt6397-irq.o
+obj-$(CONFIG_MFD_MT6397)       += mt6397.o
+obj-$(CONFIG_INTEL_SOC_PMIC_MRFLD)     += intel_soc_pmic_mrfld.o
 
 obj-$(CONFIG_MFD_ALTERA_A10SR) += altera-a10sr.o
 obj-$(CONFIG_MFD_ALTERA_SYSMGR) += altera-sysmgr.o
index 9f3dbc3..57723f1 100644 (file)
@@ -865,10 +865,10 @@ static int ab3100_probe(struct i2c_client *client,
                 &ab3100->chip_name[0]);
 
        /* Attach a second dummy i2c_client to the test register address */
-       ab3100->testreg_client = i2c_new_dummy(client->adapter,
+       ab3100->testreg_client = i2c_new_dummy_device(client->adapter,
                                               client->addr + 1);
-       if (!ab3100->testreg_client) {
-               err = -ENOMEM;
+       if (IS_ERR(ab3100->testreg_client)) {
+               err = PTR_ERR(ab3100->testreg_client);
                goto exit_no_testreg_client;
        }
 
index 567a34b..f4e26b6 100644 (file)
@@ -2680,16 +2680,12 @@ static int ab8500_debug_probe(struct platform_device *plf)
        irq_ab8500 = res->start;
 
        irq_first = platform_get_irq_byname(plf, "IRQ_FIRST");
-       if (irq_first < 0) {
-               dev_err(&plf->dev, "First irq not found, err %d\n", irq_first);
+       if (irq_first < 0)
                return irq_first;
-       }
 
        irq_last = platform_get_irq_byname(plf, "IRQ_LAST");
-       if (irq_last < 0) {
-               dev_err(&plf->dev, "Last irq not found, err %d\n", irq_last);
+       if (irq_last < 0)
                return irq_last;
-       }
 
        ab8500_dir = debugfs_create_dir(AB8500_NAME_STRING, NULL);
 
index 83b18c9..a6bd213 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/export.h>
 #include <linux/io.h>
 #include <linux/slab.h>
index 1aeb5e4..bfac5dc 100644 (file)
@@ -61,11 +61,11 @@ static int bcm590xx_i2c_probe(struct i2c_client *i2c_pri,
        }
 
        /* Secondary I2C slave address is the base address with A(2) asserted */
-       bcm590xx->i2c_sec = i2c_new_dummy(i2c_pri->adapter,
+       bcm590xx->i2c_sec = i2c_new_dummy_device(i2c_pri->adapter,
                                          i2c_pri->addr | BIT(2));
-       if (!bcm590xx->i2c_sec) {
+       if (IS_ERR(bcm590xx->i2c_sec)) {
                dev_err(&i2c_pri->dev, "failed to add secondary I2C device\n");
-               return -ENODEV;
+               return PTR_ERR(bcm590xx->i2c_sec);
        }
        i2c_set_clientdata(bcm590xx->i2c_sec, bcm590xx);
 
index 1303306..7f0aa1e 100644 (file)
@@ -420,10 +420,10 @@ static int da9150_probe(struct i2c_client *client,
        qif_addr = da9150_reg_read(da9150, DA9150_CORE2WIRE_CTRL_A);
        qif_addr = (qif_addr & DA9150_CORE_BASE_ADDR_MASK) >> 1;
        qif_addr |= DA9150_QIF_I2C_ADDR_LSB;
-       da9150->core_qif = i2c_new_dummy(client->adapter, qif_addr);
-       if (!da9150->core_qif) {
+       da9150->core_qif = i2c_new_dummy_device(client->adapter, qif_addr);
+       if (IS_ERR(da9150->core_qif)) {
                dev_err(da9150->dev, "Failed to attach QIF client\n");
-               return -ENODEV;
+               return PTR_ERR(da9150->core_qif);
        }
 
        i2c_set_clientdata(da9150->core_qif, da9150);
index 13ca720..e5c8bc9 100644 (file)
@@ -19,7 +19,6 @@
 #include <sound/pcm.h>
 
 #include <linux/mfd/davinci_voicecodec.h>
-#include <mach/hardware.h>
 
 static const struct regmap_config davinci_vc_regmap = {
        .reg_bits = 32,
@@ -31,6 +30,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
        struct davinci_vc *davinci_vc;
        struct resource *res;
        struct mfd_cell *cell = NULL;
+       dma_addr_t fifo_base;
        int ret;
 
        davinci_vc = devm_kzalloc(&pdev->dev,
@@ -48,6 +48,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
+       fifo_base = (dma_addr_t)res->start;
        davinci_vc->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(davinci_vc->base)) {
                ret = PTR_ERR(davinci_vc->base);
@@ -70,8 +71,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
        }
 
        davinci_vc->davinci_vcif.dma_tx_channel = res->start;
-       davinci_vc->davinci_vcif.dma_tx_addr =
-               (dma_addr_t)(io_v2p(davinci_vc->base) + DAVINCI_VC_WFIFO);
+       davinci_vc->davinci_vcif.dma_tx_addr = fifo_base + DAVINCI_VC_WFIFO;
 
        res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
        if (!res) {
@@ -81,8 +81,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
        }
 
        davinci_vc->davinci_vcif.dma_rx_channel = res->start;
-       davinci_vc->davinci_vcif.dma_rx_addr =
-               (dma_addr_t)(io_v2p(davinci_vc->base) + DAVINCI_VC_RFIFO);
+       davinci_vc->davinci_vcif.dma_rx_addr = fifo_base + DAVINCI_VC_RFIFO;
 
        davinci_vc->dev = &pdev->dev;
        davinci_vc->pdev = pdev;
index 90e0f21..dfac6af 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/regulator/db8500-prcmu.h>
 #include <linux/regulator/machine.h>
 #include <linux/platform_data/ux500_wdt.h>
-#include <linux/platform_data/db8500_thermal.h>
 #include "dbx500-prcmu-regs.h"
 
 /* Index of different voltages to be used when accessing AVSData */
@@ -1695,21 +1694,41 @@ static long round_clock_rate(u8 clock, unsigned long rate)
        return rounded_rate;
 }
 
-static const unsigned long armss_freqs[] = {
+static const unsigned long db8500_armss_freqs[] = {
        200000000,
        400000000,
        800000000,
        998400000
 };
 
+/* The DB8520 has slightly higher ARMSS max frequency */
+static const unsigned long db8520_armss_freqs[] = {
+       200000000,
+       400000000,
+       800000000,
+       1152000000
+};
+
 static long round_armss_rate(unsigned long rate)
 {
        unsigned long freq = 0;
+       const unsigned long *freqs;
+       int nfreqs;
        int i;
 
+       if (fw_info.version.project == PRCMU_FW_PROJECT_U8520) {
+               freqs = db8520_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8520_armss_freqs);
+       } else {
+               freqs = db8500_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8500_armss_freqs);
+       }
+
        /* Find the corresponding arm opp from the cpufreq table. */
-       for (i = 0; i < ARRAY_SIZE(armss_freqs); i++) {
-               freq = armss_freqs[i];
+       for (i = 0; i < nfreqs; i++) {
+               freq = freqs[i];
                if (rate <= freq)
                        break;
        }
@@ -1854,11 +1873,21 @@ static int set_armss_rate(unsigned long rate)
 {
        unsigned long freq;
        u8 opps[] = { ARM_EXTCLK, ARM_50_OPP, ARM_100_OPP, ARM_MAX_OPP };
+       const unsigned long *freqs;
+       int nfreqs;
        int i;
 
+       if (fw_info.version.project == PRCMU_FW_PROJECT_U8520) {
+               freqs = db8520_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8520_armss_freqs);
+       } else {
+               freqs = db8500_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8500_armss_freqs);
+       }
+
        /* Find the corresponding arm opp from the cpufreq table. */
-       for (i = 0; i < ARRAY_SIZE(armss_freqs); i++) {
-               freq = armss_freqs[i];
+       for (i = 0; i < nfreqs; i++) {
+               freq = freqs[i];
                if (rate == freq)
                        break;
        }
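The project-based table selection now appears verbatim in both round_armss_rate() and set_armss_rate(); a hedged sketch of how it could be factored into one helper (armss_get_freqs() is a hypothetical name, not part of the patch):

	static const unsigned long *armss_get_freqs(int *nfreqs)
	{
		if (fw_info.version.project == PRCMU_FW_PROJECT_U8520) {
			*nfreqs = ARRAY_SIZE(db8520_armss_freqs);
			return db8520_armss_freqs;
		}
		*nfreqs = ARRAY_SIZE(db8500_armss_freqs);
		return db8500_armss_freqs;
	}
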
@@ -2984,53 +3013,6 @@ static struct ux500_wdt_data db8500_wdt_pdata = {
        .timeout = 600, /* 10 minutes */
        .has_28_bits_resolution = true,
 };
-/*
- * Thermal Sensor
- */
-
-static struct resource db8500_thsens_resources[] = {
-       {
-               .name = "IRQ_HOTMON_LOW",
-               .start  = IRQ_PRCMU_HOTMON_LOW,
-               .end    = IRQ_PRCMU_HOTMON_LOW,
-               .flags  = IORESOURCE_IRQ,
-       },
-       {
-               .name = "IRQ_HOTMON_HIGH",
-               .start  = IRQ_PRCMU_HOTMON_HIGH,
-               .end    = IRQ_PRCMU_HOTMON_HIGH,
-               .flags  = IORESOURCE_IRQ,
-       },
-};
-
-static struct db8500_thsens_platform_data db8500_thsens_data = {
-       .trip_points[0] = {
-               .temp = 70000,
-               .type = THERMAL_TRIP_ACTIVE,
-               .cdev_name = {
-                       [0] = "thermal-cpufreq-0",
-               },
-       },
-       .trip_points[1] = {
-               .temp = 75000,
-               .type = THERMAL_TRIP_ACTIVE,
-               .cdev_name = {
-                       [0] = "thermal-cpufreq-0",
-               },
-       },
-       .trip_points[2] = {
-               .temp = 80000,
-               .type = THERMAL_TRIP_ACTIVE,
-               .cdev_name = {
-                       [0] = "thermal-cpufreq-0",
-               },
-       },
-       .trip_points[3] = {
-               .temp = 85000,
-               .type = THERMAL_TRIP_CRITICAL,
-       },
-       .num_trips = 4,
-};
 
 static const struct mfd_cell common_prcmu_devs[] = {
        {
@@ -3054,10 +3036,7 @@ static const struct mfd_cell db8500_prcmu_devs[] = {
        },
        {
                .name = "db8500-thermal",
-               .num_resources = ARRAY_SIZE(db8500_thsens_resources),
-               .resources = db8500_thsens_resources,
-               .platform_data = &db8500_thsens_data,
-               .pdata_size = sizeof(db8500_thsens_data),
+               .of_compatible = "stericsson,db8500-thermal",
        },
 };
 
@@ -3130,10 +3109,8 @@ static int db8500_prcmu_probe(struct platform_device *pdev)
        writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLR);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq <= 0) {
-               dev_err(&pdev->dev, "no prcmu irq provided\n");
+       if (irq <= 0)
                return irq;
-       }
 
        err = request_threaded_irq(irq, prcmu_irq_handler,
                prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL);
index f505e3e..70fa18b 100644 (file)
@@ -35,7 +35,7 @@ struct pcap_chip {
 
        /* IO */
        u32 buf;
-       struct mutex io_mutex;
+       spinlock_t io_lock;
 
        /* IRQ */
        unsigned int irq_base;
@@ -48,7 +48,7 @@ struct pcap_chip {
        struct pcap_adc_request *adc_queue[PCAP_ADC_MAXQ];
        u8 adc_head;
        u8 adc_tail;
-       struct mutex adc_mutex;
+       spinlock_t adc_lock;
 };
 
 /* IO */
@@ -76,14 +76,15 @@ static int ezx_pcap_putget(struct pcap_chip *pcap, u32 *data)
 
 int ezx_pcap_write(struct pcap_chip *pcap, u8 reg_num, u32 value)
 {
+       unsigned long flags;
        int ret;
 
-       mutex_lock(&pcap->io_mutex);
+       spin_lock_irqsave(&pcap->io_lock, flags);
        value &= PCAP_REGISTER_VALUE_MASK;
        value |= PCAP_REGISTER_WRITE_OP_BIT
                | (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
        ret = ezx_pcap_putget(pcap, &value);
-       mutex_unlock(&pcap->io_mutex);
+       spin_unlock_irqrestore(&pcap->io_lock, flags);
 
        return ret;
 }
@@ -91,14 +92,15 @@ EXPORT_SYMBOL_GPL(ezx_pcap_write);
 
 int ezx_pcap_read(struct pcap_chip *pcap, u8 reg_num, u32 *value)
 {
+       unsigned long flags;
        int ret;
 
-       mutex_lock(&pcap->io_mutex);
+       spin_lock_irqsave(&pcap->io_lock, flags);
        *value = PCAP_REGISTER_READ_OP_BIT
                | (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
 
        ret = ezx_pcap_putget(pcap, value);
-       mutex_unlock(&pcap->io_mutex);
+       spin_unlock_irqrestore(&pcap->io_lock, flags);
 
        return ret;
 }
@@ -106,11 +108,12 @@ EXPORT_SYMBOL_GPL(ezx_pcap_read);
 
 int ezx_pcap_set_bits(struct pcap_chip *pcap, u8 reg_num, u32 mask, u32 val)
 {
+       unsigned long flags;
        int ret;
        u32 tmp = PCAP_REGISTER_READ_OP_BIT |
                (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
 
-       mutex_lock(&pcap->io_mutex);
+       spin_lock_irqsave(&pcap->io_lock, flags);
        ret = ezx_pcap_putget(pcap, &tmp);
        if (ret)
                goto out_unlock;
@@ -121,7 +124,7 @@ int ezx_pcap_set_bits(struct pcap_chip *pcap, u8 reg_num, u32 mask, u32 val)
 
        ret = ezx_pcap_putget(pcap, &tmp);
 out_unlock:
-       mutex_unlock(&pcap->io_mutex);
+       spin_unlock_irqrestore(&pcap->io_lock, flags);
 
        return ret;
 }
@@ -212,14 +215,15 @@ static void pcap_irq_handler(struct irq_desc *desc)
 /* ADC */
 void pcap_set_ts_bits(struct pcap_chip *pcap, u32 bits)
 {
+       unsigned long flags;
        u32 tmp;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, flags);
        ezx_pcap_read(pcap, PCAP_REG_ADC, &tmp);
        tmp &= ~(PCAP_ADC_TS_M_MASK | PCAP_ADC_TS_REF_LOWPWR);
        tmp |= bits & (PCAP_ADC_TS_M_MASK | PCAP_ADC_TS_REF_LOWPWR);
        ezx_pcap_write(pcap, PCAP_REG_ADC, tmp);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, flags);
 }
 EXPORT_SYMBOL_GPL(pcap_set_ts_bits);
 
@@ -234,15 +238,16 @@ static void pcap_disable_adc(struct pcap_chip *pcap)
 
 static void pcap_adc_trigger(struct pcap_chip *pcap)
 {
+       unsigned long flags;
        u32 tmp;
        u8 head;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, flags);
        head = pcap->adc_head;
        if (!pcap->adc_queue[head]) {
                /* queue is empty, save power */
                pcap_disable_adc(pcap);
-               mutex_unlock(&pcap->adc_mutex);
+               spin_unlock_irqrestore(&pcap->adc_lock, flags);
                return;
        }
        /* start conversion on requested bank, save TS_M bits */
@@ -254,7 +259,7 @@ static void pcap_adc_trigger(struct pcap_chip *pcap)
                tmp |= PCAP_ADC_AD_SEL1;
 
        ezx_pcap_write(pcap, PCAP_REG_ADC, tmp);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, flags);
        ezx_pcap_write(pcap, PCAP_REG_ADR, PCAP_ADR_ASC);
 }
 
@@ -265,11 +270,11 @@ static irqreturn_t pcap_adc_irq(int irq, void *_pcap)
        u16 res[2];
        u32 tmp;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock(&pcap->adc_lock);
        req = pcap->adc_queue[pcap->adc_head];
 
        if (WARN(!req, "adc irq without pending request\n")) {
-               mutex_unlock(&pcap->adc_mutex);
+               spin_unlock(&pcap->adc_lock);
                return IRQ_HANDLED;
        }
 
@@ -285,7 +290,7 @@ static irqreturn_t pcap_adc_irq(int irq, void *_pcap)
 
        pcap->adc_queue[pcap->adc_head] = NULL;
        pcap->adc_head = (pcap->adc_head + 1) & (PCAP_ADC_MAXQ - 1);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock(&pcap->adc_lock);
 
        /* pass the results and release memory */
        req->callback(req->data, res);
@@ -301,6 +306,7 @@ int pcap_adc_async(struct pcap_chip *pcap, u8 bank, u32 flags, u8 ch[],
                                                void *callback, void *data)
 {
        struct pcap_adc_request *req;
+       unsigned long irq_flags;
 
        /* This will be freed after we have a result */
        req = kmalloc(sizeof(struct pcap_adc_request), GFP_KERNEL);
@@ -314,15 +320,15 @@ int pcap_adc_async(struct pcap_chip *pcap, u8 bank, u32 flags, u8 ch[],
        req->callback = callback;
        req->data = data;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, irq_flags);
        if (pcap->adc_queue[pcap->adc_tail]) {
-               mutex_unlock(&pcap->adc_mutex);
+               spin_unlock_irqrestore(&pcap->adc_lock, irq_flags);
                kfree(req);
                return -EBUSY;
        }
        pcap->adc_queue[pcap->adc_tail] = req;
        pcap->adc_tail = (pcap->adc_tail + 1) & (PCAP_ADC_MAXQ - 1);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, irq_flags);
 
        /* start conversion */
        pcap_adc_trigger(pcap);
@@ -389,16 +395,17 @@ static int pcap_add_subdev(struct pcap_chip *pcap,
 static int ezx_pcap_remove(struct spi_device *spi)
 {
        struct pcap_chip *pcap = spi_get_drvdata(spi);
+       unsigned long flags;
        int i;
 
        /* remove all registered subdevs */
        device_for_each_child(&spi->dev, NULL, pcap_remove_subdev);
 
        /* cleanup ADC */
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, flags);
        for (i = 0; i < PCAP_ADC_MAXQ; i++)
                kfree(pcap->adc_queue[i]);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, flags);
 
        /* cleanup irqchip */
        for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
@@ -426,8 +433,8 @@ static int ezx_pcap_probe(struct spi_device *spi)
                goto ret;
        }
 
-       mutex_init(&pcap->io_mutex);
-       mutex_init(&pcap->adc_mutex);
+       spin_lock_init(&pcap->io_lock);
+       spin_lock_init(&pcap->adc_lock);
        INIT_WORK(&pcap->isr_work, pcap_isr_work);
        INIT_WORK(&pcap->msr_work, pcap_msr_work);
        spi_set_drvdata(spi, pcap);
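The mutex-to-spinlock conversion lets these register accessors run from contexts that cannot sleep (the ADC IRQ path now takes the lock directly); the general shape, as a sketch:

	unsigned long flags;

	spin_lock_irqsave(&pcap->io_lock, flags);	/* also masks local interrupts */
	/* ... single SPI transaction via ezx_pcap_putget() ... */
	spin_unlock_irqrestore(&pcap->io_lock, flags);
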
index 20791ca..a016b39 100644 (file)
@@ -69,10 +69,8 @@ static int mx25_tsadc_setup_irq(struct platform_device *pdev,
        int irq;
 
        irq = platform_get_irq(pdev, 0);
-       if (irq <= 0) {
-               dev_err(dev, "Failed to get irq\n");
+       if (irq <= 0)
                return irq;
-       }
 
        tsadc->domain = irq_domain_add_simple(np, 2, 0, &mx25_tsadc_domain_ops,
                                              tsadc);
index 370519a..8ad6768 100644 (file)
@@ -385,8 +385,7 @@ static void htcpld_unregister_chip_i2c(
        htcpld = platform_get_drvdata(pdev);
        chip = &htcpld->chip[chip_index];
 
-       if (chip->client)
-               i2c_unregister_device(chip->client);
+       i2c_unregister_device(chip->client);
 }
 
 static int htcpld_register_chip_gpio(
index 61ffb8b..c8fe334 100644 (file)
 
 #include "intel-lpss.h"
 
+static const struct intel_lpss_platform_info spt_info = {
+       .clk_rate = 120000000,
+};
+
 static struct property_entry spt_i2c_properties[] = {
        PROPERTY_ENTRY_U32("i2c-sda-hold-time-ns", 230),
        { },
@@ -28,6 +32,19 @@ static const struct intel_lpss_platform_info spt_i2c_info = {
        .properties = spt_i2c_properties,
 };
 
+static struct property_entry uart_properties[] = {
+       PROPERTY_ENTRY_U32("reg-io-width", 4),
+       PROPERTY_ENTRY_U32("reg-shift", 2),
+       PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"),
+       { },
+};
+
+static const struct intel_lpss_platform_info spt_uart_info = {
+       .clk_rate = 120000000,
+       .clk_con_id = "baudclk",
+       .properties = uart_properties,
+};
+
 static const struct intel_lpss_platform_info bxt_info = {
        .clk_rate = 100000000,
 };
@@ -58,8 +75,17 @@ static const struct intel_lpss_platform_info apl_i2c_info = {
 
 static const struct acpi_device_id intel_lpss_acpi_ids[] = {
        /* SPT */
+       { "INT3440", (kernel_ulong_t)&spt_info },
+       { "INT3441", (kernel_ulong_t)&spt_info },
+       { "INT3442", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3443", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3444", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3445", (kernel_ulong_t)&spt_i2c_info },
        { "INT3446", (kernel_ulong_t)&spt_i2c_info },
        { "INT3447", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3448", (kernel_ulong_t)&spt_uart_info },
+       { "INT3449", (kernel_ulong_t)&spt_uart_info },
+       { "INT344A", (kernel_ulong_t)&spt_uart_info },
        /* BXT */
        { "80860AAC", (kernel_ulong_t)&bxt_i2c_info },
        { "80860ABC", (kernel_ulong_t)&bxt_info },
index ade6e1c..9355db2 100644 (file)
@@ -35,6 +35,8 @@ static int intel_lpss_pci_probe(struct pci_dev *pdev,
        info->mem = &pdev->resource[0];
        info->irq = pdev->irq;
 
+       pdev->d3cold_delay = 0;
+
        /* Probably it is enough to set this for iDMA capable devices only */
        pci_set_master(pdev);
        pci_try_set_mwi(pdev);
@@ -256,6 +258,29 @@ static const struct pci_device_id intel_lpss_pci_ids[] = {
        { PCI_VDEVICE(INTEL, 0x9dea), (kernel_ulong_t)&cnl_i2c_info },
        { PCI_VDEVICE(INTEL, 0x9deb), (kernel_ulong_t)&cnl_i2c_info },
        { PCI_VDEVICE(INTEL, 0x9dfb), (kernel_ulong_t)&spt_info },
+       /* TGL-LP */
+       { PCI_VDEVICE(INTEL, 0xa0a8), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0a9), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0aa), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0ab), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0c5), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0c6), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0c7), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0d8), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0d9), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0da), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0db), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0dc), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0dd), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0de), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0df), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0e8), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0e9), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0ea), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0eb), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0fb), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0fd), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0fe), (kernel_ulong_t)&spt_info },
        /* SPT-H */
        { PCI_VDEVICE(INTEL, 0xa127), (kernel_ulong_t)&spt_uart_info },
        { PCI_VDEVICE(INTEL, 0xa128), (kernel_ulong_t)&spt_uart_info },
index 277f48f..bfe4ff3 100644 (file)
 #define LPSS_PRIV_IDLELTR              0x14
 
 #define LPSS_PRIV_LTR_REQ              BIT(15)
-#define LPSS_PRIV_LTR_SCALE_MASK       0xc00
-#define LPSS_PRIV_LTR_SCALE_1US                0x800
-#define LPSS_PRIV_LTR_SCALE_32US       0xc00
-#define LPSS_PRIV_LTR_VALUE_MASK       0x3ff
+#define LPSS_PRIV_LTR_SCALE_MASK       GENMASK(11, 10)
+#define LPSS_PRIV_LTR_SCALE_1US                (2 << 10)
+#define LPSS_PRIV_LTR_SCALE_32US       (3 << 10)
+#define LPSS_PRIV_LTR_VALUE_MASK       GENMASK(9, 0)
 
 #define LPSS_PRIV_SSP_REG              0x20
 #define LPSS_PRIV_SSP_REG_DIS_DMA_FIN  BIT(0)
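The GENMASK() forms above are bit-for-bit identical to the old hex literals; a compile-time spot check, assuming static_assert() from <linux/build_bug.h>:

	static_assert(GENMASK(11, 10) == 0xc00);	/* old LPSS_PRIV_LTR_SCALE_MASK */
	static_assert((2 << 10) == 0x800);		/* old LPSS_PRIV_LTR_SCALE_1US */
	static_assert((3 << 10) == 0xc00);		/* old LPSS_PRIV_LTR_SCALE_32US */
	static_assert(GENMASK(9, 0) == 0x3ff);		/* old LPSS_PRIV_LTR_VALUE_MASK */
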
@@ -59,8 +59,8 @@
 
 #define LPSS_PRIV_CAPS                 0xfc
 #define LPSS_PRIV_CAPS_NO_IDMA         BIT(8)
+#define LPSS_PRIV_CAPS_TYPE_MASK       GENMASK(7, 4)
 #define LPSS_PRIV_CAPS_TYPE_SHIFT      4
-#define LPSS_PRIV_CAPS_TYPE_MASK       (0xf << LPSS_PRIV_CAPS_TYPE_SHIFT)
 
 /* This matches the type field in CAPS register */
 enum intel_lpss_dev_type {
@@ -128,17 +128,6 @@ static const struct mfd_cell intel_lpss_spi_cell = {
 static DEFINE_IDA(intel_lpss_devid_ida);
 static struct dentry *intel_lpss_debugfs;
 
-static int intel_lpss_request_dma_module(const char *name)
-{
-       static bool intel_lpss_dma_requested;
-
-       if (intel_lpss_dma_requested)
-               return 0;
-
-       intel_lpss_dma_requested = true;
-       return request_module("%s", name);
-}
-
 static void intel_lpss_cache_ltr(struct intel_lpss *lpss)
 {
        lpss->active_ltr = readl(lpss->priv + LPSS_PRIV_ACTIVELTR);
@@ -429,16 +418,6 @@ int intel_lpss_probe(struct device *dev,
                dev_warn(dev, "Failed to create debugfs entries\n");
 
        if (intel_lpss_has_idma(lpss)) {
-               /*
-                * Ensure the DMA driver is loaded before the host
-                * controller device appears, so that the host controller
-                * driver can request its DMA channels as early as
-                * possible.
-                *
-                * If the DMA module is not there that's OK as well.
-                */
-               intel_lpss_request_dma_module(LPSS_IDMA64_DRIVER_NAME);
-
                ret = mfd_add_devices(dev, lpss->devid, &intel_lpss_idma64_cell,
                                      1, info->mem, info->irq, NULL);
                if (ret)
@@ -554,3 +533,11 @@ MODULE_AUTHOR("Heikki Krogerus <heikki.krogerus@linux.intel.com>");
 MODULE_AUTHOR("Jarkko Nikula <jarkko.nikula@linux.intel.com>");
 MODULE_DESCRIPTION("Intel LPSS core driver");
 MODULE_LICENSE("GPL v2");
+/*
+ * Ensure the DMA driver is loaded before the host controller device appears,
+ * so that the host controller driver can request its DMA channels as early
+ * as possible.
+ *
+ * If the DMA module is not there that's OK as well.
+ */
+MODULE_SOFTDEP("pre: platform:" LPSS_IDMA64_DRIVER_NAME);
index 6310c3b..739cfb5 100644 (file)
@@ -450,10 +450,8 @@ static int bxtwc_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        ret = platform_get_irq(pdev, 0);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "Invalid IRQ\n");
+       if (ret < 0)
                return ret;
-       }
        pmic->irq = ret;
 
        dev_set_drvdata(&pdev->dev, pmic);
diff --git a/drivers/mfd/intel_soc_pmic_mrfld.c b/drivers/mfd/intel_soc_pmic_mrfld.c
new file mode 100644 (file)
index 0000000..26a1551
--- /dev/null
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device access for Basin Cove PMIC
+ *
+ * Copyright (c) 2019, Intel Corporation.
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/intel_soc_pmic.h>
+#include <linux/mfd/intel_soc_pmic_mrfld.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <asm/intel_scu_ipc.h>
+
+/*
+ * Level 2 IRQs
+ *
+ * Firmware on systems with the Basin Cove PMIC services Level 1 IRQs
+ * without any assistance. Thus, each Level 1 IRQ is represented as a
+ * separate RTE in the IOAPIC.
+ */
+static struct resource irq_level2_resources[] = {
+       DEFINE_RES_IRQ(0), /* power button */
+       DEFINE_RES_IRQ(0), /* TMU */
+       DEFINE_RES_IRQ(0), /* thermal */
+       DEFINE_RES_IRQ(0), /* BCU */
+       DEFINE_RES_IRQ(0), /* ADC */
+       DEFINE_RES_IRQ(0), /* charger */
+       DEFINE_RES_IRQ(0), /* GPIO */
+};
+
+static const struct mfd_cell bcove_dev[] = {
+       {
+               .name = "mrfld_bcove_pwrbtn",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[0],
+       }, {
+               .name = "mrfld_bcove_tmu",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[1],
+       }, {
+               .name = "mrfld_bcove_thermal",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[2],
+       }, {
+               .name = "mrfld_bcove_bcu",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[3],
+       }, {
+               .name = "mrfld_bcove_adc",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[4],
+       }, {
+               .name = "mrfld_bcove_charger",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[5],
+       }, {
+               .name = "mrfld_bcove_pwrsrc",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[5],
+       }, {
+               .name = "mrfld_bcove_gpio",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[6],
+       },
+       {
+               .name = "mrfld_bcove_region",
+       },
+};
+
+static int bcove_ipc_byte_reg_read(void *context, unsigned int reg,
+                                   unsigned int *val)
+{
+       u8 ipc_out;
+       int ret;
+
+       ret = intel_scu_ipc_ioread8(reg, &ipc_out);
+       if (ret)
+               return ret;
+
+       *val = ipc_out;
+       return 0;
+}
+
+static int bcove_ipc_byte_reg_write(void *context, unsigned int reg,
+                                    unsigned int val)
+{
+       u8 ipc_in = val;
+       int ret;
+
+       ret = intel_scu_ipc_iowrite8(reg, ipc_in);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static const struct regmap_config bcove_regmap_config = {
+       .reg_bits = 16,
+       .val_bits = 8,
+       .max_register = 0xff,
+       .reg_write = bcove_ipc_byte_reg_write,
+       .reg_read = bcove_ipc_byte_reg_read,
+};
+
+static int bcove_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct intel_soc_pmic *pmic;
+       unsigned int i;
+       int ret;
+
+       pmic = devm_kzalloc(dev, sizeof(*pmic), GFP_KERNEL);
+       if (!pmic)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, pmic);
+       pmic->dev = &pdev->dev;
+
+       pmic->regmap = devm_regmap_init(dev, NULL, pmic, &bcove_regmap_config);
+       if (IS_ERR(pmic->regmap))
+               return PTR_ERR(pmic->regmap);
+
+       for (i = 0; i < ARRAY_SIZE(irq_level2_resources); i++) {
+               ret = platform_get_irq(pdev, i);
+               if (ret < 0)
+                       return ret;
+
+               irq_level2_resources[i].start = ret;
+               irq_level2_resources[i].end = ret;
+       }
+
+       return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
+                                   bcove_dev, ARRAY_SIZE(bcove_dev),
+                                   NULL, 0, NULL);
+}
+
+static const struct acpi_device_id bcove_acpi_ids[] = {
+       { "INTC100E" },
+       {}
+};
+MODULE_DEVICE_TABLE(acpi, bcove_acpi_ids);
+
+static struct platform_driver bcove_driver = {
+       .driver = {
+               .name = "intel_soc_pmic_mrfld",
+               .acpi_match_table = bcove_acpi_ids,
+       },
+       .probe = bcove_probe,
+};
+module_platform_driver(bcove_driver);
+
+MODULE_DESCRIPTION("IPC driver for Intel SoC Basin Cove PMIC");
+MODULE_LICENSE("GPL v2");
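A child cell driver would reach the PMIC through the regmap registered here; a minimal sketch, with a hypothetical register offset (0x6e is made up):

	struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent);
	unsigned int val;
	int ret;

	ret = regmap_read(pmic->regmap, 0x6e, &val);	/* routed through SCU IPC byte reads */
	if (ret)
		return ret;
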
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
deleted file mode 100644 (file)
index 082f169..0000000
+++ /dev/null
@@ -1,324 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 SoC ADC driver
- *
- * This driver synchronizes access to the JZ4740 ADC core between the
- * JZ4740 battery and hwmon drivers.
- */
-
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include <linux/clk.h>
-#include <linux/mfd/core.h>
-
-#include <linux/jz4740-adc.h>
-
-
-#define JZ_REG_ADC_ENABLE      0x00
-#define JZ_REG_ADC_CFG         0x04
-#define JZ_REG_ADC_CTRL                0x08
-#define JZ_REG_ADC_STATUS      0x0c
-
-#define JZ_REG_ADC_TOUCHSCREEN_BASE    0x10
-#define JZ_REG_ADC_BATTERY_BASE        0x1c
-#define JZ_REG_ADC_HWMON_BASE  0x20
-
-#define JZ_ADC_ENABLE_TOUCH    BIT(2)
-#define JZ_ADC_ENABLE_BATTERY  BIT(1)
-#define JZ_ADC_ENABLE_ADCIN    BIT(0)
-
-enum {
-       JZ_ADC_IRQ_ADCIN = 0,
-       JZ_ADC_IRQ_BATTERY,
-       JZ_ADC_IRQ_TOUCH,
-       JZ_ADC_IRQ_PENUP,
-       JZ_ADC_IRQ_PENDOWN,
-};
-
-struct jz4740_adc {
-       struct resource *mem;
-       void __iomem *base;
-
-       int irq;
-       struct irq_chip_generic *gc;
-
-       struct clk *clk;
-       atomic_t clk_ref;
-
-       spinlock_t lock;
-};
-
-static void jz4740_adc_irq_demux(struct irq_desc *desc)
-{
-       struct irq_chip_generic *gc = irq_desc_get_handler_data(desc);
-       uint8_t status;
-       unsigned int i;
-
-       status = readb(gc->reg_base + JZ_REG_ADC_STATUS);
-
-       for (i = 0; i < 5; ++i) {
-               if (status & BIT(i))
-                       generic_handle_irq(gc->irq_base + i);
-       }
-}
-
-
-/* Refcounting for the ADC clock is done in here instead of in the clock
- * framework, because it is the only clock which is shared between multiple
- * devices and thus is the only clock which needs refcounting */
-static inline void jz4740_adc_clk_enable(struct jz4740_adc *adc)
-{
-       if (atomic_inc_return(&adc->clk_ref) == 1)
-               clk_prepare_enable(adc->clk);
-}
-
-static inline void jz4740_adc_clk_disable(struct jz4740_adc *adc)
-{
-       if (atomic_dec_return(&adc->clk_ref) == 0)
-               clk_disable_unprepare(adc->clk);
-}
-
-static inline void jz4740_adc_set_enabled(struct jz4740_adc *adc, int engine,
-       bool enabled)
-{
-       unsigned long flags;
-       uint8_t val;
-
-       spin_lock_irqsave(&adc->lock, flags);
-
-       val = readb(adc->base + JZ_REG_ADC_ENABLE);
-       if (enabled)
-               val |= BIT(engine);
-       else
-               val &= ~BIT(engine);
-       writeb(val, adc->base + JZ_REG_ADC_ENABLE);
-
-       spin_unlock_irqrestore(&adc->lock, flags);
-}
-
-static int jz4740_adc_cell_enable(struct platform_device *pdev)
-{
-       struct jz4740_adc *adc = dev_get_drvdata(pdev->dev.parent);
-
-       jz4740_adc_clk_enable(adc);
-       jz4740_adc_set_enabled(adc, pdev->id, true);
-
-       return 0;
-}
-
-static int jz4740_adc_cell_disable(struct platform_device *pdev)
-{
-       struct jz4740_adc *adc = dev_get_drvdata(pdev->dev.parent);
-
-       jz4740_adc_set_enabled(adc, pdev->id, false);
-       jz4740_adc_clk_disable(adc);
-
-       return 0;
-}
-
-int jz4740_adc_set_config(struct device *dev, uint32_t mask, uint32_t val)
-{
-       struct jz4740_adc *adc = dev_get_drvdata(dev);
-       unsigned long flags;
-       uint32_t cfg;
-
-       if (!adc)
-               return -ENODEV;
-
-       spin_lock_irqsave(&adc->lock, flags);
-
-       cfg = readl(adc->base + JZ_REG_ADC_CFG);
-
-       cfg &= ~mask;
-       cfg |= val;
-
-       writel(cfg, adc->base + JZ_REG_ADC_CFG);
-
-       spin_unlock_irqrestore(&adc->lock, flags);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(jz4740_adc_set_config);
-
-static struct resource jz4740_hwmon_resources[] = {
-       {
-               .start = JZ_ADC_IRQ_ADCIN,
-               .flags = IORESOURCE_IRQ,
-       },
-       {
-               .start  = JZ_REG_ADC_HWMON_BASE,
-               .end    = JZ_REG_ADC_HWMON_BASE + 3,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static struct resource jz4740_battery_resources[] = {
-       {
-               .start = JZ_ADC_IRQ_BATTERY,
-               .flags = IORESOURCE_IRQ,
-       },
-       {
-               .start  = JZ_REG_ADC_BATTERY_BASE,
-               .end    = JZ_REG_ADC_BATTERY_BASE + 3,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static const struct mfd_cell jz4740_adc_cells[] = {
-       {
-               .id = 0,
-               .name = "jz4740-hwmon",
-               .num_resources = ARRAY_SIZE(jz4740_hwmon_resources),
-               .resources = jz4740_hwmon_resources,
-
-               .enable = jz4740_adc_cell_enable,
-               .disable = jz4740_adc_cell_disable,
-       },
-       {
-               .id = 1,
-               .name = "jz4740-battery",
-               .num_resources = ARRAY_SIZE(jz4740_battery_resources),
-               .resources = jz4740_battery_resources,
-
-               .enable = jz4740_adc_cell_enable,
-               .disable = jz4740_adc_cell_disable,
-       },
-};
-
-static int jz4740_adc_probe(struct platform_device *pdev)
-{
-       struct irq_chip_generic *gc;
-       struct irq_chip_type *ct;
-       struct jz4740_adc *adc;
-       struct resource *mem_base;
-       int ret;
-       int irq_base;
-
-       adc = devm_kzalloc(&pdev->dev, sizeof(*adc), GFP_KERNEL);
-       if (!adc)
-               return -ENOMEM;
-
-       adc->irq = platform_get_irq(pdev, 0);
-       if (adc->irq < 0) {
-               ret = adc->irq;
-               dev_err(&pdev->dev, "Failed to get platform irq: %d\n", ret);
-               return ret;
-       }
-
-       irq_base = platform_get_irq(pdev, 1);
-       if (irq_base < 0) {
-               dev_err(&pdev->dev, "Failed to get irq base: %d\n", irq_base);
-               return irq_base;
-       }
-
-       mem_base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!mem_base) {
-               dev_err(&pdev->dev, "Failed to get platform mmio resource\n");
-               return -ENOENT;
-       }
-
-       /* Only request the shared registers for the MFD driver */
-       adc->mem = request_mem_region(mem_base->start, JZ_REG_ADC_STATUS,
-                                       pdev->name);
-       if (!adc->mem) {
-               dev_err(&pdev->dev, "Failed to request mmio memory region\n");
-               return -EBUSY;
-       }
-
-       adc->base = ioremap_nocache(adc->mem->start, resource_size(adc->mem));
-       if (!adc->base) {
-               ret = -EBUSY;
-               dev_err(&pdev->dev, "Failed to ioremap mmio memory\n");
-               goto err_release_mem_region;
-       }
-
-       adc->clk = clk_get(&pdev->dev, "adc");
-       if (IS_ERR(adc->clk)) {
-               ret = PTR_ERR(adc->clk);
-               dev_err(&pdev->dev, "Failed to get clock: %d\n", ret);
-               goto err_iounmap;
-       }
-
-       spin_lock_init(&adc->lock);
-       atomic_set(&adc->clk_ref, 0);
-
-       platform_set_drvdata(pdev, adc);
-
-       gc = irq_alloc_generic_chip("INTC", 1, irq_base, adc->base,
-               handle_level_irq);
-
-       ct = gc->chip_types;
-       ct->regs.mask = JZ_REG_ADC_CTRL;
-       ct->regs.ack = JZ_REG_ADC_STATUS;
-       ct->chip.irq_mask = irq_gc_mask_set_bit;
-       ct->chip.irq_unmask = irq_gc_mask_clr_bit;
-       ct->chip.irq_ack = irq_gc_ack_set_bit;
-
-       irq_setup_generic_chip(gc, IRQ_MSK(5), IRQ_GC_INIT_MASK_CACHE, 0,
-                               IRQ_NOPROBE | IRQ_LEVEL);
-
-       adc->gc = gc;
-
-       irq_set_chained_handler_and_data(adc->irq, jz4740_adc_irq_demux, gc);
-
-       writeb(0x00, adc->base + JZ_REG_ADC_ENABLE);
-       writeb(0xff, adc->base + JZ_REG_ADC_CTRL);
-
-       ret = mfd_add_devices(&pdev->dev, 0, jz4740_adc_cells,
-                             ARRAY_SIZE(jz4740_adc_cells), mem_base,
-                             irq_base, NULL);
-       if (ret < 0)
-               goto err_clk_put;
-
-       return 0;
-
-err_clk_put:
-       clk_put(adc->clk);
-err_iounmap:
-       iounmap(adc->base);
-err_release_mem_region:
-       release_mem_region(adc->mem->start, resource_size(adc->mem));
-       return ret;
-}
-
-static int jz4740_adc_remove(struct platform_device *pdev)
-{
-       struct jz4740_adc *adc = platform_get_drvdata(pdev);
-
-       mfd_remove_devices(&pdev->dev);
-
-       irq_remove_generic_chip(adc->gc, IRQ_MSK(5), IRQ_NOPROBE | IRQ_LEVEL, 0);
-       kfree(adc->gc);
-       irq_set_chained_handler_and_data(adc->irq, NULL, NULL);
-
-       iounmap(adc->base);
-       release_mem_region(adc->mem->start, resource_size(adc->mem));
-
-       clk_put(adc->clk);
-
-       return 0;
-}
-
-static struct platform_driver jz4740_adc_driver = {
-       .probe  = jz4740_adc_probe,
-       .remove = jz4740_adc_remove,
-       .driver = {
-               .name = "jz4740-adc",
-       },
-};
-
-module_platform_driver(jz4740_adc_driver);
-
-MODULE_DESCRIPTION("JZ4740 SoC ADC driver");
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:jz4740-adc");
index ebb13d5..fd8864c 100644 (file)
@@ -297,11 +297,11 @@ static int max77836_init(struct max14577 *max14577)
        int ret;
        u8 intsrc_mask;
 
-       max14577->i2c_pmic = i2c_new_dummy(max14577->i2c->adapter,
+       max14577->i2c_pmic = i2c_new_dummy_device(max14577->i2c->adapter,
                        I2C_ADDR_PMIC);
-       if (!max14577->i2c_pmic) {
+       if (IS_ERR(max14577->i2c_pmic)) {
                dev_err(max14577->dev, "Failed to register PMIC I2C device\n");
-               return -ENODEV;
+               return PTR_ERR(max14577->i2c_pmic);
        }
        i2c_set_clientdata(max14577->i2c_pmic, max14577);
 
index 0c28965..a851ff4 100644 (file)
@@ -416,8 +416,10 @@ static int max77620_initialise_fps(struct max77620_chip *chip)
 
        for_each_child_of_node(fps_np, fps_child) {
                ret = max77620_config_fps(chip, fps_child);
-               if (ret < 0)
+               if (ret < 0) {
+                       of_node_put(fps_child);
                        return ret;
+               }
        }
 
        config = chip->enable_global_lpm ? MAX77620_ONOFFCNFG2_SLP_LPM_MSK : 0;
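for_each_child_of_node() holds a reference on the current child and drops it on the next iteration, so any early exit must drop it by hand, which is exactly what this fix adds; the general shape, as a sketch (do_config() is a hypothetical per-child setup):

	struct device_node *child;
	int ret;

	for_each_child_of_node(parent, child) {
		ret = do_config(child);
		if (ret) {
			of_node_put(child);	/* balance the iterator's reference */
			return ret;
		}
	}
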
index 901d99d..596ed85 100644 (file)
@@ -183,17 +183,17 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
        } else
                dev_info(max77693->dev, "device ID: 0x%x\n", reg_data);
 
-       max77693->i2c_muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC);
-       if (!max77693->i2c_muic) {
+       max77693->i2c_muic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_MUIC);
+       if (IS_ERR(max77693->i2c_muic)) {
                dev_err(max77693->dev, "Failed to allocate I2C device for MUIC\n");
-               return -ENODEV;
+               return PTR_ERR(max77693->i2c_muic);
        }
        i2c_set_clientdata(max77693->i2c_muic, max77693);
 
-       max77693->i2c_haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC);
-       if (!max77693->i2c_haptic) {
+       max77693->i2c_haptic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_HAPTIC);
+       if (IS_ERR(max77693->i2c_haptic)) {
                dev_err(max77693->dev, "Failed to allocate I2C device for Haptic\n");
-               ret = -ENODEV;
+               ret = PTR_ERR(max77693->i2c_haptic);
                goto err_i2c_haptic;
        }
        i2c_set_clientdata(max77693->i2c_haptic, max77693);
index 25cbb22..209ee24 100644 (file)
@@ -70,11 +70,11 @@ static int max77843_chg_init(struct max77693_dev *max77843)
 {
        int ret;
 
-       max77843->i2c_chg = i2c_new_dummy(max77843->i2c->adapter, I2C_ADDR_CHG);
-       if (!max77843->i2c_chg) {
+       max77843->i2c_chg = i2c_new_dummy_device(max77843->i2c->adapter, I2C_ADDR_CHG);
+       if (IS_ERR(max77843->i2c_chg)) {
                dev_err(&max77843->i2c->dev,
                                "Cannot allocate I2C device for Charger\n");
-               return -ENODEV;
+               return PTR_ERR(max77843->i2c_chg);
        }
        i2c_set_clientdata(max77843->i2c_chg, max77843);
 
index cc01f70..d44baaf 100644 (file)
@@ -214,9 +214,9 @@ static int max8907_i2c_probe(struct i2c_client *i2c,
                goto err_regmap_gen;
        }
 
-       max8907->i2c_rtc = i2c_new_dummy(i2c->adapter, MAX8907_RTC_I2C_ADDR);
-       if (!max8907->i2c_rtc) {
-               ret = -ENOMEM;
+       max8907->i2c_rtc = i2c_new_dummy_device(i2c->adapter, MAX8907_RTC_I2C_ADDR);
+       if (IS_ERR(max8907->i2c_rtc)) {
+               ret = PTR_ERR(max8907->i2c_rtc);
                goto err_dummy_rtc;
        }
        i2c_set_clientdata(max8907->i2c_rtc, max8907);
index 20bb19b..114e905 100644 (file)
@@ -176,18 +176,18 @@ static int max8925_probe(struct i2c_client *client,
        dev_set_drvdata(chip->dev, chip);
        mutex_init(&chip->io_lock);
 
-       chip->rtc = i2c_new_dummy(chip->i2c->adapter, RTC_I2C_ADDR);
-       if (!chip->rtc) {
+       chip->rtc = i2c_new_dummy_device(chip->i2c->adapter, RTC_I2C_ADDR);
+       if (IS_ERR(chip->rtc)) {
                dev_err(chip->dev, "Failed to allocate I2C device for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(chip->rtc);
        }
        i2c_set_clientdata(chip->rtc, chip);
 
-       chip->adc = i2c_new_dummy(chip->i2c->adapter, ADC_I2C_ADDR);
-       if (!chip->adc) {
+       chip->adc = i2c_new_dummy_device(chip->i2c->adapter, ADC_I2C_ADDR);
+       if (IS_ERR(chip->adc)) {
                dev_err(chip->dev, "Failed to allocate I2C device for ADC\n");
                i2c_unregister_device(chip->rtc);
-               return -ENODEV;
+               return PTR_ERR(chip->adc);
        }
        i2c_set_clientdata(chip->adc, chip);
 
index 8c06c09..68d8f2b 100644 (file)
@@ -185,25 +185,25 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 
        mutex_init(&max8997->iolock);
 
-       max8997->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC);
-       if (!max8997->rtc) {
+       max8997->rtc = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_RTC);
+       if (IS_ERR(max8997->rtc)) {
                dev_err(max8997->dev, "Failed to allocate I2C device for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(max8997->rtc);
        }
        i2c_set_clientdata(max8997->rtc, max8997);
 
-       max8997->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC);
-       if (!max8997->haptic) {
+       max8997->haptic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_HAPTIC);
+       if (IS_ERR(max8997->haptic)) {
                dev_err(max8997->dev, "Failed to allocate I2C device for Haptic\n");
-               ret = -ENODEV;
+               ret = PTR_ERR(max8997->haptic);
                goto err_i2c_haptic;
        }
        i2c_set_clientdata(max8997->haptic, max8997);
 
-       max8997->muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC);
-       if (!max8997->muic) {
+       max8997->muic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_MUIC);
+       if (IS_ERR(max8997->muic)) {
                dev_err(max8997->dev, "Failed to allocate I2C device for MUIC\n");
-               ret = -ENODEV;
+               ret = PTR_ERR(max8997->muic);
                goto err_i2c_muic;
        }
        i2c_set_clientdata(max8997->muic, max8997);
index 56409df..785f8e9 100644 (file)
@@ -195,10 +195,10 @@ static int max8998_i2c_probe(struct i2c_client *i2c,
        }
        mutex_init(&max8998->iolock);
 
-       max8998->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
-       if (!max8998->rtc) {
+       max8998->rtc = i2c_new_dummy_device(i2c->adapter, RTC_I2C_ADDR);
+       if (IS_ERR(max8998->rtc)) {
                dev_err(&i2c->dev, "Failed to allocate I2C device for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(max8998->rtc);
        }
        i2c_set_clientdata(max8998->rtc, max8998);
 
index 337bccc..b2c325e 100644 (file)
@@ -5,34 +5,34 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/regmap.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/mt6397/core.h>
 #include <linux/mfd/mt6323/core.h>
-#include <linux/mfd/mt6397/registers.h>
+#include <linux/mfd/mt6397/core.h>
 #include <linux/mfd/mt6323/registers.h>
+#include <linux/mfd/mt6397/registers.h>
+
+#define MT6323_RTC_BASE                0x8000
+#define MT6323_RTC_SIZE                0x40
 
 #define MT6397_RTC_BASE                0xe000
 #define MT6397_RTC_SIZE                0x3e
 
-#define MT6323_CID_CODE                0x23
-#define MT6391_CID_CODE                0x91
-#define MT6397_CID_CODE                0x97
+#define MT6323_PWRC_BASE       0x8000
+#define MT6323_PWRC_SIZE       0x40
+
+static const struct resource mt6323_rtc_resources[] = {
+       DEFINE_RES_MEM(MT6323_RTC_BASE, MT6323_RTC_SIZE),
+       DEFINE_RES_IRQ(MT6323_IRQ_STATUS_RTC),
+};
 
 static const struct resource mt6397_rtc_resources[] = {
-       {
-               .start = MT6397_RTC_BASE,
-               .end   = MT6397_RTC_BASE + MT6397_RTC_SIZE,
-               .flags = IORESOURCE_MEM,
-       },
-       {
-               .start = MT6397_IRQ_RTC,
-               .end   = MT6397_IRQ_RTC,
-               .flags = IORESOURCE_IRQ,
-       },
+       DEFINE_RES_MEM(MT6397_RTC_BASE, MT6397_RTC_SIZE),
+       DEFINE_RES_IRQ(MT6397_IRQ_RTC),
 };
 
 static const struct resource mt6323_keys_resources[] = {
@@ -45,8 +45,17 @@ static const struct resource mt6397_keys_resources[] = {
        DEFINE_RES_IRQ(MT6397_IRQ_HOMEKEY),
 };
 
+static const struct resource mt6323_pwrc_resources[] = {
+       DEFINE_RES_MEM(MT6323_PWRC_BASE, MT6323_PWRC_SIZE),
+};
+
 static const struct mfd_cell mt6323_devs[] = {
        {
+               .name = "mt6323-rtc",
+               .num_resources = ARRAY_SIZE(mt6323_rtc_resources),
+               .resources = mt6323_rtc_resources,
+               .of_compatible = "mediatek,mt6323-rtc",
+       }, {
                .name = "mt6323-regulator",
                .of_compatible = "mediatek,mt6323-regulator"
        }, {
@@ -57,6 +66,11 @@ static const struct mfd_cell mt6323_devs[] = {
                .num_resources = ARRAY_SIZE(mt6323_keys_resources),
                .resources = mt6323_keys_resources,
                .of_compatible = "mediatek,mt6323-keys"
+       }, {
+               .name = "mt6323-pwrc",
+               .num_resources = ARRAY_SIZE(mt6323_pwrc_resources),
+               .resources = mt6323_pwrc_resources,
+               .of_compatible = "mediatek,mt6323-pwrc"
        },
 };
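The resource tables above now use the DEFINE_RES_MEM()/DEFINE_RES_IRQ() helpers from <linux/ioport.h> (hence the added include) instead of open-coded struct resource initializers. One behavioral detail: DEFINE_RES_MEM(start, size) sets .end to start + size - 1, whereas the removed initializer used base + size, so the converted RTC region also loses a one-byte overshoot. Sketched with the MT6323 values:

	static const struct resource example_rtc_resources[] = {
		DEFINE_RES_MEM(0x8000, 0x40),	/* spans 0x8000..0x803f */
		DEFINE_RES_IRQ(MT6323_IRQ_STATUS_RTC),
	};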
 
@@ -86,148 +100,6 @@ static const struct mfd_cell mt6397_devs[] = {
        }
 };
 
-static void mt6397_irq_lock(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-
-       mutex_lock(&mt6397->irqlock);
-}
-
-static void mt6397_irq_sync_unlock(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-
-       regmap_write(mt6397->regmap, mt6397->int_con[0],
-                    mt6397->irq_masks_cur[0]);
-       regmap_write(mt6397->regmap, mt6397->int_con[1],
-                    mt6397->irq_masks_cur[1]);
-
-       mutex_unlock(&mt6397->irqlock);
-}
-
-static void mt6397_irq_disable(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-       int shift = data->hwirq & 0xf;
-       int reg = data->hwirq >> 4;
-
-       mt6397->irq_masks_cur[reg] &= ~BIT(shift);
-}
-
-static void mt6397_irq_enable(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-       int shift = data->hwirq & 0xf;
-       int reg = data->hwirq >> 4;
-
-       mt6397->irq_masks_cur[reg] |= BIT(shift);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int mt6397_irq_set_wake(struct irq_data *irq_data, unsigned int on)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(irq_data);
-       int shift = irq_data->hwirq & 0xf;
-       int reg = irq_data->hwirq >> 4;
-
-       if (on)
-               mt6397->wake_mask[reg] |= BIT(shift);
-       else
-               mt6397->wake_mask[reg] &= ~BIT(shift);
-
-       return 0;
-}
-#else
-#define mt6397_irq_set_wake NULL
-#endif
-
-static struct irq_chip mt6397_irq_chip = {
-       .name = "mt6397-irq",
-       .irq_bus_lock = mt6397_irq_lock,
-       .irq_bus_sync_unlock = mt6397_irq_sync_unlock,
-       .irq_enable = mt6397_irq_enable,
-       .irq_disable = mt6397_irq_disable,
-       .irq_set_wake = mt6397_irq_set_wake,
-};
-
-static void mt6397_irq_handle_reg(struct mt6397_chip *mt6397, int reg,
-               int irqbase)
-{
-       unsigned int status;
-       int i, irq, ret;
-
-       ret = regmap_read(mt6397->regmap, reg, &status);
-       if (ret) {
-               dev_err(mt6397->dev, "Failed to read irq status: %d\n", ret);
-               return;
-       }
-
-       for (i = 0; i < 16; i++) {
-               if (status & BIT(i)) {
-                       irq = irq_find_mapping(mt6397->irq_domain, irqbase + i);
-                       if (irq)
-                               handle_nested_irq(irq);
-               }
-       }
-
-       regmap_write(mt6397->regmap, reg, status);
-}
-
-static irqreturn_t mt6397_irq_thread(int irq, void *data)
-{
-       struct mt6397_chip *mt6397 = data;
-
-       mt6397_irq_handle_reg(mt6397, mt6397->int_status[0], 0);
-       mt6397_irq_handle_reg(mt6397, mt6397->int_status[1], 16);
-
-       return IRQ_HANDLED;
-}
-
-static int mt6397_irq_domain_map(struct irq_domain *d, unsigned int irq,
-                                       irq_hw_number_t hw)
-{
-       struct mt6397_chip *mt6397 = d->host_data;
-
-       irq_set_chip_data(irq, mt6397);
-       irq_set_chip_and_handler(irq, &mt6397_irq_chip, handle_level_irq);
-       irq_set_nested_thread(irq, 1);
-       irq_set_noprobe(irq);
-
-       return 0;
-}
-
-static const struct irq_domain_ops mt6397_irq_domain_ops = {
-       .map = mt6397_irq_domain_map,
-};
-
-static int mt6397_irq_init(struct mt6397_chip *mt6397)
-{
-       int ret;
-
-       mutex_init(&mt6397->irqlock);
-
-       /* Mask all interrupt sources */
-       regmap_write(mt6397->regmap, mt6397->int_con[0], 0x0);
-       regmap_write(mt6397->regmap, mt6397->int_con[1], 0x0);
-
-       mt6397->irq_domain = irq_domain_add_linear(mt6397->dev->of_node,
-               MT6397_IRQ_NR, &mt6397_irq_domain_ops, mt6397);
-       if (!mt6397->irq_domain) {
-               dev_err(mt6397->dev, "could not create irq domain\n");
-               return -ENOMEM;
-       }
-
-       ret = devm_request_threaded_irq(mt6397->dev, mt6397->irq, NULL,
-               mt6397_irq_thread, IRQF_ONESHOT, "mt6397-pmic", mt6397);
-       if (ret) {
-               dev_err(mt6397->dev, "failed to register irq=%d; err: %d\n",
-                       mt6397->irq, ret);
-               return ret;
-       }
-
-       return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int mt6397_irq_suspend(struct device *dev)
 {
@@ -257,11 +129,27 @@ static int mt6397_irq_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(mt6397_pm_ops, mt6397_irq_suspend,
                        mt6397_irq_resume);
 
+struct chip_data {
+       u32 cid_addr;
+       u32 cid_shift;
+};
+
+static const struct chip_data mt6323_core = {
+       .cid_addr = MT6323_CID,
+       .cid_shift = 0,
+};
+
+static const struct chip_data mt6397_core = {
+       .cid_addr = MT6397_CID,
+       .cid_shift = 0,
+};
+
 static int mt6397_probe(struct platform_device *pdev)
 {
        int ret;
        unsigned int id;
        struct mt6397_chip *pmic;
+       const struct chip_data *pmic_core;
 
        pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL);
        if (!pmic)
@@ -277,50 +165,44 @@ static int mt6397_probe(struct platform_device *pdev)
        if (!pmic->regmap)
                return -ENODEV;
 
-       platform_set_drvdata(pdev, pmic);
+       pmic_core = of_device_get_match_data(&pdev->dev);
+       if (!pmic_core)
+               return -ENODEV;
 
-       ret = regmap_read(pmic->regmap, MT6397_CID, &id);
+       ret = regmap_read(pmic->regmap, pmic_core->cid_addr, &id);
        if (ret) {
-               dev_err(pmic->dev, "Failed to read chip id: %d\n", ret);
+               dev_err(&pdev->dev, "Failed to read chip id: %d\n", ret);
                return ret;
        }
 
+       pmic->chip_id = (id >> pmic_core->cid_shift) & 0xff;
+
+       platform_set_drvdata(pdev, pmic);
+
        pmic->irq = platform_get_irq(pdev, 0);
        if (pmic->irq <= 0)
                return pmic->irq;
 
-       switch (id & 0xff) {
-       case MT6323_CID_CODE:
-               pmic->int_con[0] = MT6323_INT_CON0;
-               pmic->int_con[1] = MT6323_INT_CON1;
-               pmic->int_status[0] = MT6323_INT_STATUS0;
-               pmic->int_status[1] = MT6323_INT_STATUS1;
-               ret = mt6397_irq_init(pmic);
-               if (ret)
-                       return ret;
+       ret = mt6397_irq_init(pmic);
+       if (ret)
+               return ret;
 
+       switch (pmic->chip_id) {
+       case MT6323_CHIP_ID:
                ret = devm_mfd_add_devices(&pdev->dev, -1, mt6323_devs,
                                           ARRAY_SIZE(mt6323_devs), NULL,
                                           0, pmic->irq_domain);
                break;
 
-       case MT6397_CID_CODE:
-       case MT6391_CID_CODE:
-               pmic->int_con[0] = MT6397_INT_CON0;
-               pmic->int_con[1] = MT6397_INT_CON1;
-               pmic->int_status[0] = MT6397_INT_STATUS0;
-               pmic->int_status[1] = MT6397_INT_STATUS1;
-               ret = mt6397_irq_init(pmic);
-               if (ret)
-                       return ret;
-
+       case MT6391_CHIP_ID:
+       case MT6397_CHIP_ID:
                ret = devm_mfd_add_devices(&pdev->dev, -1, mt6397_devs,
                                           ARRAY_SIZE(mt6397_devs), NULL,
                                           0, pmic->irq_domain);
                break;
 
        default:
-               dev_err(&pdev->dev, "unsupported chip: %d\n", id);
+               dev_err(&pdev->dev, "unsupported chip: %d\n", pmic->chip_id);
                return -ENODEV;
        }
 
@@ -333,9 +215,15 @@ static int mt6397_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id mt6397_of_match[] = {
-       { .compatible = "mediatek,mt6397" },
-       { .compatible = "mediatek,mt6323" },
-       { }
+       {
+               .compatible = "mediatek,mt6323",
+               .data = &mt6323_core,
+       }, {
+               .compatible = "mediatek,mt6397",
+               .data = &mt6397_core,
+       }, {
+               /* sentinel */
+       }
 };
 MODULE_DEVICE_TABLE(of, mt6397_of_match);
 
diff --git a/drivers/mfd/mt6397-irq.c b/drivers/mfd/mt6397-irq.c
new file mode 100644 (file)
index 0000000..b2d3ce1
--- /dev/null
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2019 MediaTek Inc.
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/mt6323/core.h>
+#include <linux/mfd/mt6323/registers.h>
+#include <linux/mfd/mt6397/core.h>
+#include <linux/mfd/mt6397/registers.h>
+
+static void mt6397_irq_lock(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+
+       mutex_lock(&mt6397->irqlock);
+}
+
+static void mt6397_irq_sync_unlock(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+
+       regmap_write(mt6397->regmap, mt6397->int_con[0],
+                    mt6397->irq_masks_cur[0]);
+       regmap_write(mt6397->regmap, mt6397->int_con[1],
+                    mt6397->irq_masks_cur[1]);
+
+       mutex_unlock(&mt6397->irqlock);
+}
+
+static void mt6397_irq_disable(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+       int shift = data->hwirq & 0xf;
+       int reg = data->hwirq >> 4;
+
+       mt6397->irq_masks_cur[reg] &= ~BIT(shift);
+}
+
+static void mt6397_irq_enable(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+       int shift = data->hwirq & 0xf;
+       int reg = data->hwirq >> 4;
+
+       mt6397->irq_masks_cur[reg] |= BIT(shift);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mt6397_irq_set_wake(struct irq_data *irq_data, unsigned int on)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(irq_data);
+       int shift = irq_data->hwirq & 0xf;
+       int reg = irq_data->hwirq >> 4;
+
+       if (on)
+               mt6397->wake_mask[reg] |= BIT(shift);
+       else
+               mt6397->wake_mask[reg] &= ~BIT(shift);
+
+       return 0;
+}
+#else
+#define mt6397_irq_set_wake NULL
+#endif
+
+static struct irq_chip mt6397_irq_chip = {
+       .name = "mt6397-irq",
+       .irq_bus_lock = mt6397_irq_lock,
+       .irq_bus_sync_unlock = mt6397_irq_sync_unlock,
+       .irq_enable = mt6397_irq_enable,
+       .irq_disable = mt6397_irq_disable,
+       .irq_set_wake = mt6397_irq_set_wake,
+};
+
+static void mt6397_irq_handle_reg(struct mt6397_chip *mt6397, int reg,
+                                 int irqbase)
+{
+       unsigned int status;
+       int i, irq, ret;
+
+       ret = regmap_read(mt6397->regmap, reg, &status);
+       if (ret) {
+               dev_err(mt6397->dev, "Failed to read irq status: %d\n", ret);
+               return;
+       }
+
+       for (i = 0; i < 16; i++) {
+               if (status & BIT(i)) {
+                       irq = irq_find_mapping(mt6397->irq_domain, irqbase + i);
+                       if (irq)
+                               handle_nested_irq(irq);
+               }
+       }
+
+       regmap_write(mt6397->regmap, reg, status);
+}
+
+static irqreturn_t mt6397_irq_thread(int irq, void *data)
+{
+       struct mt6397_chip *mt6397 = data;
+
+       mt6397_irq_handle_reg(mt6397, mt6397->int_status[0], 0);
+       mt6397_irq_handle_reg(mt6397, mt6397->int_status[1], 16);
+
+       return IRQ_HANDLED;
+}
+
+static int mt6397_irq_domain_map(struct irq_domain *d, unsigned int irq,
+                                irq_hw_number_t hw)
+{
+       struct mt6397_chip *mt6397 = d->host_data;
+
+       irq_set_chip_data(irq, mt6397);
+       irq_set_chip_and_handler(irq, &mt6397_irq_chip, handle_level_irq);
+       irq_set_nested_thread(irq, 1);
+       irq_set_noprobe(irq);
+
+       return 0;
+}
+
+static const struct irq_domain_ops mt6397_irq_domain_ops = {
+       .map = mt6397_irq_domain_map,
+};
+
+int mt6397_irq_init(struct mt6397_chip *chip)
+{
+       int ret;
+
+       mutex_init(&chip->irqlock);
+
+       switch (chip->chip_id) {
+       case MT6323_CHIP_ID:
+               chip->int_con[0] = MT6323_INT_CON0;
+               chip->int_con[1] = MT6323_INT_CON1;
+               chip->int_status[0] = MT6323_INT_STATUS0;
+               chip->int_status[1] = MT6323_INT_STATUS1;
+               break;
+
+       case MT6391_CHIP_ID:
+       case MT6397_CHIP_ID:
+               chip->int_con[0] = MT6397_INT_CON0;
+               chip->int_con[1] = MT6397_INT_CON1;
+               chip->int_status[0] = MT6397_INT_STATUS0;
+               chip->int_status[1] = MT6397_INT_STATUS1;
+               break;
+
+       default:
+               dev_err(chip->dev, "unsupported chip: 0x%x\n", chip->chip_id);
+               return -ENODEV;
+       }
+
+       /* Mask all interrupt sources */
+       regmap_write(chip->regmap, chip->int_con[0], 0x0);
+       regmap_write(chip->regmap, chip->int_con[1], 0x0);
+
+       chip->irq_domain = irq_domain_add_linear(chip->dev->of_node,
+                                                MT6397_IRQ_NR,
+                                                &mt6397_irq_domain_ops,
+                                                chip);
+       if (!chip->irq_domain) {
+               dev_err(chip->dev, "could not create irq domain\n");
+               return -ENOMEM;
+       }
+
+       ret = devm_request_threaded_irq(chip->dev, chip->irq, NULL,
+                                       mt6397_irq_thread, IRQF_ONESHOT,
+                                       "mt6397-pmic", chip);
+       if (ret) {
+               dev_err(chip->dev, "failed to register irq=%d; err: %d\n",
+                       chip->irq, ret);
+               return ret;
+       }
+
+       return 0;
+}
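The interrupt plumbing moved into mt6397-irq.c handles two 16-source banks behind one linear domain; a hwirq is split as reg = hwirq >> 4 (which int_con[]/int_status[] register) and shift = hwirq & 0xf (which bit). A worked example:

	/* hwirq 21: reg = 21 >> 4 = 1, shift = 21 & 0xf = 5,
	 * so enable/mask state lives in bit 5 of int_con[1] and
	 * status in bit 5 of int_status[1] -- matching the 0/16
	 * irqbase split in mt6397_irq_thread(). */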
index 6818ff3..f5b3fa9 100644 (file)
@@ -549,12 +549,12 @@ static int palmas_i2c_probe(struct i2c_client *i2c,
                        palmas->i2c_clients[i] = i2c;
                else {
                        palmas->i2c_clients[i] =
-                                       i2c_new_dummy(i2c->adapter,
+                                       i2c_new_dummy_device(i2c->adapter,
                                                        i2c->addr + i);
-                       if (!palmas->i2c_clients[i]) {
+                       if (IS_ERR(palmas->i2c_clients[i])) {
                                dev_err(palmas->dev,
                                        "can't attach client %d\n", i);
-                               ret = -ENOMEM;
+                               ret = PTR_ERR(palmas->i2c_clients[i]);
                                goto err_i2c;
                        }
                        palmas->i2c_clients[i]->dev.of_node = of_node_get(node);
index 4d7e900..71bc34b 100644 (file)
@@ -561,22 +561,16 @@ static int qcom_rpm_probe(struct platform_device *pdev)
        clk_prepare_enable(rpm->ramclk); /* Accepts NULL */
 
        irq_ack = platform_get_irq_byname(pdev, "ack");
-       if (irq_ack < 0) {
-               dev_err(&pdev->dev, "required ack interrupt missing\n");
+       if (irq_ack < 0)
                return irq_ack;
-       }
 
        irq_err = platform_get_irq_byname(pdev, "err");
-       if (irq_err < 0) {
-               dev_err(&pdev->dev, "required err interrupt missing\n");
+       if (irq_err < 0)
                return irq_err;
-       }
 
        irq_wakeup = platform_get_irq_byname(pdev, "wakeup");
-       if (irq_wakeup < 0) {
-               dev_err(&pdev->dev, "required wakeup interrupt missing\n");
+       if (irq_wakeup < 0)
                return irq_wakeup;
-       }
 
        match = of_match_device(qcom_rpm_of_match, &pdev->dev);
        if (!match)
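The removed dev_err() calls here (and in the sm501 hunk that follows) duplicated logging the platform core now performs itself: platform_get_irq() and platform_get_irq_byname() print an error message on failure, so callers simply propagate the negative return value:

	irq_ack = platform_get_irq_byname(pdev, "ack");
	if (irq_ack < 0)
		return irq_ack;	/* the core already logged the failure */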
index 9b9b06d..154270f 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/platform_data/i2c-gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/gpio/machine.h>
 #include <linux/slab.h>
 
@@ -1394,10 +1395,8 @@ static int sm501_plat_probe(struct platform_device *dev)
        sm->platdata = dev_get_platdata(&dev->dev);
 
        ret = platform_get_irq(dev, 0);
-       if (ret < 0) {
-               dev_err(&dev->dev, "failed to get irq resource\n");
+       if (ret < 0)
                goto err_res;
-       }
        sm->irq = ret;
 
        sm->io_res = platform_get_resource(dev, IORESOURCE_MEM, 1);
index 60c122e..faecbca 100644 (file)
@@ -626,8 +626,7 @@ static const struct mfd_cell timberdale_cells_bar2[] = {
 static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
        char *buf)
 {
-       struct pci_dev *pdev = to_pci_dev(dev);
-       struct timberdale_device *priv = pci_get_drvdata(pdev);
+       struct timberdale_device *priv = dev_get_drvdata(dev);
 
        return sprintf(buf, "%d.%d.%d\n", priv->fw.major, priv->fw.minor,
                priv->fw.config);
index 865257a..907452b 100644 (file)
@@ -437,12 +437,11 @@ static int tps80031_probe(struct i2c_client *client,
                if (tps80031_slave_address[i] == client->addr)
                        tps80031->clients[i] = client;
                else
-                       tps80031->clients[i] = i2c_new_dummy(client->adapter,
-                                               tps80031_slave_address[i]);
-               if (!tps80031->clients[i]) {
+                       tps80031->clients[i] = devm_i2c_new_dummy_device(&client->dev,
+                                               client->adapter, tps80031_slave_address[i]);
+               if (IS_ERR(tps80031->clients[i])) {
                        dev_err(&client->dev, "can't attach client %d\n", i);
-                       ret = -ENOMEM;
-                       goto fail_client_reg;
+                       return PTR_ERR(tps80031->clients[i]);
                }
 
                i2c_set_clientdata(tps80031->clients[i], tps80031);
@@ -452,7 +451,7 @@ static int tps80031_probe(struct i2c_client *client,
                        ret = PTR_ERR(tps80031->regmap[i]);
                        dev_err(&client->dev,
                                "regmap %d init failed, err %d\n", i, ret);
-                       goto fail_client_reg;
+                       return ret;
                }
        }
 
@@ -461,7 +460,7 @@ static int tps80031_probe(struct i2c_client *client,
        if (ret < 0) {
                dev_err(&client->dev,
                        "Silicon version number read failed: %d\n", ret);
-               goto fail_client_reg;
+               return ret;
        }
 
        ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
@@ -469,7 +468,7 @@ static int tps80031_probe(struct i2c_client *client,
        if (ret < 0) {
                dev_err(&client->dev,
                        "Silicon eeprom version read failed: %d\n", ret);
-               goto fail_client_reg;
+               return ret;
        }
 
        dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n",
@@ -482,7 +481,7 @@ static int tps80031_probe(struct i2c_client *client,
        ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base);
        if (ret) {
                dev_err(&client->dev, "IRQ init failed: %d\n", ret);
-               goto fail_client_reg;
+               return ret;
        }
 
        tps80031_pupd_init(tps80031, pdata);
@@ -506,12 +505,6 @@ static int tps80031_probe(struct i2c_client *client,
 
 fail_mfd_add:
        regmap_del_irq_chip(client->irq, tps80031->irq_data);
-
-fail_client_reg:
-       for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
-               if (tps80031->clients[i]  && (tps80031->clients[i] != client))
-                       i2c_unregister_device(tps80031->clients[i]);
-       }
        return ret;
 }
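The tps80031 conversion goes one step further and uses the device-managed devm_i2c_new_dummy_device(): the dummy clients are unregistered automatically on probe failure or driver unbind, which is what allows the fail_client_reg label and its manual i2c_unregister_device() loop to be deleted. The resulting idiom, with addr standing in for the per-slot slave address:

	tps->clients[i] = devm_i2c_new_dummy_device(&client->dev,
						    client->adapter, addr);
	if (IS_ERR(tps->clients[i]))
		return PTR_ERR(tps->clients[i]);	/* devm unwinds earlier clients */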
 
index 448d939..20cf8cf 100644 (file)
@@ -1141,12 +1141,12 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
                if (i == 0) {
                        twl->client = client;
                } else {
-                       twl->client = i2c_new_dummy(client->adapter,
+                       twl->client = i2c_new_dummy_device(client->adapter,
                                                    client->addr + i);
-                       if (!twl->client) {
+                       if (IS_ERR(twl->client)) {
                                dev_err(&client->dev,
                                        "can't attach client %d\n", i);
-                               status = -ENOMEM;
+                               status = PTR_ERR(twl->client);
                                goto fail;
                        }
                }
index 518945b..2cccd82 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/mod_devicetable.h>
-#include <linux/log2.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
 #include <linux/property.h>
index 47ae84a..1b1a794 100644 (file)
@@ -527,6 +527,7 @@ static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf,
                              FASTRPC_PHYS(buffer->phys), buffer->size);
        if (ret < 0) {
                dev_err(buffer->dev, "failed to get scatterlist from DMA API\n");
+               kfree(a);
                return -EINVAL;
        }
 
index 32e9b1a..0a2b99e 100644 (file)
@@ -218,13 +218,21 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev)
 {
        int ret;
 
+       /* No need to enable the client if nothing is needed from it */
+       if (!cldev->bus->fw_f_fw_ver_supported &&
+           !cldev->bus->hbm_f_os_supported)
+               return;
+
        ret = mei_cldev_enable(cldev);
        if (ret)
                return;
 
-       ret = mei_fwver(cldev);
-       if (ret < 0)
-               dev_err(&cldev->dev, "FW version command failed %d\n", ret);
+       if (cldev->bus->fw_f_fw_ver_supported) {
+               ret = mei_fwver(cldev);
+               if (ret < 0)
+                       dev_err(&cldev->dev, "FW version command failed %d\n",
+                               ret);
+       }
 
        if (cldev->bus->hbm_f_os_supported) {
                ret = mei_osver(cldev);
index 77f7dff..c09f8bb 100644 (file)
@@ -79,6 +79,9 @@
 #define MEI_DEV_ID_CNP_H      0xA360  /* Cannon Point H */
 #define MEI_DEV_ID_CNP_H_4    0xA364  /* Cannon Point H 4 (iTouch) */
 
+#define MEI_DEV_ID_CMP_LP     0x02e0  /* Comet Point LP */
+#define MEI_DEV_ID_CMP_LP_3   0x02e4  /* Comet Point LP 3 (iTouch) */
+
 #define MEI_DEV_ID_ICP_LP     0x34E0  /* Ice Lake Point LP */
 
 #define MEI_DEV_ID_TGP_LP     0xA0E0  /* Tiger Lake Point LP */
index abe1b1f..c4f6991 100644 (file)
@@ -1355,6 +1355,8 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev)
 #define MEI_CFG_FW_SPS                           \
        .quirk_probe = mei_me_fw_type_sps
 
+#define MEI_CFG_FW_VER_SUPP                     \
+       .fw_ver_supported = 1
 
 #define MEI_CFG_ICH_HFS                      \
        .fw_status.count = 0
@@ -1392,31 +1394,41 @@ static const struct mei_cfg mei_me_ich10_cfg = {
        MEI_CFG_ICH10_HFS,
 };
 
-/* PCH devices */
-static const struct mei_cfg mei_me_pch_cfg = {
+/* PCH6 devices */
+static const struct mei_cfg mei_me_pch6_cfg = {
        MEI_CFG_PCH_HFS,
 };
 
+/* PCH7 devices */
+static const struct mei_cfg mei_me_pch7_cfg = {
+       MEI_CFG_PCH_HFS,
+       MEI_CFG_FW_VER_SUPP,
+};
+
 /* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */
 static const struct mei_cfg mei_me_pch_cpt_pbg_cfg = {
        MEI_CFG_PCH_HFS,
+       MEI_CFG_FW_VER_SUPP,
        MEI_CFG_FW_NM,
 };
 
 /* PCH8 Lynx Point and newer devices */
 static const struct mei_cfg mei_me_pch8_cfg = {
        MEI_CFG_PCH8_HFS,
+       MEI_CFG_FW_VER_SUPP,
 };
 
 /* PCH8 Lynx Point with quirk for SPS Firmware exclusion */
 static const struct mei_cfg mei_me_pch8_sps_cfg = {
        MEI_CFG_PCH8_HFS,
+       MEI_CFG_FW_VER_SUPP,
        MEI_CFG_FW_SPS,
 };
 
 /* Cannon Lake and newer devices */
 static const struct mei_cfg mei_me_pch12_cfg = {
        MEI_CFG_PCH8_HFS,
+       MEI_CFG_FW_VER_SUPP,
        MEI_CFG_DMA_128,
 };
 
@@ -1428,7 +1440,8 @@ static const struct mei_cfg *const mei_cfg_list[] = {
        [MEI_ME_UNDEF_CFG] = NULL,
        [MEI_ME_ICH_CFG] = &mei_me_ich_cfg,
        [MEI_ME_ICH10_CFG] = &mei_me_ich10_cfg,
-       [MEI_ME_PCH_CFG] = &mei_me_pch_cfg,
+       [MEI_ME_PCH6_CFG] = &mei_me_pch6_cfg,
+       [MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg,
        [MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg,
        [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg,
        [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg,
@@ -1473,6 +1486,8 @@ struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
        mei_device_init(dev, &pdev->dev, &mei_me_hw_ops);
        hw->cfg = cfg;
 
+       dev->fw_f_fw_ver_supported = cfg->fw_ver_supported;
+
        return dev;
 }
 
index 08c84a0..1d87948 100644 (file)
  * @fw_status: FW status
  * @quirk_probe: device exclusion quirk
  * @dma_size: device DMA buffers size
+ * @fw_ver_supported: whether the FW version can be retrieved from FW
  */
 struct mei_cfg {
        const struct mei_fw_status fw_status;
        bool (*quirk_probe)(struct pci_dev *pdev);
        size_t dma_size[DMA_DSCR_NUM];
+       u32 fw_ver_supported:1;
 };
 
 
@@ -62,7 +64,8 @@ struct mei_me_hw {
  * @MEI_ME_UNDEF_CFG:      Lower sentinel.
  * @MEI_ME_ICH_CFG:        I/O Controller Hub legacy devices.
  * @MEI_ME_ICH10_CFG:      I/O Controller Hub platforms Gen10
- * @MEI_ME_PCH_CFG:        Platform Controller Hub platforms (Up to Gen8).
+ * @MEI_ME_PCH6_CFG:       Platform Controller Hub platforms (Gen6).
+ * @MEI_ME_PCH7_CFG:       Platform Controller Hub platforms (Gen7).
  * @MEI_ME_PCH_CPT_PBG_CFG:Platform Controller Hub workstations
  *                         with quirk for Node Manager exclusion.
  * @MEI_ME_PCH8_CFG:       Platform Controller Hub Gen8 and newer
@@ -77,7 +80,8 @@ enum mei_cfg_idx {
        MEI_ME_UNDEF_CFG,
        MEI_ME_ICH_CFG,
        MEI_ME_ICH10_CFG,
-       MEI_ME_PCH_CFG,
+       MEI_ME_PCH6_CFG,
+       MEI_ME_PCH7_CFG,
        MEI_ME_PCH_CPT_PBG_CFG,
        MEI_ME_PCH8_CFG,
        MEI_ME_PCH8_SPS_CFG,
index f71a023..0f21411 100644 (file)
@@ -426,6 +426,8 @@ struct mei_fw_version {
  *
  * @fw_ver : FW versions
  *
+ * @fw_f_fw_ver_supported : fw feature: fw version supported
+ *
  * @me_clients_rwsem: rw lock over me_clients list
  * @me_clients  : list of FW clients
  * @me_clients_map : FW clients bit map
@@ -506,6 +508,8 @@ struct mei_device {
 
        struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS];
 
+       unsigned int fw_f_fw_ver_supported:1;
+
        struct rw_semaphore me_clients_rwsem;
        struct list_head me_clients;
        DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX);
index d5a92c6..3dca63e 100644 (file)
@@ -61,13 +61,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
        {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)},
 
-       {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH_CFG)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH_CFG)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH_CFG)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)},
@@ -96,6 +96,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
        {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
 
+       {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)},
+
        {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
 
        {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)},
index 3a52f57..49ea02c 100644 (file)
@@ -94,6 +94,7 @@ config MMC_SDHCI_PCI
        depends on MMC_SDHCI && PCI
        select MMC_CQHCI
        select IOSF_MBI if X86
+       select MMC_SDHCI_IO_ACCESSORS
        help
          This selects the PCI Secure Digital Host Controller Interface.
          Most controllers found today are PCI devices.
index 390ee16..11c4598 100644 (file)
@@ -13,7 +13,7 @@ obj-$(CONFIG_MMC_MXS)         += mxs-mmc.o
 obj-$(CONFIG_MMC_SDHCI)                += sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)    += sdhci-pci.o
 sdhci-pci-y                    += sdhci-pci-core.o sdhci-pci-o2micro.o sdhci-pci-arasan.o \
-                                  sdhci-pci-dwc-mshc.o
+                                  sdhci-pci-dwc-mshc.o sdhci-pci-gli.o
 obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI))       += sdhci-pci-data.o
 obj-$(CONFIG_MMC_SDHCI_ACPI)   += sdhci-acpi.o
 obj-$(CONFIG_MMC_SDHCI_PXAV3)  += sdhci-pxav3.o
index f7bdae5..5047f73 100644 (file)
@@ -611,7 +611,8 @@ static int cqhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
        cq_host->slot[tag].flags = 0;
 
        cq_host->qcnt += 1;
-
+       /* Make sure descriptors are ready before ringing the doorbell */
+       wmb();
        cqhci_writel(cq_host, 1 << tag, CQHCI_TDBR);
        if (!(cqhci_readl(cq_host, CQHCI_TDBR) & (1 << tag)))
                pr_debug("%s: cqhci: doorbell not set for tag %d\n",
index 78e7e35..4031217 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
+#include <linux/dma/mxs-dma.h>
 #include <linux/highmem.h>
 #include <linux/clk.h>
 #include <linux/err.h>
@@ -266,7 +267,7 @@ static void mxs_mmc_bc(struct mxs_mmc_host *host)
        ssp->ssp_pio_words[2] = cmd1;
        ssp->dma_dir = DMA_NONE;
        ssp->slave_dirn = DMA_TRANS_NONE;
-       desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK);
+       desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END);
        if (!desc)
                goto out;
 
@@ -311,7 +312,7 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host)
        ssp->ssp_pio_words[2] = cmd1;
        ssp->dma_dir = DMA_NONE;
        ssp->slave_dirn = DMA_TRANS_NONE;
-       desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK);
+       desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END);
        if (!desc)
                goto out;
 
@@ -441,7 +442,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host)
        host->data = data;
        ssp->dma_dir = dma_data_dir;
        ssp->slave_dirn = slave_dirn;
-       desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | MXS_DMA_CTRL_WAIT4END);
        if (!desc)
                goto out;
 
index d4ada5c..234551a 100644 (file)
@@ -646,8 +646,8 @@ int renesas_sdhi_probe(struct platform_device *pdev,
        struct tmio_mmc_dma *dma_priv;
        struct tmio_mmc_host *host;
        struct renesas_sdhi *priv;
+       int num_irqs, irq, ret, i;
        struct resource *res;
-       int irq, ret, i;
        u16 ver;
 
        of_data = of_device_get_match_data(&pdev->dev);
@@ -825,24 +825,31 @@ int renesas_sdhi_probe(struct platform_device *pdev,
                host->hs400_complete = renesas_sdhi_hs400_complete;
        }
 
-       i = 0;
-       while (1) {
+       num_irqs = platform_irq_count(pdev);
+       if (num_irqs < 0) {
+               ret = num_irqs;
+               goto eirq;
+       }
+
+       /* There must be at least one IRQ source */
+       if (!num_irqs) {
+               ret = -ENXIO;
+               goto eirq;
+       }
+
+       for (i = 0; i < num_irqs; i++) {
                irq = platform_get_irq(pdev, i);
-               if (irq < 0)
-                       break;
-               i++;
+               if (irq < 0) {
+                       ret = irq;
+                       goto eirq;
+               }
+
                ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0,
                                       dev_name(&pdev->dev), host);
                if (ret)
                        goto eirq;
        }
 
-       /* There must be at least one IRQ source */
-       if (!i) {
-               ret = irq;
-               goto eirq;
-       }
-
        dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n",
                 mmc_hostname(host->mmc), (unsigned long)
                 (platform_get_resource(pdev, IORESOURCE_MEM, 0)->start),
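The IRQ-request rework above swaps the open-coded "call platform_get_irq() until it fails" loop for platform_irq_count(), which reports the number of populated IRQ resources up front and can itself return a negative error (such as -EPROBE_DEFER) that the driver now propagates. Condensed:

	num_irqs = platform_irq_count(pdev);
	if (num_irqs < 0)
		return num_irqs;	/* e.g. -EPROBE_DEFER */
	if (!num_irqs)
		return -ENXIO;		/* at least one IRQ source is required */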
index 2b9cdcd..f4f5f0a 100644 (file)
@@ -262,6 +262,7 @@ static const struct sdhci_iproc_data bcm2835_data = {
 };
 
 static const struct sdhci_pltfm_data sdhci_bcm2711_pltfm_data = {
+       .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
        .ops = &sdhci_iproc_32only_ops,
 };
 
index 3271c2d..1d1953d 100644 (file)
@@ -495,7 +495,12 @@ static int esdhc_of_enable_dma(struct sdhci_host *host)
                dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
 
        value = sdhci_readl(host, ESDHC_DMA_SYSCTL);
-       value |= ESDHC_DMA_SNOOP;
+
+       if (of_dma_is_coherent(dev->of_node))
+               value |= ESDHC_DMA_SNOOP;
+       else
+               value &= ~ESDHC_DMA_SNOOP;
+
        sdhci_writel(host, value, ESDHC_DMA_SYSCTL);
        return 0;
 }
index 41c2677..083e7e0 100644 (file)
@@ -372,7 +372,7 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
         * on temperature
         */
        if (temperature < -20000)
-               phase_delay = min(max_window + 4 * max_len - 24,
+               phase_delay = min(max_window + 4 * (max_len - 1) - 24,
                                  max_window +
                                  DIV_ROUND_UP(13 * max_len, 16) * 4);
        else if (temperature < 20000)
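The one-character fix above tightens the cold-temperature bound: tuning taps advance in steps of 4, so a passing window of max_len taps starting at max_window spans max_window .. max_window + 4 * (max_len - 1). The old expression used 4 * max_len and could therefore pick a phase delay one tap past the measured window. For example, with max_window = 80 and max_len = 10, the last in-window tap is 80 + 4 * 9 = 116, giving a cap of 116 - 24 = 92 rather than the previous 96.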
index e1ca185..eaffa85 100644 (file)
@@ -1685,6 +1685,8 @@ static const struct pci_device_id pci_ids[] = {
        SDHCI_PCI_DEVICE(O2, SEABIRD1, o2),
        SDHCI_PCI_DEVICE(ARASAN, PHY_EMMC, arasan),
        SDHCI_PCI_DEVICE(SYNOPSYS, DWC_MSHC, snps),
+       SDHCI_PCI_DEVICE(GLI, 9750, gl9750),
+       SDHCI_PCI_DEVICE(GLI, 9755, gl9755),
        SDHCI_PCI_DEVICE_CLASS(AMD, SYSTEM_SDHCI, PCI_CLASS_MASK, amd),
        /* Generic SD host controller */
        {PCI_DEVICE_CLASS(SYSTEM_SDHCI, PCI_CLASS_MASK)},
diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
new file mode 100644 (file)
index 0000000..5eea8d7
--- /dev/null
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Genesys Logic, Inc.
+ *
+ * Authors: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+ *
+ * Version: v0.9.0 (2019-08-08)
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/pci.h>
+#include <linux/mmc/mmc.h>
+#include <linux/delay.h>
+#include "sdhci.h"
+#include "sdhci-pci.h"
+
+/*  Genesys Logic extra registers */
+#define SDHCI_GLI_9750_WT         0x800
+#define   SDHCI_GLI_9750_WT_EN      BIT(0)
+#define   GLI_9750_WT_EN_ON        0x1
+#define   GLI_9750_WT_EN_OFF       0x0
+
+#define SDHCI_GLI_9750_DRIVING      0x860
+#define   SDHCI_GLI_9750_DRIVING_1    GENMASK(11, 0)
+#define   SDHCI_GLI_9750_DRIVING_2    GENMASK(27, 26)
+#define   GLI_9750_DRIVING_1_VALUE    0xFFF
+#define   GLI_9750_DRIVING_2_VALUE    0x3
+
+#define SDHCI_GLI_9750_PLL           0x864
+#define   SDHCI_GLI_9750_PLL_TX2_INV    BIT(23)
+#define   SDHCI_GLI_9750_PLL_TX2_DLY    GENMASK(22, 20)
+#define   GLI_9750_PLL_TX2_INV_VALUE    0x1
+#define   GLI_9750_PLL_TX2_DLY_VALUE    0x0
+
+#define SDHCI_GLI_9750_SW_CTRL      0x874
+#define   SDHCI_GLI_9750_SW_CTRL_4    GENMASK(7, 6)
+#define   GLI_9750_SW_CTRL_4_VALUE    0x3
+
+#define SDHCI_GLI_9750_MISC            0x878
+#define   SDHCI_GLI_9750_MISC_TX1_INV    BIT(2)
+#define   SDHCI_GLI_9750_MISC_RX_INV     BIT(3)
+#define   SDHCI_GLI_9750_MISC_TX1_DLY    GENMASK(6, 4)
+#define   GLI_9750_MISC_TX1_INV_VALUE    0x0
+#define   GLI_9750_MISC_RX_INV_ON        0x1
+#define   GLI_9750_MISC_RX_INV_OFF       0x0
+#define   GLI_9750_MISC_RX_INV_VALUE     GLI_9750_MISC_RX_INV_OFF
+#define   GLI_9750_MISC_TX1_DLY_VALUE    0x5
+
+#define SDHCI_GLI_9750_TUNING_CONTROL            0x540
+#define   SDHCI_GLI_9750_TUNING_CONTROL_EN          BIT(4)
+#define   GLI_9750_TUNING_CONTROL_EN_ON             0x1
+#define   GLI_9750_TUNING_CONTROL_EN_OFF            0x0
+#define   SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1    BIT(16)
+#define   SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2    GENMASK(20, 19)
+#define   GLI_9750_TUNING_CONTROL_GLITCH_1_VALUE    0x1
+#define   GLI_9750_TUNING_CONTROL_GLITCH_2_VALUE    0x2
+
+#define SDHCI_GLI_9750_TUNING_PARAMETERS           0x544
+#define   SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY    GENMASK(2, 0)
+#define   GLI_9750_TUNING_PARAMETERS_RX_DLY_VALUE    0x1
+
+#define GLI_MAX_TUNING_LOOP 40
+
+/* Genesys Logic chipset */
+static inline void gl9750_wt_on(struct sdhci_host *host)
+{
+       u32 wt_value;
+       u32 wt_enable;
+
+       wt_value = sdhci_readl(host, SDHCI_GLI_9750_WT);
+       wt_enable = FIELD_GET(SDHCI_GLI_9750_WT_EN, wt_value);
+
+       if (wt_enable == GLI_9750_WT_EN_ON)
+               return;
+
+       wt_value &= ~SDHCI_GLI_9750_WT_EN;
+       wt_value |= FIELD_PREP(SDHCI_GLI_9750_WT_EN, GLI_9750_WT_EN_ON);
+
+       sdhci_writel(host, wt_value, SDHCI_GLI_9750_WT);
+}
+
+static inline void gl9750_wt_off(struct sdhci_host *host)
+{
+       u32 wt_value;
+       u32 wt_enable;
+
+       wt_value = sdhci_readl(host, SDHCI_GLI_9750_WT);
+       wt_enable = FIELD_GET(SDHCI_GLI_9750_WT_EN, wt_value);
+
+       if (wt_enable == GLI_9750_WT_EN_OFF)
+               return;
+
+       wt_value &= ~SDHCI_GLI_9750_WT_EN;
+       wt_value |= FIELD_PREP(SDHCI_GLI_9750_WT_EN, GLI_9750_WT_EN_OFF);
+
+       sdhci_writel(host, wt_value, SDHCI_GLI_9750_WT);
+}
+
+static void gli_set_9750(struct sdhci_host *host)
+{
+       u32 driving_value;
+       u32 pll_value;
+       u32 sw_ctrl_value;
+       u32 misc_value;
+       u32 parameter_value;
+       u32 control_value;
+       u16 ctrl2;
+
+       gl9750_wt_on(host);
+
+       driving_value = sdhci_readl(host, SDHCI_GLI_9750_DRIVING);
+       pll_value = sdhci_readl(host, SDHCI_GLI_9750_PLL);
+       sw_ctrl_value = sdhci_readl(host, SDHCI_GLI_9750_SW_CTRL);
+       misc_value = sdhci_readl(host, SDHCI_GLI_9750_MISC);
+       parameter_value = sdhci_readl(host, SDHCI_GLI_9750_TUNING_PARAMETERS);
+       control_value = sdhci_readl(host, SDHCI_GLI_9750_TUNING_CONTROL);
+
+       driving_value &= ~(SDHCI_GLI_9750_DRIVING_1);
+       driving_value &= ~(SDHCI_GLI_9750_DRIVING_2);
+       driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_1,
+                                   GLI_9750_DRIVING_1_VALUE);
+       driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_2,
+                                   GLI_9750_DRIVING_2_VALUE);
+       sdhci_writel(host, driving_value, SDHCI_GLI_9750_DRIVING);
+
+       sw_ctrl_value &= ~SDHCI_GLI_9750_SW_CTRL_4;
+       sw_ctrl_value |= FIELD_PREP(SDHCI_GLI_9750_SW_CTRL_4,
+                                   GLI_9750_SW_CTRL_4_VALUE);
+       sdhci_writel(host, sw_ctrl_value, SDHCI_GLI_9750_SW_CTRL);
+
+       /* reset the tuning flow after reinit and before starting tuning */
+       pll_value &= ~SDHCI_GLI_9750_PLL_TX2_INV;
+       pll_value &= ~SDHCI_GLI_9750_PLL_TX2_DLY;
+       pll_value |= FIELD_PREP(SDHCI_GLI_9750_PLL_TX2_INV,
+                               GLI_9750_PLL_TX2_INV_VALUE);
+       pll_value |= FIELD_PREP(SDHCI_GLI_9750_PLL_TX2_DLY,
+                               GLI_9750_PLL_TX2_DLY_VALUE);
+
+       misc_value &= ~SDHCI_GLI_9750_MISC_TX1_INV;
+       misc_value &= ~SDHCI_GLI_9750_MISC_RX_INV;
+       misc_value &= ~SDHCI_GLI_9750_MISC_TX1_DLY;
+       misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_TX1_INV,
+                                GLI_9750_MISC_TX1_INV_VALUE);
+       misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV,
+                                GLI_9750_MISC_RX_INV_VALUE);
+       misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_TX1_DLY,
+                                GLI_9750_MISC_TX1_DLY_VALUE);
+
+       parameter_value &= ~SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY;
+       parameter_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY,
+                                     GLI_9750_TUNING_PARAMETERS_RX_DLY_VALUE);
+
+       control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1;
+       control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2;
+       control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1,
+                                   GLI_9750_TUNING_CONTROL_GLITCH_1_VALUE);
+       control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2,
+                                   GLI_9750_TUNING_CONTROL_GLITCH_2_VALUE);
+
+       sdhci_writel(host, pll_value, SDHCI_GLI_9750_PLL);
+       sdhci_writel(host, misc_value, SDHCI_GLI_9750_MISC);
+
+       /* disable tuned clk */
+       ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+       ctrl2 &= ~SDHCI_CTRL_TUNED_CLK;
+       sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2);
+
+       /* enable tuning parameters control */
+       control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_EN;
+       control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_EN,
+                                   GLI_9750_TUNING_CONTROL_EN_ON);
+       sdhci_writel(host, control_value, SDHCI_GLI_9750_TUNING_CONTROL);
+
+       /* write tuning parameters */
+       sdhci_writel(host, parameter_value, SDHCI_GLI_9750_TUNING_PARAMETERS);
+
+       /* disable tuning parameters control */
+       control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_EN;
+       control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_EN,
+                                   GLI_9750_TUNING_CONTROL_EN_OFF);
+       sdhci_writel(host, control_value, SDHCI_GLI_9750_TUNING_CONTROL);
+
+       /* clear tuned clk */
+       ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+       ctrl2 &= ~SDHCI_CTRL_TUNED_CLK;
+       sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2);
+
+       gl9750_wt_off(host);
+}
+
+static void gli_set_9750_rx_inv(struct sdhci_host *host, bool b)
+{
+       u32 misc_value;
+
+       gl9750_wt_on(host);
+
+       misc_value = sdhci_readl(host, SDHCI_GLI_9750_MISC);
+       misc_value &= ~SDHCI_GLI_9750_MISC_RX_INV;
+       if (b) {
+               misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV,
+                                        GLI_9750_MISC_RX_INV_ON);
+       } else {
+               misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV,
+                                        GLI_9750_MISC_RX_INV_OFF);
+       }
+       sdhci_writel(host, misc_value, SDHCI_GLI_9750_MISC);
+
+       gl9750_wt_off(host);
+}
+
+static int __sdhci_execute_tuning_9750(struct sdhci_host *host, u32 opcode)
+{
+       int i;
+       int rx_inv;
+
+       for (rx_inv = 0; rx_inv < 2; rx_inv++) {
+               gli_set_9750_rx_inv(host, !!rx_inv);
+               sdhci_start_tuning(host);
+
+               for (i = 0; i < GLI_MAX_TUNING_LOOP; i++) {
+                       u16 ctrl;
+
+                       sdhci_send_tuning(host, opcode);
+
+                       if (!host->tuning_done) {
+                               sdhci_abort_tuning(host, opcode);
+                               break;
+                       }
+
+                       ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+                       if (!(ctrl & SDHCI_CTRL_EXEC_TUNING)) {
+                               if (ctrl & SDHCI_CTRL_TUNED_CLK)
+                                       return 0; /* Success! */
+                               break;
+                       }
+               }
+       }
+       if (!host->tuning_done) {
+               pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
+                       mmc_hostname(host->mmc));
+               return -ETIMEDOUT;
+       }
+
+       pr_info("%s: Tuning failed, falling back to fixed sampling clock\n",
+               mmc_hostname(host->mmc));
+       sdhci_reset_tuning(host);
+
+       return -EAGAIN;
+}
+
+static int gl9750_execute_tuning(struct sdhci_host *host, u32 opcode)
+{
+       host->mmc->retune_period = 0;
+       if (host->tuning_mode == SDHCI_TUNING_MODE_1)
+               host->mmc->retune_period = host->tuning_count;
+
+       gli_set_9750(host);
+       host->tuning_err = __sdhci_execute_tuning_9750(host, opcode);
+       sdhci_end_tuning(host);
+
+       return 0;
+}
+
+static int gli_probe_slot_gl9750(struct sdhci_pci_slot *slot)
+{
+       struct sdhci_host *host = slot->host;
+
+       slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
+       sdhci_enable_v4_mode(host);
+
+       return 0;
+}
+
+static int gli_probe_slot_gl9755(struct sdhci_pci_slot *slot)
+{
+       struct sdhci_host *host = slot->host;
+
+       slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
+       sdhci_enable_v4_mode(host);
+
+       return 0;
+}
+
+static void sdhci_gli_voltage_switch(struct sdhci_host *host)
+{
+       /*
+        * According to Section 3.6.1 signal voltage switch procedure in
+        * SD Host Controller Simplified Spec. 4.20, steps 6~8 are as
+        * follows:
+        * (6) Set 1.8V Signal Enable in the Host Control 2 register.
+        * (7) Wait 5ms. 1.8V voltage regulator shall be stable within this
+        *     period.
+        * (8) If 1.8V Signal Enable is cleared by Host Controller, go to
+        *     step (12).
+        *
+        * Wait 5 ms after setting 1.8V Signal Enable in the Host Control 2
+        * register to ensure the enable bit is actually set by GL9750/GL9755.
+        */
+       usleep_range(5000, 5500);
+}
+
+static void sdhci_gl9750_reset(struct sdhci_host *host, u8 mask)
+{
+       sdhci_reset(host, mask);
+       gli_set_9750(host);
+}
+
+static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg)
+{
+       u32 value;
+
+       value = readl(host->ioaddr + reg);
+       if (unlikely(reg == SDHCI_MAX_CURRENT && !(value & 0xff)))
+               value |= 0xc8;
+
+       return value;
+}
+
+static const struct sdhci_ops sdhci_gl9755_ops = {
+       .set_clock              = sdhci_set_clock,
+       .enable_dma             = sdhci_pci_enable_dma,
+       .set_bus_width          = sdhci_set_bus_width,
+       .reset                  = sdhci_reset,
+       .set_uhs_signaling      = sdhci_set_uhs_signaling,
+       .voltage_switch         = sdhci_gli_voltage_switch,
+};
+
+const struct sdhci_pci_fixes sdhci_gl9755 = {
+       .quirks         = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+       .quirks2        = SDHCI_QUIRK2_BROKEN_DDR50,
+       .probe_slot     = gli_probe_slot_gl9755,
+       .ops            = &sdhci_gl9755_ops,
+};
+
+static const struct sdhci_ops sdhci_gl9750_ops = {
+       .read_l                 = sdhci_gl9750_readl,
+       .set_clock              = sdhci_set_clock,
+       .enable_dma             = sdhci_pci_enable_dma,
+       .set_bus_width          = sdhci_set_bus_width,
+       .reset                  = sdhci_gl9750_reset,
+       .set_uhs_signaling      = sdhci_set_uhs_signaling,
+       .voltage_switch         = sdhci_gli_voltage_switch,
+       .platform_execute_tuning = gl9750_execute_tuning,
+};
+
+const struct sdhci_pci_fixes sdhci_gl9750 = {
+       .quirks         = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+       .quirks2        = SDHCI_QUIRK2_BROKEN_DDR50,
+       .probe_slot     = gli_probe_slot_gl9750,
+       .ops            = &sdhci_gl9750_ops,
+};
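The new sdhci-pci-gli.c consistently uses the <linux/bitfield.h> helpers: GENMASK()/BIT() name each register field once, FIELD_GET() extracts a field (as in gl9750_wt_on()), and FIELD_PREP() shifts a value into position without hand-coded masks. The recurring read-modify-write shape, condensed to a single field:

	u32 reg = sdhci_readl(host, SDHCI_GLI_9750_PLL);

	reg &= ~SDHCI_GLI_9750_PLL_TX2_DLY;		/* clear the GENMASK(22, 20) field */
	reg |= FIELD_PREP(SDHCI_GLI_9750_PLL_TX2_DLY,	/* insert the new 3-bit value */
			  GLI_9750_PLL_TX2_DLY_VALUE);
	sdhci_writel(host, reg, SDHCI_GLI_9750_PLL);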
index 1abc9d4..558202f 100644 (file)
@@ -68,6 +68,9 @@
 
 #define PCI_DEVICE_ID_SYNOPSYS_DWC_MSHC 0xc202
 
+#define PCI_DEVICE_ID_GLI_9755         0x9755
+#define PCI_DEVICE_ID_GLI_9750         0x9750
+
 /*
  * PCI device class and mask
  */
@@ -188,5 +191,7 @@ int sdhci_pci_enable_dma(struct sdhci_host *host);
 extern const struct sdhci_pci_fixes sdhci_arasan;
 extern const struct sdhci_pci_fixes sdhci_snps;
 extern const struct sdhci_pci_fixes sdhci_o2;
+extern const struct sdhci_pci_fixes sdhci_gl9750;
+extern const struct sdhci_pci_fixes sdhci_gl9755;
 
 #endif /* __SDHCI_PCI_H */
index 02d8f52..7bc9505 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 
 struct sdhci_tegra_soc_data {
        const struct sdhci_pltfm_data *pdata;
+       u64 dma_mask;
        u32 nvquirks;
        u8 min_tap_delay;
        u8 max_tap_delay;
@@ -1233,11 +1235,25 @@ static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = {
        .update_dcmd_desc = sdhci_tegra_update_dcmd_desc,
 };
 
+static int tegra_sdhci_set_dma_mask(struct sdhci_host *host)
+{
+       struct sdhci_pltfm_host *platform = sdhci_priv(host);
+       struct sdhci_tegra *tegra = sdhci_pltfm_priv(platform);
+       const struct sdhci_tegra_soc_data *soc = tegra->soc_data;
+       struct device *dev = mmc_dev(host->mmc);
+
+       if (soc->dma_mask)
+               return dma_set_mask_and_coherent(dev, soc->dma_mask);
+
+       return 0;
+}
+
 static const struct sdhci_ops tegra_sdhci_ops = {
        .get_ro     = tegra_sdhci_get_ro,
        .read_w     = tegra_sdhci_readw,
        .write_l    = tegra_sdhci_writel,
        .set_clock  = tegra_sdhci_set_clock,
+       .set_dma_mask = tegra_sdhci_set_dma_mask,
        .set_bus_width = sdhci_set_bus_width,
        .reset      = tegra_sdhci_reset,
        .platform_execute_tuning = tegra_sdhci_execute_tuning,
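Below, the per-SoC soc_data entries gain an explicit dma_mask, applied through the new tegra_sdhci_set_dma_mask() hook, in place of the blanket SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk that forced 32-bit DMA. The masks encode each chip's real reach: DMA_BIT_MASK(34) is (1ULL << 34) - 1 = 0x3ffffffff for Tegra124 and Tegra210, with 40-bit and 39-bit masks for Tegra186 and Tegra194 respectively.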
@@ -1257,6 +1273,7 @@ static const struct sdhci_pltfm_data sdhci_tegra20_pdata = {
 
 static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
        .pdata = &sdhci_tegra20_pdata,
+       .dma_mask = DMA_BIT_MASK(32),
        .nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
                    NVQUIRK_ENABLE_BLOCK_GAP_DET,
 };
@@ -1283,6 +1300,7 @@ static const struct sdhci_pltfm_data sdhci_tegra30_pdata = {
 
 static const struct sdhci_tegra_soc_data soc_data_tegra30 = {
        .pdata = &sdhci_tegra30_pdata,
+       .dma_mask = DMA_BIT_MASK(32),
        .nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
                    NVQUIRK_ENABLE_SDR50 |
                    NVQUIRK_ENABLE_SDR104 |
@@ -1295,6 +1313,7 @@ static const struct sdhci_ops tegra114_sdhci_ops = {
        .write_w    = tegra_sdhci_writew,
        .write_l    = tegra_sdhci_writel,
        .set_clock  = tegra_sdhci_set_clock,
+       .set_dma_mask = tegra_sdhci_set_dma_mask,
        .set_bus_width = sdhci_set_bus_width,
        .reset      = tegra_sdhci_reset,
        .platform_execute_tuning = tegra_sdhci_execute_tuning,
@@ -1316,6 +1335,7 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
 
 static const struct sdhci_tegra_soc_data soc_data_tegra114 = {
        .pdata = &sdhci_tegra114_pdata,
+       .dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct sdhci_pltfm_data sdhci_tegra124_pdata = {
@@ -1325,22 +1345,13 @@ static const struct sdhci_pltfm_data sdhci_tegra124_pdata = {
                  SDHCI_QUIRK_NO_HISPD_BIT |
                  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
                  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
-       .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
-                  /*
-                   * The TRM states that the SD/MMC controller found on
-                   * Tegra124 can address 34 bits (the maximum supported by
-                   * the Tegra memory controller), but tests show that DMA
-                   * to or from above 4 GiB doesn't work. This is possibly
-                   * caused by missing programming, though it's not obvious
-                   * what sequence is required. Mark 64-bit DMA broken for
-                   * now to fix this for existing users (e.g. Nyan boards).
-                   */
-                  SDHCI_QUIRK2_BROKEN_64_BIT_DMA,
+       .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
        .ops  = &tegra114_sdhci_ops,
 };
 
 static const struct sdhci_tegra_soc_data soc_data_tegra124 = {
        .pdata = &sdhci_tegra124_pdata,
+       .dma_mask = DMA_BIT_MASK(34),
 };
 
 static const struct sdhci_ops tegra210_sdhci_ops = {
@@ -1349,6 +1360,7 @@ static const struct sdhci_ops tegra210_sdhci_ops = {
        .write_w    = tegra210_sdhci_writew,
        .write_l    = tegra_sdhci_writel,
        .set_clock  = tegra_sdhci_set_clock,
+       .set_dma_mask = tegra_sdhci_set_dma_mask,
        .set_bus_width = sdhci_set_bus_width,
        .reset      = tegra_sdhci_reset,
        .set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
@@ -1369,6 +1381,7 @@ static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
 
 static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
        .pdata = &sdhci_tegra210_pdata,
+       .dma_mask = DMA_BIT_MASK(34),
        .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL |
                    NVQUIRK_HAS_PADCALIB |
                    NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
@@ -1383,6 +1396,7 @@ static const struct sdhci_ops tegra186_sdhci_ops = {
        .read_w     = tegra_sdhci_readw,
        .write_l    = tegra_sdhci_writel,
        .set_clock  = tegra_sdhci_set_clock,
+       .set_dma_mask = tegra_sdhci_set_dma_mask,
        .set_bus_width = sdhci_set_bus_width,
        .reset      = tegra_sdhci_reset,
        .set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
@@ -1398,20 +1412,13 @@ static const struct sdhci_pltfm_data sdhci_tegra186_pdata = {
                  SDHCI_QUIRK_NO_HISPD_BIT |
                  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
                  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
-       .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
-                  /* SDHCI controllers on Tegra186 support 40-bit addressing.
-                   * IOVA addresses are 48-bit wide on Tegra186.
-                   * With 64-bit dma mask used for SDHCI, accesses can
-                   * be broken. Disable 64-bit dma, which would fall back
-                   * to 32-bit dma mask. Ideally 40-bit dma mask would work,
-                   * But it is not supported as of now.
-                   */
-                  SDHCI_QUIRK2_BROKEN_64_BIT_DMA,
+       .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
        .ops  = &tegra186_sdhci_ops,
 };
 
 static const struct sdhci_tegra_soc_data soc_data_tegra186 = {
        .pdata = &sdhci_tegra186_pdata,
+       .dma_mask = DMA_BIT_MASK(40),
        .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL |
                    NVQUIRK_HAS_PADCALIB |
                    NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
@@ -1424,6 +1431,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra186 = {
 
 static const struct sdhci_tegra_soc_data soc_data_tegra194 = {
        .pdata = &sdhci_tegra186_pdata,
+       .dma_mask = DMA_BIT_MASK(39),
        .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL |
                    NVQUIRK_HAS_PADCALIB |
                    NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
index 4b297f3..b056400 100644 (file)
@@ -2874,6 +2874,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
 static void sdhci_adma_show_error(struct sdhci_host *host)
 {
        void *desc = host->adma_table;
+       dma_addr_t dma = host->adma_addr;
 
        sdhci_dumpregs(host);
 
@@ -2881,18 +2882,21 @@ static void sdhci_adma_show_error(struct sdhci_host *host)
                struct sdhci_adma2_64_desc *dma_desc = desc;
 
                if (host->flags & SDHCI_USE_64_BIT_DMA)
-                       DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
-                           desc, le32_to_cpu(dma_desc->addr_hi),
+                       SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
+                           (unsigned long long)dma,
+                           le32_to_cpu(dma_desc->addr_hi),
                            le32_to_cpu(dma_desc->addr_lo),
                            le16_to_cpu(dma_desc->len),
                            le16_to_cpu(dma_desc->cmd));
                else
-                       DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
-                           desc, le32_to_cpu(dma_desc->addr_lo),
+                       SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
+                           (unsigned long long)dma,
+                           le32_to_cpu(dma_desc->addr_lo),
                            le16_to_cpu(dma_desc->len),
                            le16_to_cpu(dma_desc->cmd));
 
                desc += host->desc_sz;
+               dma += host->desc_sz;
 
                if (dma_desc->cmd & cpu_to_le16(ADMA2_END))
                        break;
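
The dump loop above now advances a bus address in step with the virtual descriptor pointer, so log lines can be matched against what the controller actually fetched. dma_addr_t is 32 or 64 bits wide depending on the platform, hence the explicit cast; a minimal illustration:

	dma_addr_t dma = host->adma_addr;

	/* the width of dma_addr_t follows CONFIG_ARCH_DMA_ADDR_T_64BIT,
	 * so cast to unsigned long long for a portable %llx format */
	SDHCI_DUMP("descriptor at bus address %08llx\n",
		   (unsigned long long)dma);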
@@ -2968,7 +2972,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
                        != MMC_BUS_TEST_R)
                host->data->error = -EILSEQ;
        else if (intmask & SDHCI_INT_ADMA_ERROR) {
-               pr_err("%s: ADMA error\n", mmc_hostname(host->mmc));
+               pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
+                      intmask);
                sdhci_adma_show_error(host);
                host->data->error = -EIO;
                if (host->ops->adma_workaround)
@@ -3776,18 +3781,14 @@ int sdhci_setup_host(struct sdhci_host *host)
                host->flags &= ~SDHCI_USE_ADMA;
        }
 
-       /*
-        * It is assumed that a 64-bit capable device has set a 64-bit DMA mask
-        * and *must* do 64-bit DMA.  A driver has the opportunity to change
-        * that during the first call to ->enable_dma().  Similarly
-        * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
-        * implement.
-        */
        if (sdhci_can_64bit_dma(host))
                host->flags |= SDHCI_USE_64_BIT_DMA;
 
        if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
-               ret = sdhci_set_dma_mask(host);
+               if (host->ops->set_dma_mask)
+                       ret = host->ops->set_dma_mask(host);
+               else
+                       ret = sdhci_set_dma_mask(host);
 
                if (!ret && host->ops->enable_dma)
                        ret = host->ops->enable_dma(host);
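
This hunk also deletes the comment that obliged 64-bit-capable drivers to fix the mask up themselves in ->enable_dma(): the core now asks the driver first and only then falls back to its generic helper. A driver that cannot do full 64-bit DMA no longer vetoes it with a quirk but simply requests the width it supports; an illustrative (not in-tree) hook:

	static int narrow_set_dma_mask(struct sdhci_host *host)	/* sketch */
	{
		/* request a 40-bit window instead of disabling 64-bit DMA */
		return dma_set_mask_and_coherent(mmc_dev(host->mmc),
						 DMA_BIT_MASK(40));
	}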
index a29c4cd..0ed3e0e 100644 (file)
@@ -622,6 +622,7 @@ struct sdhci_ops {
 
        u32             (*irq)(struct sdhci_host *host, u32 intmask);
 
+       int             (*set_dma_mask)(struct sdhci_host *host);
        int             (*enable_dma)(struct sdhci_host *host);
        unsigned int    (*get_max_clock)(struct sdhci_host *host);
        unsigned int    (*get_min_clock)(struct sdhci_host *host);
index 81bd9af..98c575d 100644 (file)
@@ -1393,11 +1393,9 @@ static int sh_mmcif_probe(struct platform_device *pdev)
        const char *name;
 
        irq[0] = platform_get_irq(pdev, 0);
-       irq[1] = platform_get_irq(pdev, 1);
-       if (irq[0] < 0) {
-               dev_err(dev, "Get irq error\n");
+       irq[1] = platform_get_irq_optional(pdev, 1);
+       if (irq[0] < 0)
                return -ENXIO;
-       }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        reg = devm_ioremap_resource(dev, res);
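
sh_mmcif's second interrupt is genuinely optional, and platform_get_irq() logs an error when the resource is missing; platform_get_irq_optional() returns the same negative errno silently. The probe-time pattern in sketch form; how the absent IRQ is consumed further down is driver-specific:

	irq[0] = platform_get_irq(pdev, 0);		/* required */
	if (irq[0] < 0)
		return -ENXIO;
	irq[1] = platform_get_irq_optional(pdev, 1);	/* may be absent */
	/* a negative irq[1] is tolerated and simply never requested */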
index 97a97a9..e10b760 100644 (file)
@@ -134,16 +134,15 @@ static void au_write_buf16(struct nand_chip *this, const u_char *buf, int len)
 
 /**
  * au_read_buf16 -  read chip data into buffer
- * @mtd:       MTD device structure
+ * @this:      NAND chip object
  * @buf:       buffer to store data
  * @buf:       buffer to store data
  * @len:       number of bytes to read
  *
  * read function for 16bit buswidth
  */
-static void au_read_buf16(struct mtd_info *mtd, u_char *buf, int len)
+static void au_read_buf16(struct nand_chip *this, u_char *buf, int len)
 {
        int i;
-       struct nand_chip *this = mtd_to_nand(mtd);
        u16 *p = (u16 *) buf;
        len >>= 1;
 
index 1d8621d..7acf4a9 100644 (file)
@@ -487,7 +487,7 @@ static int write_sr(struct spi_nor *nor, u8 val)
                        SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
                                   SPI_MEM_OP_NO_ADDR,
                                   SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+                                  SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1));
 
                return spi_mem_exec_op(nor->spimem, &op);
        }
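
A one-character fix with real effect: WRSR carries a status byte to the flash, so the data phase must drive the bus. With DATA_IN the controller sampled instead of driving, and the register write silently did nothing. The two direction macros, for contrast:

	SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1)	/* host -> flash: correct for WRSR */
	SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1)	/* flash -> host: the old bug */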
index 48e209e..df1c798 100644 (file)
@@ -487,7 +487,7 @@ config FUJITSU_ES
        depends on ACPI
        help
          This driver provides support for Extended Socket network device
-          on Extended Partitioning of FUJITSU PRIMEQUEST 2000 E2 series.
+         on Extended Partitioning of FUJITSU PRIMEQUEST 2000 E2 series.
 
 config THUNDERBOLT_NET
        tristate "Networking over Thunderbolt cable"
index faeb441..27551bf 100644 (file)
@@ -56,19 +56,19 @@ config ARCNET_CAP
        tristate "Enable CAP mode packet interface"
        help
          ARCnet "cap mode" packet encapsulation. Used to get the hardware
-          acknowledge back to userspace. After the initial protocol byte every
-          packet is stuffed with an extra 4 byte "cookie" which doesn't
-          actually appear on the network. After transmit the driver will send
-          back a packet with protocol byte 0 containing the status of the
-          transmission:
-             0=no hardware acknowledge
-             1=excessive nak
-             2=transmission accepted by the receiver hardware
-
-          Received packets are also stuffed with the extra 4 bytes but it will
-          be random data.
-
-          Cap only listens to protocol 1-8.
+         acknowledge back to userspace. After the initial protocol byte every
+         packet is stuffed with an extra 4 byte "cookie" which doesn't
+         actually appear on the network. After transmit the driver will send
+         back a packet with protocol byte 0 containing the status of the
+         transmission:
+            0=no hardware acknowledge
+            1=excessive nak
+            2=transmission accepted by the receiver hardware
+
+         Received packets are also stuffed with the extra 4 bytes but it will
+         be random data.
+
+         Cap only listens to protocol 1-8.
 
 config ARCNET_COM90xx
        tristate "ARCnet COM90xx (normal) chipset driver"
index 8459115..553776c 100644 (file)
@@ -1063,31 +1063,34 @@ EXPORT_SYMBOL(arcnet_interrupt);
 static void arcnet_rx(struct net_device *dev, int bufnum)
 {
        struct arcnet_local *lp = netdev_priv(dev);
-       struct archdr pkt;
+       union {
+               struct archdr pkt;
+               char buf[512];
+       } rxdata;
        struct arc_rfc1201 *soft;
        int length, ofs;
 
-       soft = &pkt.soft.rfc1201;
+       soft = &rxdata.pkt.soft.rfc1201;
 
-       lp->hw.copy_from_card(dev, bufnum, 0, &pkt, ARC_HDR_SIZE);
-       if (pkt.hard.offset[0]) {
-               ofs = pkt.hard.offset[0];
+       lp->hw.copy_from_card(dev, bufnum, 0, &rxdata.pkt, ARC_HDR_SIZE);
+       if (rxdata.pkt.hard.offset[0]) {
+               ofs = rxdata.pkt.hard.offset[0];
                length = 256 - ofs;
        } else {
-               ofs = pkt.hard.offset[1];
+               ofs = rxdata.pkt.hard.offset[1];
                length = 512 - ofs;
        }
 
        /* get the full header, if possible */
-       if (sizeof(pkt.soft) <= length) {
-               lp->hw.copy_from_card(dev, bufnum, ofs, soft, sizeof(pkt.soft));
+       if (sizeof(rxdata.pkt.soft) <= length) {
+               lp->hw.copy_from_card(dev, bufnum, ofs, soft, sizeof(rxdata.pkt.soft));
        } else {
-               memset(&pkt.soft, 0, sizeof(pkt.soft));
+               memset(&rxdata.pkt.soft, 0, sizeof(rxdata.pkt.soft));
                lp->hw.copy_from_card(dev, bufnum, ofs, soft, length);
        }
 
        arc_printk(D_DURING, dev, "Buffer #%d: received packet from %02Xh to %02Xh (%d+4 bytes)\n",
-                  bufnum, pkt.hard.source, pkt.hard.dest, length);
+                  bufnum, rxdata.pkt.hard.source, rxdata.pkt.hard.dest, length);
 
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += length + ARC_HDR_SIZE;
@@ -1096,13 +1099,13 @@ static void arcnet_rx(struct net_device *dev, int bufnum)
        if (arc_proto_map[soft->proto]->is_ip) {
                if (BUGLVL(D_PROTO)) {
                        struct ArcProto
-                       *oldp = arc_proto_map[lp->default_proto[pkt.hard.source]],
+                       *oldp = arc_proto_map[lp->default_proto[rxdata.pkt.hard.source]],
                        *newp = arc_proto_map[soft->proto];
 
                        if (oldp != newp) {
                                arc_printk(D_PROTO, dev,
                                           "got protocol %02Xh; encap for host %02Xh is now '%c' (was '%c')\n",
-                                          soft->proto, pkt.hard.source,
+                                          soft->proto, rxdata.pkt.hard.source,
                                           newp->suffix, oldp->suffix);
                        }
                }
@@ -1111,10 +1114,10 @@ static void arcnet_rx(struct net_device *dev, int bufnum)
                lp->default_proto[0] = soft->proto;
 
                /* in striking contrast, the following isn't a hack. */
-               lp->default_proto[pkt.hard.source] = soft->proto;
+               lp->default_proto[rxdata.pkt.hard.source] = soft->proto;
        }
        /* call the protocol-specific receiver. */
-       arc_proto_map[soft->proto]->rx(dev, bufnum, &pkt, length);
+       arc_proto_map[soft->proto]->rx(dev, bufnum, &rxdata.pkt, length);
 }
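
The rework above closes a stack overrun: struct archdr only describes the packet header, yet up to 512 - ofs bytes were copied into it. The union keeps the typed header view while reserving backing store for a full ARCnet frame; in miniature:

	union {
		struct archdr pkt;	/* typed header view */
		char buf[512];		/* full-frame backing store */
	} rxdata;

	/* sizeof(rxdata) == 512, so a copy bounded by "length"
	 * (at most 512 - ofs) can no longer run past the object */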
 
 static void null_rx(struct net_device *dev, int bufnum,
index 8c79bad..4f2e691 100644 (file)
@@ -952,7 +952,7 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data)
        struct bond_vlan_tag *tags;
 
        if (is_vlan_dev(upper) &&
-           bond->nest_level == vlan_get_encap_level(upper) - 1) {
+           bond->dev->lower_level == upper->lower_level - 1) {
                if (upper->addr_assign_type == NET_ADDR_STOLEN) {
                        alb_send_lp_vid(slave, mac_addr,
                                        vlan_dev_vlan_proto(upper),
index 931d9d9..480f945 100644 (file)
@@ -1733,8 +1733,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                goto err_upper_unlink;
        }
 
-       bond->nest_level = dev_get_nest_level(bond_dev) + 1;
-
        /* If the mode uses primary, then the following is handled by
         * bond_change_active_slave().
         */
@@ -1816,7 +1814,8 @@ err_detach:
        slave_disable_netpoll(new_slave);
 
 err_close:
-       slave_dev->priv_flags &= ~IFF_BONDING;
+       if (!netif_is_bond_master(slave_dev))
+               slave_dev->priv_flags &= ~IFF_BONDING;
        dev_close(slave_dev);
 
 err_restore_mac:
@@ -1956,9 +1955,6 @@ static int __bond_release_one(struct net_device *bond_dev,
        if (!bond_has_slaves(bond)) {
                bond_set_carrier(bond);
                eth_hw_addr_random(bond_dev);
-               bond->nest_level = SINGLE_DEPTH_NESTING;
-       } else {
-               bond->nest_level = dev_get_nest_level(bond_dev) + 1;
        }
 
        unblock_netpoll_tx();
@@ -2017,7 +2013,8 @@ static int __bond_release_one(struct net_device *bond_dev,
        else
                dev_set_mtu(slave_dev, slave->original_mtu);
 
-       slave_dev->priv_flags &= ~IFF_BONDING;
+       if (!netif_is_bond_master(slave_dev))
+               slave_dev->priv_flags &= ~IFF_BONDING;
 
        bond_free_slave(slave);
 
@@ -3442,13 +3439,6 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res,
        }
 }
 
-static int bond_get_nest_level(struct net_device *bond_dev)
-{
-       struct bonding *bond = netdev_priv(bond_dev);
-
-       return bond->nest_level;
-}
-
 static void bond_get_stats(struct net_device *bond_dev,
                           struct rtnl_link_stats64 *stats)
 {
@@ -3457,7 +3447,7 @@ static void bond_get_stats(struct net_device *bond_dev,
        struct list_head *iter;
        struct slave *slave;
 
-       spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev));
+       spin_lock(&bond->stats_lock);
        memcpy(stats, &bond->bond_stats, sizeof(*stats));
 
        rcu_read_lock();
@@ -4039,7 +4029,7 @@ out:
                 * this to-be-skipped slave to send a packet out.
                 */
                old_arr = rtnl_dereference(bond->slave_arr);
-               for (idx = 0; idx < old_arr->count; idx++) {
+               for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) {
                        if (skipslave == old_arr->arr[idx]) {
                                old_arr->arr[idx] =
                                    old_arr->arr[old_arr->count-1];
@@ -4268,7 +4258,6 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_neigh_setup        = bond_neigh_setup,
        .ndo_vlan_rx_add_vid    = bond_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = bond_vlan_rx_kill_vid,
-       .ndo_get_lock_subclass  = bond_get_nest_level,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_netpoll_setup      = bond_netpoll_setup,
        .ndo_netpoll_cleanup    = bond_netpoll_cleanup,
@@ -4296,7 +4285,6 @@ void bond_setup(struct net_device *bond_dev)
        struct bonding *bond = netdev_priv(bond_dev);
 
        spin_lock_init(&bond->mode_lock);
-       spin_lock_init(&bond->stats_lock);
        bond->params = bonding_defaults;
 
        /* Initialize pointers */
@@ -4365,6 +4353,7 @@ static void bond_uninit(struct net_device *bond_dev)
 
        list_del(&bond->bond_list);
 
+       lockdep_unregister_key(&bond->stats_lock_key);
        bond_debug_unregister(bond);
 }
 
@@ -4768,8 +4757,9 @@ static int bond_init(struct net_device *bond_dev)
        if (!bond->wq)
                return -ENOMEM;
 
-       bond->nest_level = SINGLE_DEPTH_NESTING;
-       netdev_lockdep_set_classes(bond_dev);
+       spin_lock_init(&bond->stats_lock);
+       lockdep_register_key(&bond->stats_lock_key);
+       lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key);
 
        list_add_tail(&bond->bond_list, &bn->dev_list);
 
index 4b3d0dd..b412f7b 100644 (file)
@@ -15,10 +15,10 @@ config CAN_EMS_USB
          from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de).
 
 config CAN_ESD_USB2
-        tristate "ESD USB/2 CAN/USB interface"
-        ---help---
-          This driver supports the CAN-USB/2 interface
-          from esd electronic system design gmbh (http://www.esd.eu).
+       tristate "ESD USB/2 CAN/USB interface"
+       ---help---
+         This driver supports the CAN-USB/2 interface
+         from esd electronic system design gmbh (http://www.esd.eu).
 
 config CAN_GS_USB
        tristate "Geschwister Schneider UG interfaces"
index 526ba2a..cc35363 100644 (file)
@@ -1845,7 +1845,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port,
                loc = B53_EG_MIR_CTL;
 
        b53_read16(dev, B53_MGMT_PAGE, loc, &reg);
-       reg &= ~MIRROR_MASK;
        reg |= BIT(port);
        b53_write16(dev, B53_MGMT_PAGE, loc, reg);
 
index 3bb4f91..55d280f 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
- *
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/*
  * Northstar Plus switch SerDes/SGMII PHY definitions
  *
  * Copyright (C) 2018 Florian Fainelli <f.fainelli@gmail.com>
index 26509fa..d44651a 100644 (file)
@@ -37,22 +37,11 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
        unsigned int i;
        u32 reg, offset;
 
-       if (priv->type == BCM7445_DEVICE_ID)
-               offset = CORE_STS_OVERRIDE_IMP;
-       else
-               offset = CORE_STS_OVERRIDE_IMP2;
-
        /* Enable the port memories */
        reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
        reg &= ~P_TXQ_PSM_VDD(port);
        core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
 
-       /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
-       reg = core_readl(priv, CORE_IMP_CTL);
-       reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
-       reg &= ~(RX_DIS | TX_DIS);
-       core_writel(priv, reg, CORE_IMP_CTL);
-
        /* Enable forwarding */
        core_writel(priv, SW_FWDG_EN, CORE_SWMODE);
 
@@ -71,10 +60,27 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
 
        b53_brcm_hdr_setup(ds, port);
 
-       /* Force link status for IMP port */
-       reg = core_readl(priv, offset);
-       reg |= (MII_SW_OR | LINK_STS);
-       core_writel(priv, reg, offset);
+       if (port == 8) {
+               if (priv->type == BCM7445_DEVICE_ID)
+                       offset = CORE_STS_OVERRIDE_IMP;
+               else
+                       offset = CORE_STS_OVERRIDE_IMP2;
+
+               /* Force link status for IMP port */
+               reg = core_readl(priv, offset);
+               reg |= (MII_SW_OR | LINK_STS);
+               core_writel(priv, reg, offset);
+
+               /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
+               reg = core_readl(priv, CORE_IMP_CTL);
+               reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
+               reg &= ~(RX_DIS | TX_DIS);
+               core_writel(priv, reg, CORE_IMP_CTL);
+       } else {
+               reg = core_readl(priv, CORE_G_PCTL_PORT(port));
+               reg &= ~(RX_DIS | TX_DIS);
+               core_writel(priv, reg, CORE_G_PCTL_PORT(port));
+       }
 }
 
 static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
index 1806631..e2be31f 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * PCE microcode extracted from UGW 7.1.1 switch api
  *
index a23d3ff..24a5e99 100644 (file)
@@ -1224,10 +1224,6 @@ static int ksz8795_switch_init(struct ksz_device *dev)
 {
        int i;
 
-       mutex_init(&dev->stats_mutex);
-       mutex_init(&dev->alu_mutex);
-       mutex_init(&dev->vlan_mutex);
-
        dev->ds->ops = &ksz8795_switch_ops;
 
        for (i = 0; i < ARRAY_SIZE(ksz8795_switch_chips); i++) {
index d0f8153..8b00f8e 100644 (file)
@@ -25,6 +25,7 @@ KSZ_REGMAP_TABLE(ksz8795, 16, SPI_ADDR_SHIFT,
 
 static int ksz8795_spi_probe(struct spi_device *spi)
 {
+       struct regmap_config rc;
        struct ksz_device *dev;
        int i, ret;
 
@@ -33,9 +34,9 @@ static int ksz8795_spi_probe(struct spi_device *spi)
                return -ENOMEM;
 
        for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) {
-               dev->regmap[i] = devm_regmap_init_spi(spi,
-                                                     &ksz8795_regmap_config
-                                                     [i]);
+               rc = ksz8795_regmap_config[i];
+               rc.lock_arg = &dev->regmap_mutex;
+               dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
                if (IS_ERR(dev->regmap[i])) {
                        ret = PTR_ERR(dev->regmap[i]);
                        dev_err(&spi->dev,
index 0b1e01f..fdffd9e 100644 (file)
@@ -17,6 +17,7 @@ KSZ_REGMAP_TABLE(ksz9477, not_used, 16, 0, 0);
 static int ksz9477_i2c_probe(struct i2c_client *i2c,
                             const struct i2c_device_id *i2c_id)
 {
+       struct regmap_config rc;
        struct ksz_device *dev;
        int i, ret;
 
@@ -25,8 +26,9 @@ static int ksz9477_i2c_probe(struct i2c_client *i2c,
                return -ENOMEM;
 
        for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
-               dev->regmap[i] = devm_regmap_init_i2c(i2c,
-                                       &ksz9477_regmap_config[i]);
+               rc = ksz9477_regmap_config[i];
+               rc.lock_arg = &dev->regmap_mutex;
+               dev->regmap[i] = devm_regmap_init_i2c(i2c, &rc);
                if (IS_ERR(dev->regmap[i])) {
                        ret = PTR_ERR(dev->regmap[i]);
                        dev_err(&i2c->dev,
index 2938e89..16939f2 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
  * Microchip KSZ9477 register definitions
  *
  * Copyright (C) 2017-2018 Microchip Technology Inc.
index f4198d6..c5f6495 100644 (file)
@@ -24,6 +24,7 @@ KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT,
 
 static int ksz9477_spi_probe(struct spi_device *spi)
 {
+       struct regmap_config rc;
        struct ksz_device *dev;
        int i, ret;
 
@@ -32,8 +33,9 @@ static int ksz9477_spi_probe(struct spi_device *spi)
                return -ENOMEM;
 
        for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
-               dev->regmap[i] = devm_regmap_init_spi(spi,
-                                       &ksz9477_regmap_config[i]);
+               rc = ksz9477_regmap_config[i];
+               rc.lock_arg = &dev->regmap_mutex;
+               dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
                if (IS_ERR(dev->regmap[i])) {
                        ret = PTR_ERR(dev->regmap[i]);
                        dev_err(&spi->dev,
index b0b870f..fe47180 100644 (file)
@@ -436,7 +436,7 @@ int ksz_switch_register(struct ksz_device *dev,
        }
 
        mutex_init(&dev->dev_mutex);
-       mutex_init(&dev->stats_mutex);
+       mutex_init(&dev->regmap_mutex);
        mutex_init(&dev->alu_mutex);
        mutex_init(&dev->vlan_mutex);
 
index a24d8e6..a20ebb7 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Microchip switch driver common header
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Microchip switch driver common header
  *
  * Copyright (C) 2017-2019 Microchip Technology Inc.
  */
@@ -47,7 +47,7 @@ struct ksz_device {
        const char *name;
 
        struct mutex dev_mutex;         /* device access */
-       struct mutex stats_mutex;       /* status access */
+       struct mutex regmap_mutex;      /* regmap access */
        struct mutex alu_mutex;         /* ALU access */
        struct mutex vlan_mutex;        /* vlan access */
        const struct ksz_dev_ops *dev_ops;
@@ -290,6 +290,18 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
        ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data);
 }
 
+static inline void ksz_regmap_lock(void *__mtx)
+{
+       struct mutex *mtx = __mtx;
+       mutex_lock(mtx);
+}
+
+static inline void ksz_regmap_unlock(void *__mtx)
+{
+       struct mutex *mtx = __mtx;
+       mutex_unlock(mtx);
+}
+
 /* Regmap tables generation */
 #define KSZ_SPI_OP_RD          3
 #define KSZ_SPI_OP_WR          2
@@ -303,7 +315,7 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
        {                                                               \
                .name = #width,                                         \
                .val_bits = (width),                                    \
-               .reg_stride = (width) / 8,                              \
+               .reg_stride = 1,                                        \
                .reg_bits = (regbits) + (regalign),                     \
                .pad_bits = (regpad),                                   \
                .max_register = BIT(regbits) - 1,                       \
@@ -314,6 +326,8 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
                .write_flag_mask =                                      \
                        KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp,        \
                                             regbits, regpad),          \
+               .lock = ksz_regmap_lock,                                \
+               .unlock = ksz_regmap_unlock,                            \
                .reg_format_endian = REGMAP_ENDIAN_BIG,                 \
                .val_format_endian = REGMAP_ENDIAN_BIG                  \
        }
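
All three register-map templates, the 8-, 16- and 32-bit access widths, now share one driver mutex through regmap's custom locking hooks: the template sets .lock/.unlock, and each bus-specific probe copies the template and points .lock_arg at its device's mutex, so mixed-width register sequences serialize against each other. The moving parts, in sketch form; hook names follow the patch:

	/* in the shared KSZ_REGMAP_TABLE template */
	.lock	= ksz_regmap_lock,	/* mutex_lock(lock_arg) */
	.unlock	= ksz_regmap_unlock,	/* mutex_unlock(lock_arg) */

	/* in each probe */
	struct regmap_config rc = ksz8795_regmap_config[i];	/* copy */
	rc.lock_arg = &dev->regmap_mutex;	/* one mutex for all maps */
	dev->regmap[i] = devm_regmap_init_spi(spi, &rc);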
index 16f15c9..b00274c 100644 (file)
@@ -705,7 +705,7 @@ qca8k_setup(struct dsa_switch *ds)
                    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S);
 
        /* Setup connection between CPU port & user ports */
-       for (i = 0; i < DSA_MAX_PORTS; i++) {
+       for (i = 0; i < QCA8K_NUM_PORTS; i++) {
                /* CPU port gets connected to all user ports of the switch */
                if (dsa_is_cpu_port(ds, i)) {
                        qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT),
@@ -936,6 +936,9 @@ qca8k_port_enable(struct dsa_switch *ds, int port,
 {
        struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 
+       if (!dsa_is_user_port(ds, port))
+               return 0;
+
        qca8k_port_set_status(priv, port, 1);
        priv->port_sts[port].enabled = 1;
 
@@ -1074,7 +1077,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
        if (id != QCA8K_ID_QCA8337)
                return -ENODEV;
 
-       priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS);
+       priv->ds = dsa_switch_alloc(&mdiodev->dev, QCA8K_NUM_PORTS);
        if (!priv->ds)
                return -ENOMEM;
 
index ca3d17e..ac88cac 100644 (file)
@@ -339,10 +339,12 @@ int rtl8366_vlan_prepare(struct dsa_switch *ds, int port,
                         const struct switchdev_obj_port_vlan *vlan)
 {
        struct realtek_smi *smi = ds->priv;
+       u16 vid;
        int ret;
 
-       if (!smi->ops->is_vlan_valid(smi, port))
-               return -EINVAL;
+       for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++)
+               if (!smi->ops->is_vlan_valid(smi, vid))
+                       return -EINVAL;
 
        dev_info(smi->dev, "prepare VLANs %04x..%04x\n",
                 vlan->vid_begin, vlan->vid_end);
@@ -370,8 +372,9 @@ void rtl8366_vlan_add(struct dsa_switch *ds, int port,
        u16 vid;
        int ret;
 
-       if (!smi->ops->is_vlan_valid(smi, port))
-               return;
+       for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++)
+               if (!smi->ops->is_vlan_valid(smi, vid))
+                       return;
 
        dev_info(smi->dev, "add VLAN on port %d, %s, %s\n",
                 port,
index a268085..f5cc8b0 100644 (file)
@@ -507,7 +507,8 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
        irq = of_irq_get(intc, 0);
        if (irq <= 0) {
                dev_err(smi->dev, "failed to get parent IRQ\n");
-               return irq ? irq : -EINVAL;
+               ret = irq ? irq : -EINVAL;
+               goto out_put_node;
        }
 
        /* This clears the IRQ status register */
@@ -515,7 +516,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
                          &val);
        if (ret) {
                dev_err(smi->dev, "can't read interrupt status\n");
-               return ret;
+               goto out_put_node;
        }
 
        /* Fetch IRQ edge information from the descriptor */
@@ -537,7 +538,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
                                 val);
        if (ret) {
                dev_err(smi->dev, "could not configure IRQ polarity\n");
-               return ret;
+               goto out_put_node;
        }
 
        ret = devm_request_threaded_irq(smi->dev, irq, NULL,
@@ -545,7 +546,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
                                        "RTL8366RB", smi);
        if (ret) {
                dev_err(smi->dev, "unable to request irq: %d\n", ret);
-               return ret;
+               goto out_put_node;
        }
        smi->irqdomain = irq_domain_add_linear(intc,
                                               RTL8366RB_NUM_INTERRUPT,
@@ -553,12 +554,15 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
                                               smi);
        if (!smi->irqdomain) {
                dev_err(smi->dev, "failed to create IRQ domain\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out_put_node;
        }
        for (i = 0; i < smi->num_ports; i++)
                irq_set_parent(irq_create_mapping(smi->irqdomain, i), irq);
 
-       return 0;
+out_put_node:
+       of_node_put(intc);
+       return ret;
 }
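
Every early return above used to leak the reference held on the interrupt-controller node; funnelling all exits through one label pairs the lookup with exactly one of_node_put(). The shape of the fix, with a placeholder failure condition for illustration:

	int ret = 0;

	if (some_step_failed) {		/* placeholder condition */
		ret = -EINVAL;
		goto out_put_node;
	}
	/* ... further steps, all jumping to out_put_node on error ... */
out_put_node:
	of_node_put(intc);		/* dropped on every path */
	return ret;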
 
 static int rtl8366rb_set_addr(struct realtek_smi *smi)
index 55424f3..ffac0ea 100644 (file)
@@ -26,7 +26,8 @@ config NET_DSA_SJA1105_PTP
 
 config NET_DSA_SJA1105_TAS
        bool "Support for the Time-Aware Scheduler on NXP SJA1105"
-       depends on NET_DSA_SJA1105
+       depends on NET_DSA_SJA1105 && NET_SCH_TAPRIO
+       depends on NET_SCH_TAPRIO=y || NET_DSA_SJA1105=m
        help
          This enables support for the TTEthernet-based egress scheduling
          engine in the SJA1105 DSA driver, which is controlled using a
index e53e494..fbb564c 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _SJA1105_H
index 740dadf..1fc0d13 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _SJA1105_DYNAMIC_CONFIG_H
 #define _SJA1105_DYNAMIC_CONFIG_H
index b9def74..7687ddc 100644 (file)
@@ -1897,7 +1897,9 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds,
        return sja1105_static_config_reload(priv);
 }
 
-/* Caller must hold priv->tagger_data.meta_lock */
+/* Must be called only with priv->tagger_data.state bit
+ * SJA1105_HWTS_RX_EN cleared
+ */
 static int sja1105_change_rxtstamping(struct sja1105_private *priv,
                                      bool on)
 {
@@ -1954,16 +1956,17 @@ static int sja1105_hwtstamp_set(struct dsa_switch *ds, int port,
                break;
        }
 
-       if (rx_on != priv->tagger_data.hwts_rx_en) {
-               spin_lock(&priv->tagger_data.meta_lock);
+       if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) {
+               clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state);
+
                rc = sja1105_change_rxtstamping(priv, rx_on);
-               spin_unlock(&priv->tagger_data.meta_lock);
                if (rc < 0) {
                        dev_err(ds->dev,
                                "Failed to change RX timestamping: %d\n", rc);
-                       return -EFAULT;
+                       return rc;
                }
-               priv->tagger_data.hwts_rx_en = rx_on;
+               if (rx_on)
+                       set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state);
        }
 
        if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
@@ -1982,7 +1985,7 @@ static int sja1105_hwtstamp_get(struct dsa_switch *ds, int port,
                config.tx_type = HWTSTAMP_TX_ON;
        else
                config.tx_type = HWTSTAMP_TX_OFF;
-       if (priv->tagger_data.hwts_rx_en)
+       if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state))
                config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
        else
                config.rx_filter = HWTSTAMP_FILTER_NONE;
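
hwts_rx_en was a plain bool guarded by meta_lock; it is now an atomic bit in tagger_data.state, so the tagger RX hot path can test it without taking a lock and enable/disable transitions stay race-free. The bitops trio in use, shown on a stand-in word:

	unsigned long state = 0;	/* stand-in for tagger_data.state */

	set_bit(SJA1105_HWTS_RX_EN, &state);	/* enable, atomically */
	if (test_bit(SJA1105_HWTS_RX_EN, &state))
		;	/* RX timestamping currently on */
	clear_bit(SJA1105_HWTS_RX_EN, &state);	/* disable */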
@@ -2005,12 +2008,12 @@ static void sja1105_rxtstamp_work(struct work_struct *work)
 
        mutex_lock(&priv->ptp_lock);
 
-       now = priv->tstamp_cc.read(&priv->tstamp_cc);
-
        while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) {
                struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb);
                u64 ts;
 
+               now = priv->tstamp_cc.read(&priv->tstamp_cc);
+
                *shwt = (struct skb_shared_hwtstamps) {0};
 
                ts = SJA1105_SKB_CB(skb)->meta_tstamp;
@@ -2031,7 +2034,7 @@ static bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
        struct sja1105_private *priv = ds->priv;
        struct sja1105_tagger_data *data = &priv->tagger_data;
 
-       if (!data->hwts_rx_en)
+       if (!test_bit(SJA1105_HWTS_RX_EN, &data->state))
                return false;
 
        /* We need to read the full PTP clock to reconstruct the Rx
@@ -2201,6 +2204,7 @@ static int sja1105_probe(struct spi_device *spi)
        tagger_data = &priv->tagger_data;
        skb_queue_head_init(&tagger_data->skb_rxtstamp_queue);
        INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work);
+       spin_lock_init(&tagger_data->meta_lock);
 
        /* Connections between dsa_port and sja1105_port */
        for (i = 0; i < SJA1105_NUM_PORTS; i++) {
index af456b0..394e12a 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _SJA1105_PTP_H
 #define _SJA1105_PTP_H
index 84dc603..58dd37e 100644 (file)
@@ -409,7 +409,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv)
        rc = static_config_buf_prepare_for_upload(priv, config_buf, buf_len);
        if (rc < 0) {
                dev_err(dev, "Invalid config, cannot upload\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto out;
        }
        /* Prevent PHY jabbering during switch reset by inhibiting
         * Tx on all ports and waiting for current packet to drain.
@@ -418,7 +419,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv)
        rc = sja1105_inhibit_tx(priv, port_bitmap, true);
        if (rc < 0) {
                dev_err(dev, "Failed to inhibit Tx on ports\n");
-               return -ENXIO;
+               rc = -ENXIO;
+               goto out;
        }
        /* Wait for an eventual egress packet to finish transmission
         * (reach IFG). It is guaranteed that a second one will not
index 7f87022..f4a5c5c 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2016-2018, NXP Semiconductors
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2016-2018, NXP Semiconductors
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _SJA1105_STATIC_CONFIG_H
index 0b803c3..0aad212 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _SJA1105_TAS_H
 #define _SJA1105_TAS_H
index 1e2de9d..e8e9c16 100644 (file)
@@ -140,17 +140,6 @@ source "drivers/net/ethernet/neterion/Kconfig"
 source "drivers/net/ethernet/netronome/Kconfig"
 source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/8390/Kconfig"
-
-config NET_NETX
-       tristate "NetX Ethernet support"
-       select MII
-       depends on ARCH_NETX
-       ---help---
-         This is support for the Hilscher netX builtin Ethernet ports
-
-         To compile this driver as a module, choose M here. The module
-         will be called netx-eth.
-
 source "drivers/net/ethernet/nvidia/Kconfig"
 source "drivers/net/ethernet/nxp/Kconfig"
 source "drivers/net/ethernet/oki-semi/Kconfig"
index 77f9838..05abebc 100644 (file)
@@ -64,7 +64,6 @@ obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
 obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/
 obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
 obj-$(CONFIG_NET_VENDOR_NI) += ni/
-obj-$(CONFIG_NET_NETX) += netx-eth.o
 obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/
 obj-$(CONFIG_LPC_ENET) += nxp/
 obj-$(CONFIG_NET_VENDOR_OKI) += oki-semi/
index a5e2bcb..264a482 100644 (file)
@@ -21,17 +21,17 @@ config NET_VENDOR_ALLWINNER
 if NET_VENDOR_ALLWINNER
 
 config SUN4I_EMAC
-        tristate "Allwinner A10 EMAC support"
+       tristate "Allwinner A10 EMAC support"
        depends on ARCH_SUNXI
        depends on OF
        select CRC32
        select MII
        select PHYLIB
        select MDIO_SUN4I
-        ---help---
-          Support for Allwinner A10 EMAC ethernet driver.
+       ---help---
+         Support for Allwinner A10 EMAC ethernet driver.
 
-          To compile this driver as a module, choose M here.  The module
-          will be called sun4i-emac.
+         To compile this driver as a module, choose M here.  The module
+         will be called sun4i-emac.
 
 endif # NET_VENDOR_ALLWINNER
index 69ca99d..cca72a7 100644 (file)
@@ -19,6 +19,7 @@ if NET_VENDOR_AMAZON
 config ENA_ETHERNET
        tristate "Elastic Network Adapter (ENA) support"
        depends on PCI_MSI && !CPU_BIG_ENDIAN
+       select DIMLIB
        ---help---
          This driver supports Elastic Network Adapter (ENA)
 
index 38046bf..2845ac2 100644 (file)
@@ -211,8 +211,8 @@ static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
 
                pkt_ctrl->curr_bounce_buf =
                        ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
-                       memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
-                              0x0, llq_info->desc_list_entry_size);
+               memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+                      0x0, llq_info->desc_list_entry_size);
 
                pkt_ctrl->idx = 0;
                if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY))
index b4a0fb2..bb65dd3 100644 (file)
@@ -194,9 +194,7 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev)
 {
        struct aq_nic_s *aq_nic = netdev_priv(ndev);
 
-       aq_nic_set_packet_filter(aq_nic, ndev->flags);
-
-       aq_nic_set_multicast_list(aq_nic, ndev);
+       (void)aq_nic_set_multicast_list(aq_nic, ndev);
 }
 
 static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto,
index 8f66e78..137c1de 100644 (file)
@@ -631,9 +631,12 @@ err_exit:
 
 int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
 {
-       unsigned int packet_filter = self->packet_filter;
+       const struct aq_hw_ops *hw_ops = self->aq_hw_ops;
+       struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+       unsigned int packet_filter = ndev->flags;
        struct netdev_hw_addr *ha = NULL;
        unsigned int i = 0U;
+       int err = 0;
 
        self->mc_list.count = 0;
        if (netdev_uc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
@@ -641,29 +644,28 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
        } else {
                netdev_for_each_uc_addr(ha, ndev) {
                        ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
-                       if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
-                               break;
                }
        }
 
-       if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
-               packet_filter |= IFF_ALLMULTI;
-       } else {
-               netdev_for_each_mc_addr(ha, ndev) {
-                       ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
-                       if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
-                               break;
+       cfg->is_mc_list_enabled = !!(packet_filter & IFF_MULTICAST);
+       if (cfg->is_mc_list_enabled) {
+               if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
+                       packet_filter |= IFF_ALLMULTI;
+               } else {
+                       netdev_for_each_mc_addr(ha, ndev) {
+                               ether_addr_copy(self->mc_list.ar[i++],
+                                               ha->addr);
+                       }
                }
        }
 
        if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) {
-               packet_filter |= IFF_MULTICAST;
                self->mc_list.count = i;
-               self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
-                                                      self->mc_list.ar,
-                                                      self->mc_list.count);
+               err = hw_ops->hw_multicast_list_set(self->aq_hw,
+                                                   self->mc_list.ar,
+                                                   self->mc_list.count);
+               if (err < 0)
+                       return err;
        }
        return aq_nic_set_packet_filter(self, packet_filter);
 }
index 3901d79..76bdbe1 100644 (file)
@@ -313,6 +313,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                                        break;
 
                                buff->is_error |= buff_->is_error;
+                               buff->is_cso_err |= buff_->is_cso_err;
 
                        } while (!buff_->is_eop);
 
@@ -320,7 +321,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                                err = 0;
                                goto err_exit;
                        }
-                       if (buff->is_error) {
+                       if (buff->is_error || buff->is_cso_err) {
                                buff_ = buff;
                                do {
                                        next_ = buff_->next,
index 28892b8..a95c263 100644 (file)
@@ -306,15 +306,13 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private)
 {
        struct aq_vec_s *self = private;
        u64 irq_mask = 0U;
-       irqreturn_t err = 0;
+       int err;
 
-       if (!self) {
-               err = -EINVAL;
-               goto err_exit;
-       }
+       if (!self)
+               return IRQ_NONE;
        err = self->aq_hw_ops->hw_irq_read(self->aq_hw, &irq_mask);
        if (err < 0)
-               goto err_exit;
+               return IRQ_NONE;
 
        if (irq_mask) {
                self->aq_hw_ops->hw_irq_disable(self->aq_hw,
@@ -322,11 +320,10 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private)
                napi_schedule(&self->napi);
        } else {
                self->aq_hw_ops->hw_irq_enable(self->aq_hw, 1U);
-               err = IRQ_NONE;
+               return IRQ_NONE;
        }
 
-err_exit:
-       return err >= 0 ? IRQ_HANDLED : IRQ_NONE;
+       return IRQ_HANDLED;
 }
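
The old handler funnelled -EINVAL through an irqreturn_t, a type the IRQ core only understands as IRQ_NONE, IRQ_HANDLED or IRQ_WAKE_THREAD. Reduced to its shape, the corrected handler:

	static irqreturn_t example_isr(int irq, void *private)	/* sketch */
	{
		struct aq_vec_s *self = private;
		u64 mask = 0;

		if (!self)
			return IRQ_NONE;	/* not ours / not bound */
		if (self->aq_hw_ops->hw_irq_read(self->aq_hw, &mask) < 0)
			return IRQ_NONE;	/* read failed: don't claim it */
		napi_schedule(&self->napi);
		return IRQ_HANDLED;
	}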
 
 cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self)
index 30f7fc4..2ad3fa6 100644 (file)
@@ -818,14 +818,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
                                     cfg->is_vlan_force_promisc);
 
        hw_atl_rpfl2multicast_flr_en_set(self,
-                                        IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
+                                        IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+                                        IS_FILTER_ENABLED(IFF_MULTICAST), 0);
 
        hw_atl_rpfl2_accept_all_mc_packets_set(self,
-                                              IS_FILTER_ENABLED(IFF_ALLMULTI));
+                                             IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+                                             IS_FILTER_ENABLED(IFF_MULTICAST));
 
        hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
 
-       cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
 
        for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
                hw_atl_rpfl2_uc_flr_en_set(self,
@@ -968,14 +969,26 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self)
 
 static int hw_atl_b0_hw_stop(struct aq_hw_s *self)
 {
+       int err;
+       u32 val;
+
        hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK);
 
        /* Invalidate Descriptor Cache to prevent writing to the cached
         * descriptors and to the data pointer of those descriptors
         */
-       hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1);
+       hw_atl_rdm_rx_dma_desc_cache_init_tgl(self);
 
-       return aq_hw_err_from_flags(self);
+       err = aq_hw_err_from_flags(self);
+
+       if (err)
+               goto err_exit;
+
+       readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get,
+                                 self, val, val == 1, 1000U, 10000U);
+
+err_exit:
+       return err;
 }
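
hw_stop now waits for the descriptor-cache init toggle to actually complete. readx_poll_timeout_atomic(), from <linux/iopoll.h>, re-invokes a getter until a condition holds or the timeout expires; its use here, annotated:

	u32 val;

	/* poll ..._init_done_get(self) every 1000 us, give up after
	 * 10000 us; "val" receives each sampled value */
	readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get,
				  self, val, val == 1, 1000U, 10000U);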
 
 static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self,
index 1149812..6f34069 100644 (file)
@@ -606,12 +606,25 @@ void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode
                            HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode);
 }
 
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init)
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw)
 {
+       u32 val;
+
+       val = aq_hw_read_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
+                                HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
+                                HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT);
+
        aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
                            HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
                            HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT,
-                           init);
+                           val ^ 1);
+}
+
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR,
+                                 RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK,
+                                 RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT);
 }
 
 void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
index 0c37abb..c3ee278 100644 (file)
@@ -313,8 +313,11 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
                                            u32 rx_pkt_buff_size_per_tc,
                                            u32 buffer);
 
-/* set rdm rx dma descriptor cache init */
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init);
+/* toggle rdm rx dma descriptor cache init */
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw);
+
+/* get rdm rx dma descriptor cache init done */
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw);
 
 /* set rx xoff enable (per tc) */
 void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
index c3febcd..35887ad 100644 (file)
 /* default value of bitfield rdm_desc_init_i */
 #define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0
 
+/* rdm_desc_init_done_i bitfield definitions
+ * preprocessor definitions for the bitfield rdm_desc_init_done_i.
+ * port="pif_rdm_desc_init_done_i"
+ */
+
+/* register address for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR 0x00005a10
+/* bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK 0x00000001U
+/* inverted bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSKN 0xfffffffe
+/* lower bit position of bitfield  rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT 0U
+/* width of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_WIDTH 1
+/* default value of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_DEFAULT 0x0
+
+
 /* rx int_desc_wrb_en bitfield definitions
  * preprocessor definitions for the bitfield "int_desc_wrb_en".
  * port="pif_rdm_int_desc_wrb_en_i"
index da72648..7bc51f8 100644 (file)
@@ -337,7 +337,7 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp)
        /* Convert PHY temperature from 1/256 degree Celsius
         * to 1/1000 degree Celsius.
         */
-       *temp = temp_res  * 1000 / 256;
+       *temp = (temp_res & 0xFFFF) * 1000 / 256;
 
        return 0;
 }
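
The firmware mailbox word carries the PHY temperature in its low 16 bits; without the mask, stale upper bits corrupted the scaling. The arithmetic, on a made-up sample word:

	u32 temp_res = 0xDEAD1E00;	/* hypothetical: junk in the top half */
	int temp = (temp_res & 0xFFFF) * 1000 / 256;
	/* low half 0x1E00 == 7680; 7680 / 256 == 30 degC,
	 * so temp == 30000 in milli-degrees Celsius */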
index 42d2e1b..664d664 100644 (file)
@@ -256,6 +256,9 @@ static int emac_rockchip_remove(struct platform_device *pdev)
        if (priv->regulator)
                regulator_disable(priv->regulator);
 
+       if (priv->soc_data->need_div_macclk)
+               clk_disable_unprepare(priv->macclk);
+
        free_netdev(ndev);
        return err;
 }
index 7548247..1b1a090 100644 (file)
@@ -526,7 +526,7 @@ static int ag71xx_mdio_probe(struct ag71xx *ag)
        struct device *dev = &ag->pdev->dev;
        struct net_device *ndev = ag->ndev;
        static struct mii_bus *mii_bus;
-       struct device_node *np;
+       struct device_node *np, *mnp;
        int err;
 
        np = dev->of_node;
@@ -571,7 +571,9 @@ static int ag71xx_mdio_probe(struct ag71xx *ag)
                msleep(200);
        }
 
-       err = of_mdiobus_register(mii_bus, np);
+       mnp = of_get_child_by_name(np, "mdio");
+       err = of_mdiobus_register(mii_bus, mnp);
+       of_node_put(mnp);
        if (err)
                goto mdio_err_put_clk;
 
index e24f5d2..53055ce 100644 (file)
@@ -8,7 +8,6 @@ config NET_VENDOR_BROADCOM
        default y
        depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \
                   SIBYTE_SB1xxx_SOC
-       select DIMLIB
        ---help---
          If you have a network (Ethernet) chipset belonging to this class,
          say Y.
@@ -69,6 +68,7 @@ config BCMGENET
        select FIXED_PHY
        select BCM7XXX_PHY
        select MDIO_BCM_UNIMAC
+       select DIMLIB
        help
          This driver supports the built-in Ethernet MACs found in the
          Broadcom BCM7xxx Set Top Box family chipset.
@@ -188,6 +188,7 @@ config SYSTEMPORT
        select MII
        select PHYLIB
        select FIXED_PHY
+       select DIMLIB
        help
          This driver supports the built-in Ethernet MACs found in the
          Broadcom BCM7xxx Set Top Box family chipset using an internal
@@ -200,6 +201,7 @@ config BNXT
        select LIBCRC32C
        select NET_DEVLINK
        select PAGE_POOL
+       select DIMLIB
        ---help---
          This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit
          Ethernet cards.  To compile this driver as a module, choose M here:
index 7df887e..a977a45 100644 (file)
@@ -2481,7 +2481,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 
        priv->phy_interface = of_get_phy_mode(dn);
        /* Default to GMII interface mode */
-       if (priv->phy_interface < 0)
+       if ((int)priv->phy_interface < 0)
                priv->phy_interface = PHY_INTERFACE_MODE_GMII;
 
        /* In the case of a fixed PHY, the DT node associated
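
of_get_phy_mode() returns a negative errno on failure, but phy_interface_t is an unsigned enum, so the stored value can never compare below zero; the cast restores the error check. In miniature:

	phy_interface_t mode = of_get_phy_mode(dn);	/* may hold -ENODEV */

	/* WRONG: unsigned comparison, always false */
	if (mode < 0) { }

	/* RIGHT: cast back to int before testing for an errno */
	if ((int)mode < 0)
		mode = PHY_INTERFACE_MODE_GMII;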
index b4a8cf6..04ec909 100644 (file)
@@ -10382,7 +10382,8 @@ static void bnxt_cleanup_pci(struct bnxt *bp)
 {
        bnxt_unmap_bars(bp, bp->pdev);
        pci_release_regions(bp->pdev);
-       pci_disable_device(bp->pdev);
+       if (pci_is_enabled(bp->pdev))
+               pci_disable_device(bp->pdev);
 }
 
 static void bnxt_init_dflt_coal(struct bnxt *bp)
@@ -10669,14 +10670,11 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW;
        }
        /* fall through */
-       case BNXT_FW_RESET_STATE_RESET_FW: {
-               u32 wait_dsecs = bp->fw_health->post_reset_wait_dsecs;
-
+       case BNXT_FW_RESET_STATE_RESET_FW:
                bnxt_reset_all(bp);
                bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
-               bnxt_queue_fw_reset_work(bp, wait_dsecs * HZ / 10);
+               bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
                return;
-       }
        case BNXT_FW_RESET_STATE_ENABLE_DEV:
                if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
                    bp->fw_health) {
index e664392..7151244 100644 (file)
@@ -29,25 +29,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
        val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
        health_status = val & 0xffff;
 
-       if (health_status == BNXT_FW_STATUS_HEALTHY) {
-               rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
-                                                 "Healthy;");
-               if (rc)
-                       return rc;
-       } else if (health_status < BNXT_FW_STATUS_HEALTHY) {
-               rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
-                                                 "Not yet completed initialization;");
+       if (health_status < BNXT_FW_STATUS_HEALTHY) {
+               rc = devlink_fmsg_string_pair_put(fmsg, "Description",
+                                                 "Not yet completed initialization");
                if (rc)
                        return rc;
        } else if (health_status > BNXT_FW_STATUS_HEALTHY) {
-               rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
-                                                 "Encountered fatal error and cannot recover;");
+               rc = devlink_fmsg_string_pair_put(fmsg, "Description",
+                                                 "Encountered fatal error and cannot recover");
                if (rc)
                        return rc;
        }
 
        if (val >> 16) {
-               rc = devlink_fmsg_u32_pair_put(fmsg, "Error", val >> 16);
+               rc = devlink_fmsg_u32_pair_put(fmsg, "Error code", val >> 16);
                if (rc)
                        return rc;
        }
@@ -215,25 +210,68 @@ enum bnxt_dl_param_id {
 
 static const struct bnxt_dl_nvm_param nvm_params[] = {
        {DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, NVM_OFF_ENABLE_SRIOV,
-        BNXT_NVM_SHARED_CFG, 1},
+        BNXT_NVM_SHARED_CFG, 1, 1},
        {DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, NVM_OFF_IGNORE_ARI,
-        BNXT_NVM_SHARED_CFG, 1},
+        BNXT_NVM_SHARED_CFG, 1, 1},
        {DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
-        NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10},
+        NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10, 4},
        {DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
-        NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7},
+        NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7, 4},
        {BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK,
-        BNXT_NVM_SHARED_CFG, 1},
+        BNXT_NVM_SHARED_CFG, 1, 1},
 };
 
+union bnxt_nvm_data {
+       u8      val8;
+       __le32  val32;
+};
+
+static void bnxt_copy_to_nvm_data(union bnxt_nvm_data *dst,
+                                 union devlink_param_value *src,
+                                 int nvm_num_bits, int dl_num_bytes)
+{
+       u32 val32 = 0;
+
+       if (nvm_num_bits == 1) {
+               dst->val8 = src->vbool;
+               return;
+       }
+       if (dl_num_bytes == 4)
+               val32 = src->vu32;
+       else if (dl_num_bytes == 2)
+               val32 = (u32)src->vu16;
+       else if (dl_num_bytes == 1)
+               val32 = (u32)src->vu8;
+       dst->val32 = cpu_to_le32(val32);
+}
+
+static void bnxt_copy_from_nvm_data(union devlink_param_value *dst,
+                                   union bnxt_nvm_data *src,
+                                   int nvm_num_bits, int dl_num_bytes)
+{
+       u32 val32;
+
+       if (nvm_num_bits == 1) {
+               dst->vbool = src->val8;
+               return;
+       }
+       val32 = le32_to_cpu(src->val32);
+       if (dl_num_bytes == 4)
+               dst->vu32 = val32;
+       else if (dl_num_bytes == 2)
+               dst->vu16 = (u16)val32;
+       else if (dl_num_bytes == 1)
+               dst->vu8 = (u8)val32;
+}
+
 static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
                             int msg_len, union devlink_param_value *val)
 {
        struct hwrm_nvm_get_variable_input *req = msg;
-       void *data_addr = NULL, *buf = NULL;
        struct bnxt_dl_nvm_param nvm_param;
-       int bytesize, idx = 0, rc, i;
+       union bnxt_nvm_data *data;
        dma_addr_t data_dma_addr;
+       int idx = 0, rc, i;
 
        /* Get/Set NVM CFG parameter is supported only on PFs */
        if (BNXT_VF(bp))
@@ -254,47 +292,31 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
        else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG)
                idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID;
 
-       bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE;
-       switch (bytesize) {
-       case 1:
-               if (nvm_param.num_bits == 1)
-                       buf = &val->vbool;
-               else
-                       buf = &val->vu8;
-               break;
-       case 2:
-               buf = &val->vu16;
-               break;
-       case 4:
-               buf = &val->vu32;
-               break;
-       default:
-               return -EFAULT;
-       }
-
-       data_addr = dma_alloc_coherent(&bp->pdev->dev, bytesize,
-                                      &data_dma_addr, GFP_KERNEL);
-       if (!data_addr)
+       data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data),
+                                 &data_dma_addr, GFP_KERNEL);
+       if (!data)
                return -ENOMEM;
 
        req->dest_data_addr = cpu_to_le64(data_dma_addr);
-       req->data_len = cpu_to_le16(nvm_param.num_bits);
+       req->data_len = cpu_to_le16(nvm_param.nvm_num_bits);
        req->option_num = cpu_to_le16(nvm_param.offset);
        req->index_0 = cpu_to_le16(idx);
        if (idx)
                req->dimensions = cpu_to_le16(1);
 
        if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) {
-               memcpy(data_addr, buf, bytesize);
+               bnxt_copy_to_nvm_data(data, val, nvm_param.nvm_num_bits,
+                                     nvm_param.dl_num_bytes);
                rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT);
        } else {
                rc = hwrm_send_message_silent(bp, msg, msg_len,
                                              HWRM_CMD_TIMEOUT);
+               if (!rc)
+                       bnxt_copy_from_nvm_data(val, data,
+                                               nvm_param.nvm_num_bits,
+                                               nvm_param.dl_num_bytes);
        }
-       if (!rc && req->req_type == cpu_to_le16(HWRM_NVM_GET_VARIABLE))
-               memcpy(buf, data_addr, bytesize);
-
-       dma_free_coherent(&bp->pdev->dev, bytesize, data_addr, data_dma_addr);
+       dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr);
        if (rc == -EACCES)
                netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n");
        return rc;
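
A usage sketch of the helpers added above: the DMA buffer is now the fixed-size
union bnxt_nvm_data and always crosses the bus as little-endian, while the
devlink-side width (dl_num_bytes) is decoupled from the NVM field width
(nvm_num_bits). The values below are hypothetical:

        union bnxt_nvm_data buf;
        union devlink_param_value val;

        /* a 10-bit NVM field exposed as a 4-byte devlink parameter */
        buf.val32 = cpu_to_le32(0x2a);
        bnxt_copy_from_nvm_data(&val, &buf, 10, 4);
        /* val.vu32 == 0x2a on little- and big-endian hosts alike */
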
index b97e0ba..2f4fd0a 100644 (file)
@@ -52,7 +52,8 @@ struct bnxt_dl_nvm_param {
        u16 id;
        u16 offset;
        u16 dir_type;
-       u16 num_bits;
+       u16 nvm_num_bits;
+       u8 dl_num_bytes;
 };
 
 void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
index 12cb77e..0f13828 100644 (file)
@@ -2018,6 +2018,8 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv)
         */
        if (priv->internal_phy) {
                int0_enable |= UMAC_IRQ_LINK_EVENT;
+               if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+                       int0_enable |= UMAC_IRQ_PHY_DET_R;
        } else if (priv->ext_phy) {
                int0_enable |= UMAC_IRQ_LINK_EVENT;
        } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
@@ -2611,11 +2613,14 @@ static void bcmgenet_irq_task(struct work_struct *work)
        priv->irq0_stat = 0;
        spin_unlock_irq(&priv->lock);
 
+       if (status & UMAC_IRQ_PHY_DET_R &&
+           priv->dev->phydev->autoneg != AUTONEG_ENABLE)
+               phy_init_hw(priv->dev->phydev);
+
        /* Link UP/DOWN event */
-       if (status & UMAC_IRQ_LINK_EVENT) {
-               priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP);
+       if (status & UMAC_IRQ_LINK_EVENT)
                phy_mac_interrupt(priv->dev->phydev);
-       }
+
 }
 
 /* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2710,7 +2715,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
        }
 
        /* all other interested interrupts handled in bottom half */
-       status &= UMAC_IRQ_LINK_EVENT;
+       status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R);
        if (status) {
                /* Save irq status for bottom-half processing. */
                spin_lock_irqsave(&priv->lock, flags);
@@ -2874,6 +2879,12 @@ static int bcmgenet_open(struct net_device *dev)
        if (priv->internal_phy)
                bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
 
+       ret = bcmgenet_mii_connect(dev);
+       if (ret) {
+               netdev_err(dev, "failed to connect to PHY\n");
+               goto err_clk_disable;
+       }
+
        /* take MAC out of reset */
        bcmgenet_umac_reset(priv);
 
@@ -2883,6 +2894,12 @@ static int bcmgenet_open(struct net_device *dev)
        reg = bcmgenet_umac_readl(priv, UMAC_CMD);
        priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
 
+       ret = bcmgenet_mii_config(dev, true);
+       if (ret) {
+               netdev_err(dev, "unsupported PHY\n");
+               goto err_disconnect_phy;
+       }
+
        bcmgenet_set_hw_addr(priv, dev->dev_addr);
 
        if (priv->internal_phy) {
@@ -2898,7 +2915,7 @@ static int bcmgenet_open(struct net_device *dev)
        ret = bcmgenet_init_dma(priv);
        if (ret) {
                netdev_err(dev, "failed to initialize DMA\n");
-               goto err_clk_disable;
+               goto err_disconnect_phy;
        }
 
        /* Always enable ring 16 - descriptor ring */
@@ -2921,25 +2938,19 @@ static int bcmgenet_open(struct net_device *dev)
                goto err_irq0;
        }
 
-       ret = bcmgenet_mii_probe(dev);
-       if (ret) {
-               netdev_err(dev, "failed to connect to PHY\n");
-               goto err_irq1;
-       }
-
        bcmgenet_netif_start(dev);
 
        netif_tx_start_all_queues(dev);
 
        return 0;
 
-err_irq1:
-       free_irq(priv->irq1, priv);
 err_irq0:
        free_irq(priv->irq0, priv);
 err_fini_dma:
        bcmgenet_dma_teardown(priv);
        bcmgenet_fini_dma(priv);
+err_disconnect_phy:
+       phy_disconnect(dev->phydev);
 err_clk_disable:
        if (priv->internal_phy)
                bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
@@ -3620,6 +3631,8 @@ static int bcmgenet_resume(struct device *d)
        if (priv->internal_phy)
                bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
 
+       phy_init_hw(dev->phydev);
+
        bcmgenet_umac_reset(priv);
 
        init_umac(priv);
@@ -3628,8 +3641,6 @@ static int bcmgenet_resume(struct device *d)
        if (priv->wolopts)
                clk_disable_unprepare(priv->clk_wol);
 
-       phy_init_hw(dev->phydev);
-
        /* Speed settings must be restored */
        bcmgenet_mii_config(priv->dev, false);
 
index 4a8fc03..7fbf573 100644 (file)
@@ -366,6 +366,7 @@ struct bcmgenet_mib_counters {
 #define  EXT_PWR_DOWN_PHY_EN           (1 << 20)
 
 #define EXT_RGMII_OOB_CTRL             0x0C
+#define  RGMII_MODE_EN_V123            (1 << 0)
 #define  RGMII_LINK                    (1 << 4)
 #define  OOB_DISABLE                   (1 << 5)
 #define  RGMII_MODE_EN                 (1 << 6)
@@ -719,8 +720,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF);
 
 /* MDIO routines */
 int bcmgenet_mii_init(struct net_device *dev);
+int bcmgenet_mii_connect(struct net_device *dev);
 int bcmgenet_mii_config(struct net_device *dev, bool init);
-int bcmgenet_mii_probe(struct net_device *dev);
 void bcmgenet_mii_exit(struct net_device *dev);
 void bcmgenet_phy_power_set(struct net_device *dev, bool enable);
 void bcmgenet_mii_setup(struct net_device *dev);
index 970e478..17bb8d6 100644 (file)
@@ -173,6 +173,46 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
                                          bcmgenet_fixed_phy_link_update);
 }
 
+int bcmgenet_mii_connect(struct net_device *dev)
+{
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+       struct device_node *dn = priv->pdev->dev.of_node;
+       struct phy_device *phydev;
+       u32 phy_flags = 0;
+       int ret;
+
+       /* Communicate the integrated PHY revision */
+       if (priv->internal_phy)
+               phy_flags = priv->gphy_rev;
+
+       /* Initialize link state variables that bcmgenet_mii_setup() uses */
+       priv->old_link = -1;
+       priv->old_speed = -1;
+       priv->old_duplex = -1;
+       priv->old_pause = -1;
+
+       if (dn) {
+               phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
+                                       phy_flags, priv->phy_interface);
+               if (!phydev) {
+                       pr_err("could not attach to PHY\n");
+                       return -ENODEV;
+               }
+       } else {
+               phydev = dev->phydev;
+               phydev->dev_flags = phy_flags;
+
+               ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
+                                        priv->phy_interface);
+               if (ret) {
+                       pr_err("could not attach to PHY\n");
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+
 int bcmgenet_mii_config(struct net_device *dev, bool init)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -258,74 +298,29 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
         */
        if (priv->ext_phy) {
                reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
-               reg |= RGMII_MODE_EN | id_mode_dis;
+               reg |= id_mode_dis;
+               if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+                       reg |= RGMII_MODE_EN_V123;
+               else
+                       reg |= RGMII_MODE_EN;
                bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
        }
 
-       if (init)
-               dev_info(kdev, "configuring instance for %s\n", phy_name);
-
-       return 0;
-}
-
-int bcmgenet_mii_probe(struct net_device *dev)
-{
-       struct bcmgenet_priv *priv = netdev_priv(dev);
-       struct device_node *dn = priv->pdev->dev.of_node;
-       struct phy_device *phydev;
-       u32 phy_flags;
-       int ret;
-
-       /* Communicate the integrated PHY revision */
-       phy_flags = priv->gphy_rev;
-
-       /* Initialize link state variables that bcmgenet_mii_setup() uses */
-       priv->old_link = -1;
-       priv->old_speed = -1;
-       priv->old_duplex = -1;
-       priv->old_pause = -1;
-
-       if (dn) {
-               phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
-                                       phy_flags, priv->phy_interface);
-               if (!phydev) {
-                       pr_err("could not attach to PHY\n");
-                       return -ENODEV;
-               }
-       } else {
-               phydev = dev->phydev;
-               phydev->dev_flags = phy_flags;
+       if (init) {
+               linkmode_copy(phydev->advertising, phydev->supported);
 
-               ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
-                                        priv->phy_interface);
-               if (ret) {
-                       pr_err("could not attach to PHY\n");
-                       return -ENODEV;
-               }
-       }
+               /* The internal PHY has its link interrupts routed to the
+                * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
+                * that prevents the signaling of link UP interrupts when
+                * the link operates at 10Mbps, so fall back to polling for
+                * those versions of GENET.
+                */
+               if (priv->internal_phy && !GENET_IS_V5(priv))
+                       phydev->irq = PHY_IGNORE_INTERRUPT;
 
-       /* Configure port multiplexer based on what the probed PHY device since
-        * reading the 'max-speed' property determines the maximum supported
-        * PHY speed which is needed for bcmgenet_mii_config() to configure
-        * things appropriately.
-        */
-       ret = bcmgenet_mii_config(dev, true);
-       if (ret) {
-               phy_disconnect(dev->phydev);
-               return ret;
+               dev_info(kdev, "configuring instance for %s\n", phy_name);
        }
 
-       linkmode_copy(phydev->advertising, phydev->supported);
-
-       /* The internal PHY has its link interrupts routed to the
-        * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
-        * that prevents the signaling of link UP interrupts when
-        * the link operates at 10Mbps, so fallback to polling for
-        * those versions of GENET.
-        */
-       if (priv->internal_phy && !GENET_IS_V5(priv))
-               dev->phydev->irq = PHY_IGNORE_INTERRUPT;
-
        return 0;
 }
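
The net effect of this split is a new ordering in bcmgenet_open(): the PHY is
attached once, early, and the port multiplexer is programmed after the MAC
comes out of reset, rather than doing both inside the old bcmgenet_mii_probe().
Abridged from the open() hunks earlier in this patch:

        ret = bcmgenet_mii_connect(dev);      /* attach the PHY first */
        ...
        bcmgenet_umac_reset(priv);            /* take MAC out of reset */
        ...
        ret = bcmgenet_mii_config(dev, true); /* then program the port mux */
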
 
index 35b59b5..1e1b774 100644 (file)
@@ -165,9 +165,8 @@ static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
 {
-       if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-               return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
-       return NULL;
+       return (struct macb_dma_desc_64 *)((void *)desc
+               + sizeof(struct macb_dma_desc));
 }
 #endif
 
@@ -3406,17 +3405,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
                return err;
        }
 
-       *tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+       *tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk");
        if (IS_ERR(*tx_clk))
-               *tx_clk = NULL;
+               return PTR_ERR(*tx_clk);
 
-       *rx_clk = devm_clk_get(&pdev->dev, "rx_clk");
+       *rx_clk = devm_clk_get_optional(&pdev->dev, "rx_clk");
        if (IS_ERR(*rx_clk))
-               *rx_clk = NULL;
+               return PTR_ERR(*rx_clk);
 
-       *tsu_clk = devm_clk_get(&pdev->dev, "tsu_clk");
+       *tsu_clk = devm_clk_get_optional(&pdev->dev, "tsu_clk");
        if (IS_ERR(*tsu_clk))
-               *tsu_clk = NULL;
+               return PTR_ERR(*tsu_clk);
 
        err = clk_prepare_enable(*pclk);
        if (err) {
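
devm_clk_get_optional() returns NULL rather than an error when the clock is
simply not described, so the remaining IS_ERR() cases are genuine failures such
as -EPROBE_DEFER and must be propagated instead of being silently mapped to
"no clock". A minimal sketch of the idiom, with a hypothetical example_* name:

        #include <linux/clk.h>

        static int example_clk_init(struct device *dev, struct clk **tx_clk)
        {
                *tx_clk = devm_clk_get_optional(dev, "tx_clk");
                if (IS_ERR(*tx_clk))
                        return PTR_ERR(*tx_clk); /* e.g. -EPROBE_DEFER */

                /* NULL clk is valid: clk_prepare_enable(NULL) returns 0 */
                return clk_prepare_enable(*tx_clk);
        }
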
index be2bafc..a04eccb 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /* cavium_ptp.h - PTP 1588 clock on Cavium hardware
  * Copyright (c) 2003-2015, 2017 Cavium, Inc.
  */
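
This one-line change (and the identical fixes to the other headers further
down) follows the kernel's SPDX convention: header files carry the tag in a
/* */ comment, since headers may be pulled into contexts where C99 //
comments are not accepted, while C source files use the // form:

        /* SPDX-License-Identifier: GPL-2.0 */    (.h files)
        // SPDX-License-Identifier: GPL-2.0       (.c files)
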
index 71854a1..3802487 100644 (file)
@@ -5701,7 +5701,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        whoami = t4_read_reg(adapter, PL_WHOAMI_A);
        pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
        chip = t4_get_chip_type(adapter, CHELSIO_PCI_ID_VER(device_id));
-       if (chip < 0) {
+       if ((int)chip < 0) {
                dev_err(&pdev->dev, "Device %d is not supported\n", device_id);
                err = chip;
                goto out_free_adapter;
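
The cast matters because chip has an enum type, and compilers commonly give a
plain enum an unsigned representation, so a bare "chip < 0" can be
constant-false and the negative error code from t4_get_chip_type() would never
be seen; the same pattern recurs below for the of_get_phy_mode() callers. A
standalone illustration (the enum values are hypothetical):

        #include <stdio.h>

        enum chip_type { CHIP_T4 = 1, CHIP_T5, CHIP_T6 };

        int main(void)
        {
                enum chip_type chip = (enum chip_type)-22; /* e.g. -EINVAL */

                /* With an unsigned underlying type this test is always
                 * false, and -Wtype-limits will say so.
                 */
                if (chip < 0)
                        printf("unreachable on such compilers\n");

                if ((int)chip < 0)
                        printf("error detected: %d\n", (int)chip);
                return 0;
        }
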
index 5b60224..86b528d 100644 (file)
@@ -137,13 +137,12 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
 static int alloc_uld_rxqs(struct adapter *adap,
                          struct sge_uld_rxq_info *rxq_info, bool lro)
 {
-       struct sge *s = &adap->sge;
        unsigned int nq = rxq_info->nrxq + rxq_info->nciq;
+       int i, err, msi_idx, que_idx = 0, bmap_idx = 0;
        struct sge_ofld_rxq *q = rxq_info->uldrxq;
        unsigned short *ids = rxq_info->rspq_id;
-       unsigned int bmap_idx = 0;
+       struct sge *s = &adap->sge;
        unsigned int per_chan;
-       int i, err, msi_idx, que_idx = 0;
 
        per_chan = rxq_info->nrxq / adap->params.nports;
 
@@ -161,6 +160,10 @@ static int alloc_uld_rxqs(struct adapter *adap,
 
                if (msi_idx >= 0) {
                        bmap_idx = get_msix_idx_from_bmap(adap);
+                       if (bmap_idx < 0) {
+                               err = -ENOSPC;
+                               goto freeout;
+                       }
                        msi_idx = adap->msix_info_ulds[bmap_idx].idx;
                }
                err = t4_sge_alloc_rxq(adap, &q->rspq, false,
@@ -692,10 +695,10 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
        lld->write_cmpl_support = adap->params.write_cmpl_support;
 }
 
-static void uld_attach(struct adapter *adap, unsigned int uld)
+static int uld_attach(struct adapter *adap, unsigned int uld)
 {
-       void *handle;
        struct cxgb4_lld_info lli;
+       void *handle;
 
        uld_init(adap, &lli);
        uld_queue_init(adap, uld, &lli);
@@ -705,7 +708,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
                dev_warn(adap->pdev_dev,
                         "could not attach to the %s driver, error %ld\n",
                         adap->uld[uld].name, PTR_ERR(handle));
-               return;
+               return PTR_ERR(handle);
        }
 
        adap->uld[uld].handle = handle;
@@ -713,22 +716,22 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
 
        if (adap->flags & CXGB4_FULL_INIT_DONE)
                adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
+
+       return 0;
 }
 
-/**
- *     cxgb4_register_uld - register an upper-layer driver
- *     @type: the ULD type
- *     @p: the ULD methods
+/* cxgb4_register_uld - register an upper-layer driver
+ * @type: the ULD type
+ * @p: the ULD methods
  *
- *     Registers an upper-layer driver with this driver and notifies the ULD
- *     about any presently available devices that support its type.  Returns
- *     %-EBUSY if a ULD of the same type is already registered.
+ * Registers an upper-layer driver with this driver and notifies the ULD
+ * about any presently available devices that support its type.
  */
 void cxgb4_register_uld(enum cxgb4_uld type,
                        const struct cxgb4_uld_info *p)
 {
-       int ret = 0;
        struct adapter *adap;
+       int ret = 0;
 
        if (type >= CXGB4_ULD_MAX)
                return;
@@ -760,8 +763,12 @@ void cxgb4_register_uld(enum cxgb4_uld type,
                if (ret)
                        goto free_irq;
                adap->uld[type] = *p;
-               uld_attach(adap, type);
+               ret = uld_attach(adap, type);
+               if (ret)
+                       goto free_txq;
                continue;
+free_txq:
+               release_sge_txq_uld(adap, type);
 free_irq:
                if (adap->flags & CXGB4_FULL_INIT_DONE)
                        quiesce_rx_uld(adap, type);
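
With uld_attach() now returning its error, cxgb4_register_uld() can unwind in
exact reverse order of setup; the new free_txq label sits above free_irq so a
failed attach first releases the ULD tx queues. A minimal sketch of the
goto-ladder pattern, with hypothetical names throughout:

        static int example_register(struct foo *f)
        {
                int ret;

                ret = setup_rxq(f);
                if (ret)
                        return ret;
                ret = setup_txq(f);
                if (ret)
                        goto free_rxq;
                ret = attach(f);
                if (ret)
                        goto free_txq;
                return 0;

        free_txq:
                release_txq(f);
        free_rxq:
                release_rxq(f);
                return ret;
        }
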
index b3da81e..928bfea 100644 (file)
@@ -3791,15 +3791,11 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
         * write the CIDX Updates into the Status Page at the end of the
         * TX Queue.
         */
-       c.autoequiqe_to_viid = htonl((dbqt
-                                     ? FW_EQ_ETH_CMD_AUTOEQUIQE_F
-                                     : FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
+       c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
                                     FW_EQ_ETH_CMD_VIID_V(pi->viid));
 
        c.fetchszm_to_iqid =
-               htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt
-                                                ? HOSTFCMODE_INGRESS_QUEUE_X
-                                                : HOSTFCMODE_STATUS_PAGE_X) |
+               htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
                      FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
                      FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
 
index 0b12f89..9fdf77d 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Register definitions for Gemini GMAC Ethernet device driver
  *
  * Copyright (C) 2006 Storlink, Corp.
index e8c7eb8..17d300e 100644 (file)
@@ -48,5 +48,5 @@ config BE2NET_SKYHAWK
          chipsets. (e.g. OneConnect OCe14xxx)
 
 comment "WARNING: be2net is useless without any enabled chip"
-        depends on BE2NET_BE2=n && BE2NET_BE3=n && BE2NET_LANCER=n && \
+       depends on BE2NET_BE2=n && BE2NET_BE3=n && BE2NET_LANCER=n && \
        BE2NET_SKYHAWK=n && BE2NET
index 9b7af94..96e9565 100644 (file)
@@ -727,6 +727,18 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb,
         */
        nfrags = skb_shinfo(skb)->nr_frags;
 
+       /* Setup HW checksumming */
+       csum_vlan = 0;
+       if (skb->ip_summed == CHECKSUM_PARTIAL &&
+           !ftgmac100_prep_tx_csum(skb, &csum_vlan))
+               goto drop;
+
+       /* Add VLAN tag */
+       if (skb_vlan_tag_present(skb)) {
+               csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
+               csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
+       }
+
        /* Get header len */
        len = skb_headlen(skb);
 
@@ -753,19 +765,6 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb,
        if (nfrags == 0)
                f_ctl_stat |= FTGMAC100_TXDES0_LTS;
        txdes->txdes3 = cpu_to_le32(map);
-
-       /* Setup HW checksumming */
-       csum_vlan = 0;
-       if (skb->ip_summed == CHECKSUM_PARTIAL &&
-           !ftgmac100_prep_tx_csum(skb, &csum_vlan))
-               goto drop;
-
-       /* Add VLAN tag */
-       if (skb_vlan_tag_present(skb)) {
-               csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
-               csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
-       }
-
        txdes->txdes1 = cpu_to_le32(csum_vlan);
 
        /* Next descriptor */
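
Hoisting the checksum/VLAN computation above the descriptor writes means the
"goto drop" path now runs before any buffer is DMA-mapped or any tx descriptor
is filled in, so rejecting a packet leaves no partial hardware state behind. A
sketch of the fail-before-side-effects ordering, all names hypothetical:

        static int example_xmit(struct sk_buff *skb)
        {
                u32 csum_vlan = 0;

                /* everything that can reject the packet comes first */
                if (!example_prep_csum(skb, &csum_vlan))
                        return -EINVAL;        /* nothing to unwind */

                /* only then commit state: map buffers, fill descriptors */
                return example_map_and_queue(skb, csum_vlan);
        }
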
index 162d7d8..19379ba 100644 (file)
@@ -1235,6 +1235,8 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
        priv->rx_td_enabled = enable;
 }
 
+static void update_tx_fqids(struct dpaa2_eth_priv *priv);
+
 static int link_state_update(struct dpaa2_eth_priv *priv)
 {
        struct dpni_link_state state = {0};
@@ -1261,6 +1263,7 @@ static int link_state_update(struct dpaa2_eth_priv *priv)
                goto out;
 
        if (state.up) {
+               update_tx_fqids(priv);
                netif_carrier_on(priv->net_dev);
                netif_tx_start_all_queues(priv->net_dev);
        } else {
@@ -2533,6 +2536,47 @@ static int set_pause(struct dpaa2_eth_priv *priv)
        return 0;
 }
 
+static void update_tx_fqids(struct dpaa2_eth_priv *priv)
+{
+       struct dpni_queue_id qid = {0};
+       struct dpaa2_eth_fq *fq;
+       struct dpni_queue queue;
+       int i, j, err;
+
+       /* We only use Tx FQIDs for FQID-based enqueue, so check
+        * if the DPNI version supports it before updating FQIDs
+        */
+       if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR,
+                                  DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
+               return;
+
+       for (i = 0; i < priv->num_fqs; i++) {
+               fq = &priv->fq[i];
+               if (fq->type != DPAA2_TX_CONF_FQ)
+                       continue;
+               for (j = 0; j < dpaa2_eth_tc_count(priv); j++) {
+                       err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
+                                            DPNI_QUEUE_TX, j, fq->flowid,
+                                            &queue, &qid);
+                       if (err)
+                               goto out_err;
+
+                       fq->tx_fqid[j] = qid.fqid;
+                       if (fq->tx_fqid[j] == 0)
+                               goto out_err;
+               }
+       }
+
+       priv->enqueue = dpaa2_eth_enqueue_fq;
+
+       return;
+
+out_err:
+       netdev_info(priv->net_dev,
+                   "Error reading Tx FQID, fallback to QDID-based enqueue\n");
+       priv->enqueue = dpaa2_eth_enqueue_qd;
+}
+
 /* Configure the DPNI object this interface is associated with */
 static int setup_dpni(struct fsl_mc_device *ls_dev)
 {
@@ -3306,6 +3350,9 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
        if (status & DPNI_IRQ_EVENT_LINK_CHANGED)
                link_state_update(netdev_priv(net_dev));
 
+       if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED)
+               set_mac_addr(netdev_priv(net_dev));
+
        return IRQ_HANDLED;
 }
 
@@ -3331,7 +3378,8 @@ static int setup_irqs(struct fsl_mc_device *ls_dev)
        }
 
        err = dpni_set_irq_mask(ls_dev->mc_io, 0, ls_dev->mc_handle,
-                               DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED);
+                               DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED |
+                               DPNI_IRQ_EVENT_ENDPOINT_CHANGED);
        if (err < 0) {
                dev_err(&ls_dev->dev, "dpni_set_irq_mask(): %d\n", err);
                goto free_irq;
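
update_tx_fqids() exists because the Tx FQIDs can change when the DPNI's
endpoint changes; re-reading them on link-up keeps the fast FQID-based enqueue
usable, and on any failure the driver degrades to the always-valid QDID path
by swapping the enqueue hook, as the hunk above shows:

        /* abridged: runtime selection of the enqueue strategy */
        priv->enqueue = dpaa2_eth_enqueue_fq;  /* fast path, direct FQID */
        ...
        priv->enqueue = dpaa2_eth_enqueue_qd;  /* fallback, enqueue by QDID */
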
index ff2e177..df2458a 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2018 NXP
  */
index fd58391..ee0711d 100644 (file)
@@ -133,9 +133,12 @@ int dpni_reset(struct fsl_mc_io    *mc_io,
  */
 #define DPNI_IRQ_INDEX                         0
 /**
- * IRQ event - indicates a change in link state
+ * IRQ events:
+ *       DPNI_IRQ_EVENT_LINK_CHANGED - indicates a change in link state
+ *       DPNI_IRQ_EVENT_ENDPOINT_CHANGED - indicates a change in endpoint
  */
 #define DPNI_IRQ_EVENT_LINK_CHANGED            0x00000001
+#define DPNI_IRQ_EVENT_ENDPOINT_CHANGED                0x00000002
 
 int dpni_set_irq_enable(struct fsl_mc_io       *mc_io,
                        u32                     cmd_flags,
index 720cd50..4ac05bf 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2013-2016 Freescale Semiconductor Inc.
  * Copyright 2016-2018 NXP
index be7914c..311c184 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2013-2016 Freescale Semiconductor Inc.
  * Copyright 2016-2018 NXP
index 7d6513f..b73421c 100644 (file)
@@ -785,7 +785,7 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
        }
 
        priv->if_mode = of_get_phy_mode(np);
-       if (priv->if_mode < 0) {
+       if ((int)priv->if_mode < 0) {
                dev_err(priv->dev, "missing phy type\n");
                of_node_put(priv->phy_node);
                if (of_phy_is_fixed_link(np))
index d4d4c72..22c01b2 100644 (file)
@@ -3558,7 +3558,7 @@ fec_probe(struct platform_device *pdev)
 
        for (i = 0; i < irq_cnt; i++) {
                snprintf(irq_name, sizeof(irq_name), "int%d", i);
-               irq = platform_get_irq_byname(pdev, irq_name);
+               irq = platform_get_irq_byname_optional(pdev, irq_name);
                if (irq < 0)
                        irq = platform_get_irq(pdev, i);
                if (irq < 0) {
index 19e2365..945643c 100644 (file)
@@ -600,9 +600,9 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx)
 
        INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep);
 
-       irq = platform_get_irq_byname(pdev, "pps");
+       irq = platform_get_irq_byname_optional(pdev, "pps");
        if (irq < 0)
-               irq = platform_get_irq(pdev, irq_idx);
+               irq = platform_get_irq_optional(pdev, irq_idx);
        /* Failure to get an irq is not fatal,
         * only the PTP_CLOCK_PPS clock events should stop
         */
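
Both fec hunks move to the *_optional IRQ getters, which stay silent instead
of logging an error when the interrupt is absent; that matches the comment
above, since a missing PPS interrupt is not fatal. A minimal sketch with a
hypothetical example_* name:

        #include <linux/platform_device.h>

        static int example_get_pps_irq(struct platform_device *pdev)
        {
                int irq;

                /* no "IRQ not found" noise when the line is absent */
                irq = platform_get_irq_byname_optional(pdev, "pps");
                if (irq < 0)
                        irq = platform_get_irq_optional(pdev, 0);

                return irq; /* may stay negative: caller treats as no PPS */
        }
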
index 24bf7f6..51ad864 100644 (file)
@@ -2067,7 +2067,7 @@ static int gfar_change_mtu(struct net_device *dev, int new_mtu)
        return 0;
 }
 
-void reset_gfar(struct net_device *ndev)
+static void reset_gfar(struct net_device *ndev)
 {
        struct gfar_private *priv = netdev_priv(ndev);
 
index 59564ac..edec61d 100644 (file)
@@ -289,6 +289,8 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 
        len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
        page_info = &rx->data.page_info[idx];
+       dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
+                               PAGE_SIZE, DMA_FROM_DEVICE);
 
        /* gvnic can only receive into registered segments. If the buffer
         * can't be recycled, our only choice is to copy the data out of
index 778b87b..0a9a7ee 100644 (file)
@@ -390,7 +390,21 @@ static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
        seg_desc->seg.seg_addr = cpu_to_be64(addr);
 }
 
-static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
+static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
+                                   u64 iov_offset, u64 iov_len)
+{
+       dma_addr_t dma;
+       u64 addr;
+
+       for (addr = iov_offset; addr < iov_offset + iov_len;
+            addr += PAGE_SIZE) {
+               dma = page_buses[addr / PAGE_SIZE];
+               dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_TO_DEVICE);
+       }
+}
+
+static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
+                         struct device *dev)
 {
        int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
        union gve_tx_desc *pkt_desc, *seg_desc;
@@ -432,6 +446,9 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
        skb_copy_bits(skb, 0,
                      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
                      hlen);
+       gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+                               info->iov[hdr_nfrags - 1].iov_offset,
+                               info->iov[hdr_nfrags - 1].iov_len);
        copy_offset = hlen;
 
        for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
@@ -445,6 +462,9 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
                skb_copy_bits(skb, copy_offset,
                              tx->tx_fifo.base + info->iov[i].iov_offset,
                              info->iov[i].iov_len);
+               gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+                                       info->iov[i].iov_offset,
+                                       info->iov[i].iov_len);
                copy_offset += info->iov[i].iov_len;
        }
 
@@ -473,7 +493,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
                gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
                return NETDEV_TX_BUSY;
        }
-       nsegs = gve_tx_add_skb(tx, skb);
+       nsegs = gve_tx_add_skb(tx, skb, &priv->pdev->dev);
 
        netdev_tx_sent_queue(tx->netdev_txq, skb->len);
        skb_tx_timestamp(skb);
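
The gve queue-page-list buffers are streaming DMA mappings kept per 4 KiB page
in page_buses[], so every page the CPU copies skb data into must be handed
back to the device before the descriptor is posted; the rx hunk above adds the
mirror-image dma_sync_single_for_cpu(). The general discipline, sketched with
a hypothetical example_* name:

        #include <linux/dma-mapping.h>
        #include <linux/string.h>

        /* CPU writes the buffer, then ownership passes to the device */
        static void example_tx_sync(struct device *dev, dma_addr_t bus,
                                    void *cpu_buf, const void *data,
                                    size_t len)
        {
                memcpy(cpu_buf, data, len);
                dma_sync_single_for_device(dev, bus, len, DMA_TO_DEVICE);
                /* only now is it safe to post the descriptor */
        }
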
index c841674..4606a7e 100644 (file)
@@ -237,6 +237,7 @@ struct hip04_priv {
        dma_addr_t rx_phys[RX_DESC_NUM];
        unsigned int rx_head;
        unsigned int rx_buf_size;
+       unsigned int rx_cnt_remaining;
 
        struct device_node *phy_node;
        struct phy_device *phy;
@@ -575,7 +576,6 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget)
        struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
        struct net_device *ndev = priv->ndev;
        struct net_device_stats *stats = &ndev->stats;
-       unsigned int cnt = hip04_recv_cnt(priv);
        struct rx_desc *desc;
        struct sk_buff *skb;
        unsigned char *buf;
@@ -588,8 +588,8 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget)
 
        /* clean up tx descriptors */
        tx_remaining = hip04_tx_reclaim(ndev, false);
-
-       while (cnt && !last) {
+       priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+       while (priv->rx_cnt_remaining && !last) {
                buf = priv->rx_buf[priv->rx_head];
                skb = build_skb(buf, priv->rx_buf_size);
                if (unlikely(!skb)) {
@@ -635,11 +635,13 @@ refill:
                hip04_set_recv_desc(priv, phys);
 
                priv->rx_head = RX_NEXT(priv->rx_head);
-               if (rx >= budget)
+               if (rx >= budget) {
+                       --priv->rx_cnt_remaining;
                        goto done;
+               }
 
-               if (--cnt == 0)
-                       cnt = hip04_recv_cnt(priv);
+               if (--priv->rx_cnt_remaining == 0)
+                       priv->rx_cnt_remaining += hip04_recv_cnt(priv);
        }
 
        if (!(priv->reg_inten & RCV_INT)) {
@@ -724,6 +726,7 @@ static int hip04_mac_open(struct net_device *ndev)
        int i;
 
        priv->rx_head = 0;
+       priv->rx_cnt_remaining = 0;
        priv->tx_head = 0;
        priv->tx_tail = 0;
        hip04_reset_ppe(priv);
@@ -1038,7 +1041,6 @@ static int hip04_remove(struct platform_device *pdev)
 
        hip04_free_ring(ndev, d);
        unregister_netdev(ndev);
-       free_irq(ndev->irq, ndev);
        of_node_put(priv->phy_node);
        cancel_work_sync(&priv->tx_timeout_task);
        free_netdev(ndev);
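
The hip04 RX count register appears to be clear-on-read, so any count fetched
but not consumed within one NAPI budget used to be lost; keeping the remainder
in priv->rx_cnt_remaining carries that work into the next poll, and the budget
exit now decrements it for the packet just handled. Sketched with hypothetical
helpers:

        priv->rx_cnt_remaining += example_read_clear_counter(priv);
        while (priv->rx_cnt_remaining && rx < budget) {
                example_process_one(priv);
                rx++;
                priv->rx_cnt_remaining--;
        }
        /* leftover count survives for the next NAPI poll */
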
index 95a6b09..c41b19c 100644 (file)
@@ -1194,7 +1194,7 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
                goto err_free_mdio;
 
        priv->phy_mode = of_get_phy_mode(node);
-       if (priv->phy_mode < 0) {
+       if ((int)priv->phy_mode < 0) {
                netdev_err(ndev, "not find phy-mode\n");
                ret = -EINVAL;
                goto err_mdiobus;
index c4b7bf8..75ccc1e 100644 (file)
@@ -32,6 +32,8 @@
 
 #define HNAE3_MOD_VERSION "1.0"
 
+#define HNAE3_MIN_VECTOR_NUM   2 /* first one for misc, another for IO */
+
 /* Device IDs */
 #define HNAE3_DEV_ID_GE                                0xA220
 #define HNAE3_DEV_ID_25GE                      0xA221
index fd7f943..e02e01b 100644 (file)
@@ -906,6 +906,9 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
                hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
                                HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
 
+               /* The nic's msix number always equals the roce's. */
+               hdev->num_nic_msi = hdev->num_roce_msi;
+
                /* PF should have NIC vectors and Roce vectors,
                 * NIC vectors are queued before Roce vectors.
                 */
@@ -915,6 +918,15 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
                hdev->num_msi =
                hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
                                HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
+               hdev->num_nic_msi = hdev->num_msi;
+       }
+
+       if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) {
+               dev_err(&hdev->pdev->dev,
+                       "Just %u msi resources, not enough for pf(min:2).\n",
+                       hdev->num_nic_msi);
+               return -EINVAL;
        }
 
        return 0;
@@ -1507,6 +1519,10 @@ static int  hclge_assign_tqp(struct hclge_vport *vport, u16 num_tqps)
        kinfo->rss_size = min_t(u16, hdev->rss_size_max,
                                vport->alloc_tqps / hdev->tm_info.num_tc);
 
+       /* ensure a one-to-one mapping between irq and queue by default */
+       kinfo->rss_size = min_t(u16, kinfo->rss_size,
+                               (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc);
+
        return 0;
 }
 
@@ -2285,7 +2301,8 @@ static int hclge_init_msi(struct hclge_dev *hdev)
        int vectors;
        int i;
 
-       vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+       vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+                                       hdev->num_msi,
                                        PCI_IRQ_MSI | PCI_IRQ_MSIX);
        if (vectors < 0) {
                dev_err(&pdev->dev,
@@ -2300,6 +2317,7 @@ static int hclge_init_msi(struct hclge_dev *hdev)
 
        hdev->num_msi = vectors;
        hdev->num_msi_left = vectors;
+
        hdev->base_msi_vector = pdev->irq;
        hdev->roce_base_vector = hdev->base_msi_vector +
                                hdev->roce_base_msix_offset;
@@ -3903,6 +3921,7 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num,
        int alloc = 0;
        int i, j;
 
+       vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num);
        vector_num = min(hdev->num_msi_left, vector_num);
 
        for (j = 0; j < vector_num; j++) {
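
The recurring min_t() caps enforce one budget on both the PF and VF sides: of
num_nic_msi vectors, one is reserved for misc events and the rest serve
queues, so with T traffic classes each class can map at most
(num_nic_msi - 1) / T queues one-to-one onto interrupts. For example, with 17
NIC vectors and 4 TCs, rss_size is capped at (17 - 1) / 4 = 4 queues per TC.
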
index 3e9574a..c3d56b8 100644 (file)
@@ -763,6 +763,7 @@ struct hclge_dev {
        u32 base_msi_vector;
        u16 *vector_status;
        int *vector_irq;
+       u16 num_nic_msi;        /* Num of nic vectors for this PF */
        u16 num_roce_msi;       /* Num of roce vectors for this PF */
        int roce_base_vector;
 
index 9f0e35f..62399cc 100644 (file)
@@ -537,9 +537,16 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
                kinfo->rss_size = kinfo->req_rss_size;
        } else if (kinfo->rss_size > max_rss_size ||
                   (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
+               /* If the user has not set rss_size, cap it by the number of
+                * valid msi vectors to keep the default one-to-one mapping
+                * between tqp and irq.
+                */
+               if (!kinfo->req_rss_size)
+                       max_rss_size = min_t(u16, max_rss_size,
+                                            (hdev->num_nic_msi - 1) /
+                                            kinfo->num_tc);
+
                /* Set to the maximum specification value (max_rss_size). */
-               dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
-                        kinfo->rss_size, max_rss_size);
                kinfo->rss_size = max_rss_size;
        }
 
index e3090b3..7d7e712 100644 (file)
@@ -411,6 +411,13 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
                kinfo->tqp[i] = &hdev->htqp[i].q;
        }
 
+       /* After initializing the max rss_size and tqps, adjust the default
+        * tqp number and rss size to match the actual vector count.
+        */
+       kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps);
+       kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc,
+                               kinfo->rss_size);
+
        return 0;
 }
 
@@ -502,6 +509,7 @@ static int hclgevf_get_vector(struct hnae3_handle *handle, u16 vector_num,
        int alloc = 0;
        int i, j;
 
+       vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num);
        vector_num = min(hdev->num_msi_left, vector_num);
 
        for (j = 0; j < vector_num; j++) {
@@ -2246,13 +2254,14 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
        int vectors;
        int i;
 
-       if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B))
+       if (hnae3_dev_roce_supported(hdev))
                vectors = pci_alloc_irq_vectors(pdev,
                                                hdev->roce_base_msix_offset + 1,
                                                hdev->num_msi,
                                                PCI_IRQ_MSIX);
        else
-               vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+               vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+                                               hdev->num_msi,
                                                PCI_IRQ_MSI | PCI_IRQ_MSIX);
 
        if (vectors < 0) {
@@ -2268,6 +2277,7 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
 
        hdev->num_msi = vectors;
        hdev->num_msi_left = vectors;
+
        hdev->base_msi_vector = pdev->irq;
        hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset;
 
@@ -2533,7 +2543,7 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev)
 
        req = (struct hclgevf_query_res_cmd *)desc.data;
 
-       if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) {
+       if (hnae3_dev_roce_supported(hdev)) {
                hdev->roce_base_msix_offset =
                hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
                                HCLGEVF_MSIX_OFT_ROCEE_M,
@@ -2542,6 +2552,9 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev)
                hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
                                HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
 
+               /* The nic's msix number always equals the roce's. */
+               hdev->num_nic_msix = hdev->num_roce_msix;
+
                /* VF should have NIC vectors and Roce vectors, NIC vectors
                 * are queued before Roce vectors. The offset is fixed to 64.
                 */
@@ -2551,6 +2564,15 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev)
                hdev->num_msi =
                hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
                                HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+
+               hdev->num_nic_msix = hdev->num_msi;
+       }
+
+       if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) {
+               dev_err(&hdev->pdev->dev,
+                       "Just %u msi resources, not enough for vf(min:2).\n",
+                       hdev->num_nic_msix);
+               return -EINVAL;
        }
 
        return 0;
index bdde3af..2b8d6bc 100644 (file)
@@ -270,6 +270,7 @@ struct hclgevf_dev {
        u16 num_msi;
        u16 num_msi_left;
        u16 num_msi_used;
+       u16 num_nic_msix;       /* Num of nic vectors for this VF */
        u16 num_roce_msix;      /* Num of roce vectors for this VF */
        u16 roce_base_msix_offset;
        int roce_base_vector;
index 3e863a7..7df5d7d 100644 (file)
@@ -148,11 +148,15 @@ static int mdio_sc_cfg_reg_write(struct hns_mdio_device *mdio_dev,
 {
        u32 time_cnt;
        u32 reg_value;
+       int ret;
 
        regmap_write(mdio_dev->subctrl_vbase, cfg_reg, set_val);
 
        for (time_cnt = MDIO_TIMEOUT; time_cnt; time_cnt--) {
-               regmap_read(mdio_dev->subctrl_vbase, st_reg, &reg_value);
+               ret = regmap_read(mdio_dev->subctrl_vbase, st_reg, &reg_value);
+               if (ret)
+                       return ret;
+
                reg_value &= st_msk;
                if ((!!check_st) == (!!reg_value))
                        break;
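
regmap_read() can fail (for example on a bus error through the syscon), in
which case reg_value would previously have been consumed uninitialized;
propagating the return code fixes both problems. A minimal sketch with a
hypothetical example_* name:

        #include <linux/regmap.h>

        static int example_poll_status(struct regmap *map, unsigned int reg,
                                       unsigned int mask)
        {
                unsigned int val;
                int ret;

                ret = regmap_read(map, reg, &val);
                if (ret)
                        return ret; /* val is not valid on failure */

                return !!(val & mask);
        }
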
index 211c5f7..aec7e98 100644 (file)
@@ -96,6 +96,8 @@
 
 #define OPT_SWAP_PORT  0x0001  /* Need to wordswp on the MPU port */
 
+#define LIB82596_DMA_ATTR      DMA_ATTR_NON_CONSISTENT
+
 #define DMA_WBACK(ndev, addr, len) \
        do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0)
 
@@ -200,7 +202,7 @@ static int __exit lan_remove_chip(struct parisc_device *pdev)
 
        unregister_netdev (dev);
        dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma,
-                      lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+                      lp->dma_addr, LIB82596_DMA_ATTR);
        free_netdev (dev);
        return 0;
 }
index 1274ad2..f9742af 100644 (file)
@@ -1065,7 +1065,7 @@ static int i82596_probe(struct net_device *dev)
 
        dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma),
                              &lp->dma_addr, GFP_KERNEL,
-                             DMA_ATTR_NON_CONSISTENT);
+                             LIB82596_DMA_ATTR);
        if (!dma) {
                printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__);
                return -ENOMEM;
@@ -1087,7 +1087,7 @@ static int i82596_probe(struct net_device *dev)
        i = register_netdev(dev);
        if (i) {
                dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma),
-                              dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+                              dma, lp->dma_addr, LIB82596_DMA_ATTR);
                return i;
        }
 
index 6eb6c2f..6436a98 100644 (file)
@@ -24,6 +24,8 @@
 
 static const char sni_82596_string[] = "snirm_82596";
 
+#define LIB82596_DMA_ATTR      0
+
 #define DMA_WBACK(priv, addr, len)     do { } while (0)
 #define DMA_INV(priv, addr, len)       do { } while (0)
 #define DMA_WBACK_INV(priv, addr, len) do { } while (0)
@@ -152,7 +154,7 @@ static int sni_82596_driver_remove(struct platform_device *pdev)
 
        unregister_netdev(dev);
        dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma,
-                      lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+                      lp->dma_addr, LIB82596_DMA_ATTR);
        iounmap(lp->ca);
        iounmap(lp->mpu_port);
        free_netdev (dev);
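
lib82596.c is compiled into each platform wrapper rather than linked against,
so each wrapper can now pick the DMA attribute the shared code should use:
DMA_ATTR_NON_CONSISTENT plus explicit cache maintenance on PA-RISC
(lasi_82596), plain coherent allocations (0) on SNI. The shape of the
arrangement, abridged:

        /* lasi_82596.c */
        #define LIB82596_DMA_ATTR  DMA_ATTR_NON_CONSISTENT
        #include "lib82596.c"

        /* sni_82596.c */
        #define LIB82596_DMA_ATTR  0
        #include "lib82596.c"
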
index 2e5172f..f59d9a8 100644 (file)
@@ -1207,7 +1207,7 @@ static void ibmvnic_cleanup(struct net_device *netdev)
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
        /* ensure that transmissions are stopped if called by do_reset */
-       if (adapter->resetting)
+       if (test_bit(0, &adapter->resetting))
                netif_tx_disable(netdev);
        else
                netif_tx_stop_all_queues(netdev);
@@ -1428,7 +1428,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        u8 proto = 0;
        netdev_tx_t ret = NETDEV_TX_OK;
 
-       if (adapter->resetting) {
+       if (test_bit(0, &adapter->resetting)) {
                if (!netif_subqueue_stopped(netdev, skb))
                        netif_stop_subqueue(netdev, queue_num);
                dev_kfree_skb_any(skb);
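
From here on, adapter->resetting is no longer a plain bool: it is bit 0 of an
unsigned long, read with test_bit() as above and claimed in the reset worker
with test_and_set_bit_lock(), which serializes concurrent workers and lets a
losing worker reschedule itself via the new ibmvnic_delayed_reset work. A
minimal sketch of the single-owner pattern, with a hypothetical example_* name:

        #include <linux/bitops.h>

        static int example_try_start_reset(unsigned long *resetting)
        {
                if (test_and_set_bit_lock(0, resetting))
                        return -EBUSY; /* another reset is in flight */

                /* ... perform the reset work ... */

                clear_bit_unlock(0, resetting);
                return 0;
        }
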
@@ -1723,6 +1723,86 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
        return rc;
 }
 
+/**
+ * do_change_param_reset returns zero if we are able to keep processing reset
+ * events, or non-zero if we hit a fatal error and must halt.
+ */
+static int do_change_param_reset(struct ibmvnic_adapter *adapter,
+                                struct ibmvnic_rwi *rwi,
+                                u32 reset_state)
+{
+       struct net_device *netdev = adapter->netdev;
+       int i, rc;
+
+       netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
+                  rwi->reset_reason);
+
+       netif_carrier_off(netdev);
+       adapter->reset_reason = rwi->reset_reason;
+
+       ibmvnic_cleanup(netdev);
+
+       if (reset_state == VNIC_OPEN) {
+               rc = __ibmvnic_close(netdev);
+               if (rc)
+                       return rc;
+       }
+
+       release_resources(adapter);
+       release_sub_crqs(adapter, 1);
+       release_crq_queue(adapter);
+
+       adapter->state = VNIC_PROBED;
+
+       rc = init_crq_queue(adapter);
+
+       if (rc) {
+               netdev_err(adapter->netdev,
+                          "Couldn't initialize crq. rc=%d\n", rc);
+               return rc;
+       }
+
+       rc = ibmvnic_reset_init(adapter);
+       if (rc)
+               return IBMVNIC_INIT_FAILED;
+
+       /* If the adapter was in PROBE state prior to the reset,
+        * exit here.
+        */
+       if (reset_state == VNIC_PROBED)
+               return 0;
+
+       rc = ibmvnic_login(netdev);
+       if (rc) {
+               adapter->state = reset_state;
+               return rc;
+       }
+
+       rc = init_resources(adapter);
+       if (rc)
+               return rc;
+
+       ibmvnic_disable_irqs(adapter);
+
+       adapter->state = VNIC_CLOSED;
+
+       if (reset_state == VNIC_CLOSED)
+               return 0;
+
+       rc = __ibmvnic_open(netdev);
+       if (rc)
+               return IBMVNIC_OPEN_FAILED;
+
+       /* refresh device's multicast list */
+       ibmvnic_set_multi(netdev);
+
+       /* kick napi */
+       for (i = 0; i < adapter->req_rx_queues; i++)
+               napi_schedule(&adapter->napi[i]);
+
+       return 0;
+}
+
 /**
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
@@ -1738,6 +1818,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
                   rwi->reset_reason);
 
+       rtnl_lock();
+
        netif_carrier_off(netdev);
        adapter->reset_reason = rwi->reset_reason;
 
@@ -1751,16 +1833,25 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        if (reset_state == VNIC_OPEN &&
            adapter->reset_reason != VNIC_RESET_MOBILITY &&
            adapter->reset_reason != VNIC_RESET_FAILOVER) {
-               rc = __ibmvnic_close(netdev);
+               adapter->state = VNIC_CLOSING;
+
+               /* Release the RTNL lock before link state change and
+                * re-acquire after the link state change to allow
+                * linkwatch_event to grab the RTNL lock and run during
+                * a reset.
+                */
+               rtnl_unlock();
+               rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+               rtnl_lock();
                if (rc)
-                       return rc;
-       }
+                       goto out;
 
-       if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
-           adapter->wait_for_reset) {
-               release_resources(adapter);
-               release_sub_crqs(adapter, 1);
-               release_crq_queue(adapter);
+               if (adapter->state != VNIC_CLOSING) {
+                       rc = -1;
+                       goto out;
+               }
+
+               adapter->state = VNIC_CLOSED;
        }
 
        if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
@@ -1769,9 +1860,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                 */
                adapter->state = VNIC_PROBED;
 
-               if (adapter->wait_for_reset) {
-                       rc = init_crq_queue(adapter);
-               } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+               if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
                        rc = ibmvnic_reenable_crq_queue(adapter);
                        release_sub_crqs(adapter, 1);
                } else {
@@ -1783,36 +1872,35 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                if (rc) {
                        netdev_err(adapter->netdev,
                                   "Couldn't initialize crq. rc=%d\n", rc);
-                       return rc;
+                       goto out;
                }
 
                rc = ibmvnic_reset_init(adapter);
-               if (rc)
-                       return IBMVNIC_INIT_FAILED;
+               if (rc) {
+                       rc = IBMVNIC_INIT_FAILED;
+                       goto out;
+               }
 
                /* If the adapter was in PROBE state prior to the reset,
                 * exit here.
                 */
-               if (reset_state == VNIC_PROBED)
-                       return 0;
+               if (reset_state == VNIC_PROBED) {
+                       rc = 0;
+                       goto out;
+               }
 
                rc = ibmvnic_login(netdev);
                if (rc) {
                        adapter->state = reset_state;
-                       return rc;
+                       goto out;
                }
 
-               if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
-                   adapter->wait_for_reset) {
-                       rc = init_resources(adapter);
-                       if (rc)
-                               return rc;
-               } else if (adapter->req_rx_queues != old_num_rx_queues ||
-                          adapter->req_tx_queues != old_num_tx_queues ||
-                          adapter->req_rx_add_entries_per_subcrq !=
-                                                       old_num_rx_slots ||
-                          adapter->req_tx_entries_per_subcrq !=
-                                                       old_num_tx_slots) {
+               if (adapter->req_rx_queues != old_num_rx_queues ||
+                   adapter->req_tx_queues != old_num_tx_queues ||
+                   adapter->req_rx_add_entries_per_subcrq !=
+                   old_num_rx_slots ||
+                   adapter->req_tx_entries_per_subcrq !=
+                   old_num_tx_slots) {
                        release_rx_pools(adapter);
                        release_tx_pools(adapter);
                        release_napi(adapter);
@@ -1820,32 +1908,30 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
                        rc = init_resources(adapter);
                        if (rc)
-                               return rc;
+                               goto out;
 
                } else {
                        rc = reset_tx_pools(adapter);
                        if (rc)
-                               return rc;
+                               goto out;
 
                        rc = reset_rx_pools(adapter);
                        if (rc)
-                               return rc;
+                               goto out;
                }
                ibmvnic_disable_irqs(adapter);
        }
        adapter->state = VNIC_CLOSED;
 
-       if (reset_state == VNIC_CLOSED)
-               return 0;
+       if (reset_state == VNIC_CLOSED) {
+               rc = 0;
+               goto out;
+       }
 
        rc = __ibmvnic_open(netdev);
        if (rc) {
-               if (list_empty(&adapter->rwi_list))
-                       adapter->state = VNIC_CLOSED;
-               else
-                       adapter->state = reset_state;
-
-               return 0;
+               rc = IBMVNIC_OPEN_FAILED;
+               goto out;
        }
 
        /* refresh device's multicast list */
@@ -1855,11 +1941,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        for (i = 0; i < adapter->req_rx_queues; i++)
                napi_schedule(&adapter->napi[i]);
 
-       if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
-           adapter->reset_reason != VNIC_RESET_CHANGE_PARAM)
+       if (adapter->reset_reason != VNIC_RESET_FAILOVER)
                call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
 
-       return 0;
+       rc = 0;
+
+out:
+       rtnl_unlock();
+
+       return rc;
 }
 
 static int do_hard_reset(struct ibmvnic_adapter *adapter,
@@ -1919,14 +2009,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
                return 0;
 
        rc = __ibmvnic_open(netdev);
-       if (rc) {
-               if (list_empty(&adapter->rwi_list))
-                       adapter->state = VNIC_CLOSED;
-               else
-                       adapter->state = reset_state;
-
-               return 0;
-       }
+       if (rc)
+               return IBMVNIC_OPEN_FAILED;
 
        return 0;
 }
@@ -1965,20 +2049,17 @@ static void __ibmvnic_reset(struct work_struct *work)
 {
        struct ibmvnic_rwi *rwi;
        struct ibmvnic_adapter *adapter;
-       bool we_lock_rtnl = false;
        u32 reset_state;
        int rc = 0;
 
        adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
-       /* netif_set_real_num_xx_queues needs to take rtnl lock here
-        * unless wait_for_reset is set, in which case the rtnl lock
-        * has already been taken before initializing the reset
-        */
-       if (!adapter->wait_for_reset) {
-               rtnl_lock();
-               we_lock_rtnl = true;
+       if (test_and_set_bit_lock(0, &adapter->resetting)) {
+               schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
+                                     IBMVNIC_RESET_DELAY);
+               return;
        }
+
        reset_state = adapter->state;
 
        rwi = get_next_rwi(adapter);
@@ -1990,22 +2071,43 @@ static void __ibmvnic_reset(struct work_struct *work)
                        break;
                }
 
-               if (adapter->force_reset_recovery) {
-                       adapter->force_reset_recovery = false;
-                       rc = do_hard_reset(adapter, rwi, reset_state);
+               if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+                       /* CHANGE_PARAM requestor holds rtnl_lock */
+                       rc = do_change_param_reset(adapter, rwi, reset_state);
+               } else if (adapter->force_reset_recovery) {
+                       /* Transport event occurred during previous reset */
+                       if (adapter->wait_for_reset) {
+                               /* Previous was CHANGE_PARAM; caller locked */
+                               adapter->force_reset_recovery = false;
+                               rc = do_hard_reset(adapter, rwi, reset_state);
+                       } else {
+                               rtnl_lock();
+                               adapter->force_reset_recovery = false;
+                               rc = do_hard_reset(adapter, rwi, reset_state);
+                               rtnl_unlock();
+                       }
                } else {
                        rc = do_reset(adapter, rwi, reset_state);
                }
                kfree(rwi);
-               if (rc && rc != IBMVNIC_INIT_FAILED &&
+               if (rc == IBMVNIC_OPEN_FAILED) {
+                       if (list_empty(&adapter->rwi_list))
+                               adapter->state = VNIC_CLOSED;
+                       else
+                               adapter->state = reset_state;
+                       rc = 0;
+               } else if (rc && rc != IBMVNIC_INIT_FAILED &&
                    !adapter->force_reset_recovery)
                        break;
 
                rwi = get_next_rwi(adapter);
+
+               if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
+                           rwi->reset_reason == VNIC_RESET_MOBILITY))
+                       adapter->force_reset_recovery = true;
        }
 
        if (adapter->wait_for_reset) {
-               adapter->wait_for_reset = false;
                adapter->reset_done_rc = rc;
                complete(&adapter->reset_done);
        }
@@ -2015,9 +2117,16 @@ static void __ibmvnic_reset(struct work_struct *work)
                free_all_rwi(adapter);
        }
 
-       adapter->resetting = false;
-       if (we_lock_rtnl)
-               rtnl_unlock();
+       clear_bit_unlock(0, &adapter->resetting);
+}
+
+static void __ibmvnic_delayed_reset(struct work_struct *work)
+{
+       struct ibmvnic_adapter *adapter;
+
+       adapter = container_of(work, struct ibmvnic_adapter,
+                              ibmvnic_delayed_reset.work);
+       __ibmvnic_reset(&adapter->ibmvnic_reset);
 }
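
Note: __ibmvnic_reset() now claims sole ownership of reset processing with test_and_set_bit_lock(0, &adapter->resetting) and, if it loses the race, re-arms itself via the new delayed work instead of blocking. A rough userspace analog of the claim/defer/release protocol using C11 atomics (the sleep-and-retry loop stands in for schedule_delayed_work(); all names here are illustrative):

    #include <stdatomic.h>
    #include <unistd.h>

    static atomic_flag resetting = ATOMIC_FLAG_INIT;

    static void process_resets(void) { /* drain the reset work list */ }

    static void reset_worker(void)
    {
            /* Analog of test_and_set_bit_lock(): one owner at a time. */
            while (atomic_flag_test_and_set_explicit(&resetting,
                                                     memory_order_acquire))
                    usleep(100 * 1000); /* defer and retry, like the
                                         * IBMVNIC_RESET_DELAY re-arm */

            process_resets();

            /* Analog of clear_bit_unlock(). */
            atomic_flag_clear_explicit(&resetting, memory_order_release);
    }
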
 
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -2072,14 +2181,11 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
        rwi->reset_reason = reason;
        list_add_tail(&rwi->list, &adapter->rwi_list);
        spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-       adapter->resetting = true;
        netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
        schedule_work(&adapter->ibmvnic_reset);
 
        return 0;
 err:
-       if (adapter->wait_for_reset)
-               adapter->wait_for_reset = false;
        return -ret;
 }
 
@@ -2119,7 +2225,7 @@ restart_poll:
                u16 offset;
                u8 flags = 0;
 
-               if (unlikely(adapter->resetting &&
+               if (unlikely(test_bit(0, &adapter->resetting) &&
                             adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
                        enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
                        napi_complete_done(napi, frames_processed);
@@ -2770,14 +2876,12 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
                return 1;
        }
 
-       if (adapter->resetting &&
+       if (test_bit(0, &adapter->resetting) &&
            adapter->reset_reason == VNIC_RESET_MOBILITY) {
-               u64 val = (0xff000000) | scrq->hw_irq;
+               struct irq_desc *desc = irq_to_desc(scrq->irq);
+               struct irq_chip *chip = irq_desc_get_chip(desc);
 
-               rc = plpar_hcall_norets(H_EOI, val);
-               if (rc)
-                       dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
-                               val, rc);
+               chip->irq_eoi(&desc->irq_data);
        }
 
        rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
@@ -3320,7 +3424,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
        if (rc) {
                if (rc == H_CLOSED) {
                        dev_warn(dev, "CRQ Queue closed\n");
-                       if (adapter->resetting)
+                       if (test_bit(0, &adapter->resetting))
                                ibmvnic_reset(adapter, VNIC_RESET_FATAL);
                }
 
@@ -4312,13 +4416,14 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq,
 {
        struct net_device *netdev = adapter->netdev;
        int rc;
+       __be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed);
 
        rc = crq->query_phys_parms_rsp.rc.code;
        if (rc) {
                netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc);
                return rc;
        }
-       switch (cpu_to_be32(crq->query_phys_parms_rsp.speed)) {
+       switch (rspeed) {
        case IBMVNIC_10MBPS:
                adapter->speed = SPEED_10;
                break;
@@ -4344,8 +4449,8 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq,
                adapter->speed = SPEED_100000;
                break;
        default:
-               netdev_warn(netdev, "Unknown speed 0x%08x\n",
-                           cpu_to_be32(crq->query_phys_parms_rsp.speed));
+               if (netif_carrier_ok(netdev))
+                       netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed);
                adapter->speed = SPEED_UNKNOWN;
        }
        if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX)
@@ -4395,7 +4500,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
        case IBMVNIC_CRQ_XPORT_EVENT:
                netif_carrier_off(netdev);
                adapter->crq.active = false;
-               if (adapter->resetting)
+               if (test_bit(0, &adapter->resetting))
                        adapter->force_reset_recovery = true;
                if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
                        dev_info(dev, "Migrated, re-enabling adapter\n");
@@ -4733,7 +4838,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
                return -1;
        }
 
-       if (adapter->resetting && !adapter->wait_for_reset &&
+       if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
            adapter->reset_reason != VNIC_RESET_MOBILITY) {
                if (adapter->req_rx_queues != old_num_rx_queues ||
                    adapter->req_tx_queues != old_num_tx_queues) {
@@ -4845,10 +4950,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        spin_lock_init(&adapter->stats_lock);
 
        INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+       INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
+                         __ibmvnic_delayed_reset);
        INIT_LIST_HEAD(&adapter->rwi_list);
        spin_lock_init(&adapter->rwi_lock);
        init_completion(&adapter->init_done);
-       adapter->resetting = false;
+       clear_bit(0, &adapter->resetting);
 
        do {
                rc = init_crq_queue(adapter);
index 70bd286..ebc3924 100644 (file)
@@ -20,6 +20,7 @@
 #define IBMVNIC_INVALID_MAP    -1
 #define IBMVNIC_STATS_TIMEOUT  1
 #define IBMVNIC_INIT_FAILED    2
+#define IBMVNIC_OPEN_FAILED    3
 
 /* basic structures plus 100 2k buffers */
 #define IBMVNIC_IO_ENTITLEMENT_DEFAULT 610305
@@ -38,6 +39,8 @@
 #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
 #define IBMVNIC_BUFFER_HLEN 500
 
+#define IBMVNIC_RESET_DELAY 100
+
 static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
 #define IBMVNIC_USE_SERVER_MAXES 0x1
        "use-server-maxes"
@@ -1076,7 +1079,8 @@ struct ibmvnic_adapter {
        spinlock_t rwi_lock;
        struct list_head rwi_list;
        struct work_struct ibmvnic_reset;
-       bool resetting;
+       struct delayed_work ibmvnic_delayed_reset;
+       unsigned long resetting;
        bool napi_enabled, from_passive_init;
 
        bool failover_pending;
index 71d3d88..be56e63 100644 (file)
@@ -607,6 +607,7 @@ static int e1000_set_ringparam(struct net_device *netdev,
        for (i = 0; i < adapter->num_rx_queues; i++)
                rxdr[i].count = rxdr->count;
 
+       err = 0;
        if (netif_running(adapter->netdev)) {
                /* Try to get new resources before deleting old */
                err = e1000_setup_all_rx_resources(adapter);
@@ -627,14 +628,13 @@ static int e1000_set_ringparam(struct net_device *netdev,
                adapter->rx_ring = rxdr;
                adapter->tx_ring = txdr;
                err = e1000_up(adapter);
-               if (err)
-                       goto err_setup;
        }
        kfree(tx_old);
        kfree(rx_old);
 
        clear_bit(__E1000_RESETTING, &adapter->flags);
-       return 0;
+       return err;
+
 err_setup_tx:
        e1000_free_all_rx_resources(adapter);
 err_setup_rx:
@@ -646,7 +646,6 @@ err_alloc_rx:
 err_alloc_tx:
        if (netif_running(adapter->netdev))
                e1000_up(adapter);
-err_setup:
        clear_bit(__E1000_RESETTING, &adapter->flags);
        return err;
 }
index 34cd679..6c51b1b 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/io.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/crc32.h>
 #include <linux/if_vlan.h>
 #include <linux/timecounter.h>
index b1c3227..a05dfec 100644 (file)
@@ -157,11 +157,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
                err = i40e_queue_pair_enable(vsi, qid);
                if (err)
                        return err;
-
-               /* Kick start the NAPI context so that receiving will start */
-               err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
-               if (err)
-                       return err;
        }
 
        return 0;
index 3ec2ce0..8a6ef35 100644 (file)
@@ -466,7 +466,7 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw)
                        ? igb_setup_copper_link_82575
                        : igb_setup_serdes_link_82575;
 
-       if (mac->type == e1000_82580) {
+       if (mac->type == e1000_82580 || mac->type == e1000_i350) {
                switch (hw->device_id) {
                /* feature not supported on these id's */
                case E1000_DEV_ID_DH89XXCC_SGMII:
index 105b062..9148c62 100644 (file)
@@ -753,7 +753,8 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
                struct net_device *netdev = igb->netdev;
                hw->hw_addr = NULL;
                netdev_err(netdev, "PCIe link lost\n");
-               WARN(1, "igb: Failed to read reg 0x%x!\n", reg);
+               WARN(pci_device_is_present(igb->pdev),
+                    "igb: Failed to read reg 0x%x!\n", reg);
        }
 
        return value;
@@ -2064,7 +2065,8 @@ static void igb_check_swap_media(struct igb_adapter *adapter)
        if ((hw->phy.media_type == e1000_media_type_copper) &&
            (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
                swap_now = true;
-       } else if (!(connsw & E1000_CONNSW_SERDESD)) {
+       } else if ((hw->phy.media_type != e1000_media_type_copper) &&
+                  !(connsw & E1000_CONNSW_SERDESD)) {
                /* copper signal takes time to appear */
                if (adapter->copper_tries < 4) {
                        adapter->copper_tries++;
@@ -2370,7 +2372,7 @@ void igb_reset(struct igb_adapter *adapter)
                adapter->ei.get_invariants(hw);
                adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
        }
-       if ((mac->type == e1000_82575) &&
+       if ((mac->type == e1000_82575 || mac->type == e1000_i350) &&
            (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
                igb_enable_mas(adapter);
        }
index 63b62d7..8e424df 100644 (file)
@@ -4047,7 +4047,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
                hw->hw_addr = NULL;
                netif_device_detach(netdev);
                netdev_err(netdev, "PCIe link lost, device now detached\n");
-               WARN(1, "igc: Failed to read reg 0x%x!\n", reg);
+               WARN(pci_device_is_present(igc->pdev),
+                    "igc: Failed to read reg 0x%x!\n", reg);
        }
 
        return value;
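
Note: the igb and igc hunks above change WARN(1, ...) to WARN(pci_device_is_present(...), ...): a surprise-removed PCIe device legitimately reads back all-ones, and that expected case should not dump a backtrace. A hedged userspace sketch of the detection logic (mmio_read32() and device_present are stand-ins, not driver APIs):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t mmio_read32(const volatile uint32_t *reg) { return *reg; }

    static uint32_t rd32(volatile uint32_t *base, unsigned int reg,
                         bool device_present)
    {
            uint32_t value = mmio_read32(base + reg / 4);

            /* All-ones is what a read returns after surprise removal;
             * warn only if the device is still supposed to be there. */
            if (value == 0xFFFFFFFFu && device_present)
                    fprintf(stderr, "failed to read reg 0x%x\n", reg);

            return value;
    }
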
index 1ce2397..91b3780 100644 (file)
@@ -4310,7 +4310,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
                if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state))
                        set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
 
-               clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state);
                if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
                        continue;
 
index 6d52cf5..25aa400 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
index c8425d3..e47783c 100644 (file)
@@ -160,16 +160,23 @@ static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
                             (bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS));
 }
 #else
-void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
-                           struct mvneta_bm_pool *bm_pool, u8 port_map) {}
-void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
-                        u8 port_map) {}
-int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf) { return 0; }
-int mvneta_bm_pool_refill(struct mvneta_bm *priv,
-                         struct mvneta_bm_pool *bm_pool) {return 0; }
-struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
-                                         enum mvneta_bm_type type, u8 port_id,
-                                         int pkt_size) { return NULL; }
+static inline void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
+                                         struct mvneta_bm_pool *bm_pool,
+                                         u8 port_map) {}
+static inline void mvneta_bm_bufs_free(struct mvneta_bm *priv,
+                                      struct mvneta_bm_pool *bm_pool,
+                                      u8 port_map) {}
+static inline int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf)
+{ return 0; }
+static inline int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+                                       struct mvneta_bm_pool *bm_pool)
+{ return 0; }
+static inline struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv,
+                                                       u8 pool_id,
+                                                       enum mvneta_bm_type type,
+                                                       u8 port_id,
+                                                       int pkt_size)
+{ return NULL; }
 
 static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
                                         struct mvneta_bm_pool *bm_pool,
@@ -178,7 +185,8 @@ static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
 static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
                                        struct mvneta_bm_pool *bm_pool)
 { return 0; }
-struct mvneta_bm *mvneta_bm_get(struct device_node *node) { return NULL; }
-void mvneta_bm_put(struct mvneta_bm *priv) {}
+static inline struct mvneta_bm *mvneta_bm_get(struct device_node *node)
+{ return NULL; }
+static inline void mvneta_bm_put(struct mvneta_bm *priv) {}
 #endif /* CONFIG_MVNETA_BM */
 #endif
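
Note: dropping static inline on the CONFIG_MVNETA_BM=n stubs was a genuine bug: a plain definition in a header is emitted by every translation unit that includes it, and linking two such objects fails with multiple-definition errors. The general shape of the pattern the hunk restores (CONFIG_FOO and the names are illustrative):

    /* foo.h -- config-gated API with header stubs (generic pattern). */
    #ifdef CONFIG_FOO
    int foo_start(void);        /* real definitions live in foo.c */
    void foo_stop(void);
    #else
    /* 'static inline' keeps each stub local to the including file, so
     * inclusion from multiple objects cannot clash at link time. */
    static inline int foo_start(void) { return 0; }
    static inline void foo_stop(void) {}
    #endif
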
index 0a2ec38..095f6c7 100644 (file)
@@ -3108,7 +3108,7 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
        skb_put(skb, len);
 
        if (dev->features & NETIF_F_RXCSUM) {
-               skb->csum = csum;
+               skb->csum = le16_to_cpu(csum);
                skb->ip_summed = CHECKSUM_COMPLETE;
        }
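
Note: the skge fix converts the checksum the hardware wrote into the little-endian descriptor before handing it to the stack; storing the raw value was wrong on big-endian machines. A portable analog of le16_to_cpu(), assembling the value from its LE byte layout instead of trusting the CPU's byte order:

    #include <stdint.h>

    /* Byte-order independent 16-bit little-endian load, unlike casting
     * the raw descriptor bytes to a uint16_t. */
    static uint16_t le16_load(const uint8_t *p)
    {
            return (uint16_t)(p[0] | (p[1] << 8));
    }
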
 
index c610693..703adb9 100644 (file)
@@ -261,6 +261,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
                ge_mode = 0;
                switch (state->interface) {
                case PHY_INTERFACE_MODE_MII:
+               case PHY_INTERFACE_MODE_GMII:
                        ge_mode = 1;
                        break;
                case PHY_INTERFACE_MODE_REVMII:
index 4356f3a..1187ef1 100644 (file)
@@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *dev)
                priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
 }
 
-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
+static int
+mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
+                                struct resource_allocator *res_alloc,
+                                int vf)
 {
-       /* reduce the sink counter */
-       return (dev->caps.max_counters - 1 -
-               (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
-               / MLX4_MAX_PORTS;
+       struct mlx4_active_ports actv_ports;
+       int ports, counters_guaranteed;
+
+       /* For master, only allocate according to the number of phys ports */
+       if (vf == mlx4_master_func_num(dev))
+               return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
+
+       /* calculate real number of ports for the VF */
+       actv_ports = mlx4_get_active_ports(dev, vf);
+       ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+       counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
+
+       /* If we do not have enough counters for this VF, do not
+        * allocate any for it. '-1' to reduce the sink counter.
+        */
+       if ((res_alloc->res_reserved + counters_guaranteed) >
+           (dev->caps.max_counters - 1))
+               return 0;
+
+       return counters_guaranteed;
 }
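
Note: mlx4_calc_res_counter_guaranteed() replaces the flat per-VF estimate with a budget check: the PF gets MLX4_PF_COUNTERS_PER_PORT per physical port, each VF gets MLX4_VF_COUNTERS_PER_PORT per active port, and a VF whose guarantee would no longer fit in max_counters - 1 (the sink counter stays reserved) gets zero. A self-contained sketch of the arithmetic with made-up numbers:

    #include <stdio.h>

    /* Illustrative values; the real ones come from device caps and mlx4.h. */
    #define PF_COUNTERS_PER_PORT 2
    #define VF_COUNTERS_PER_PORT 1

    static int calc_guaranteed(int max_counters, int reserved_so_far,
                               int vf_active_ports)
    {
            int guaranteed = vf_active_ports * VF_COUNTERS_PER_PORT;

            /* '-1' keeps the sink counter out of the budget; a VF that
             * would overflow the budget gets no guaranteed counters. */
            if (reserved_so_far + guaranteed > max_counters - 1)
                    return 0;
            return guaranteed;
    }

    int main(void)
    {
            /* With 10 counters total the budget is 9: single-port VFs are
             * guaranteed 1 counter each until the budget runs out. */
            int reserved = 2 * PF_COUNTERS_PER_PORT; /* PF on 2 ports */

            for (int vf = 0; vf < 8; vf++) {
                    int g = calc_guaranteed(10, reserved, 1);

                    reserved += g;
                    printf("vf%d guaranteed %d\n", vf, g);
            }
            return 0;
    }
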
 
 int mlx4_init_resource_tracker(struct mlx4_dev *dev)
@@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
        struct mlx4_priv *priv = mlx4_priv(dev);
        int i, j;
        int t;
-       int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
 
        priv->mfunc.master.res_tracker.slave_list =
                kcalloc(dev->num_slaves, sizeof(struct slave_list),
@@ -603,16 +621,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
                                break;
                        case RES_COUNTER:
                                res_alloc->quota[t] = dev->caps.max_counters;
-                               if (t == mlx4_master_func_num(dev))
-                                       res_alloc->guaranteed[t] =
-                                               MLX4_PF_COUNTERS_PER_PORT *
-                                               MLX4_MAX_PORTS;
-                               else if (t <= max_vfs_guarantee_counter)
-                                       res_alloc->guaranteed[t] =
-                                               MLX4_VF_COUNTERS_PER_PORT *
-                                               MLX4_MAX_PORTS;
-                               else
-                                       res_alloc->guaranteed[t] = 0;
+                               res_alloc->guaranteed[t] =
+                                       mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
                                break;
                        default:
                                break;
index 0dba272..a1f20b2 100644 (file)
@@ -20,15 +20,15 @@ config MLX5_ACCEL
        bool
 
 config MLX5_FPGA
-        bool "Mellanox Technologies Innova support"
-        depends on MLX5_CORE
+       bool "Mellanox Technologies Innova support"
+       depends on MLX5_CORE
        select MLX5_ACCEL
-        ---help---
-          Build support for the Innova family of network cards by Mellanox
-          Technologies. Innova network cards are comprised of a ConnectX chip
-          and an FPGA chip on one board. If you select this option, the
-          mlx5_core driver will include the Innova FPGA core and allow building
-          sandbox-specific client drivers.
+       ---help---
+         Build support for the Innova family of network cards by Mellanox
+         Technologies. Innova network cards are comprised of a ConnectX chip
+         and an FPGA chip on one board. If you select this option, the
+         mlx5_core driver will include the Innova FPGA core and allow building
+         sandbox-specific client drivers.
 
 config MLX5_CORE_EN
        bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
@@ -58,14 +58,14 @@ config MLX5_EN_RXNFC
          API.
 
 config MLX5_MPFS
-        bool "Mellanox Technologies MLX5 MPFS support"
-        depends on MLX5_CORE_EN
+       bool "Mellanox Technologies MLX5 MPFS support"
+       depends on MLX5_CORE_EN
        default y
-        ---help---
+       ---help---
          Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
-          support in ConnectX NIC. MPFs is required for when multi-PF configuration
-          is enabled to allow passing user configured unicast MAC addresses to the
-          requesting PF.
+         support in ConnectX NIC. MPFS is required when multi-PF configuration
+         is enabled to allow passing user configured unicast MAC addresses to the
+         requesting PF.
 
 config MLX5_ESWITCH
        bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
@@ -73,10 +73,10 @@ config MLX5_ESWITCH
        default y
        ---help---
          Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
-          E-Switch provides internal SRIOV packet steering and switching for the
-          enabled VFs and PF in two available modes:
-                Legacy SRIOV mode (L2 mac vlan steering based).
-                Switchdev mode (eswitch offloads).
+         E-Switch provides internal SRIOV packet steering and switching for the
+         enabled VFs and PF in two available modes:
+               Legacy SRIOV mode (L2 mac vlan steering based).
+               Switchdev mode (eswitch offloads).
 
 config MLX5_CORE_EN_DCB
        bool "Data Center Bridging (DCB) Support"
index 8d76452..f1a7bc4 100644 (file)
@@ -345,7 +345,7 @@ struct mlx5e_tx_wqe_info {
        u8  num_wqebbs;
        u8  num_dma;
 #ifdef CONFIG_MLX5_EN_TLS
-       skb_frag_t *resync_dump_frag;
+       struct page *resync_dump_frag_page;
 #endif
 };
 
@@ -410,6 +410,7 @@ struct mlx5e_txqsq {
        struct device             *pdev;
        __be32                     mkey_be;
        unsigned long              state;
+       unsigned int               hw_mtu;
        struct hwtstamp_config    *tstamp;
        struct mlx5_clock         *clock;
 
index b3a249b..ac44bbe 100644 (file)
@@ -141,7 +141,7 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
                                    "Failed to create hv vhca stats agent, err = %ld\n",
                                    PTR_ERR(agent));
 
-               kfree(priv->stats_agent.buf);
+               kvfree(priv->stats_agent.buf);
                return IS_ERR_OR_NULL(agent);
        }
 
@@ -157,5 +157,5 @@ void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
                return;
 
        mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
-       kfree(priv->stats_agent.buf);
+       kvfree(priv->stats_agent.buf);
 }
index f8ee18b..13af725 100644 (file)
@@ -97,15 +97,19 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
        if (ret)
                return ret;
 
-       if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET)
+       if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
+               ip_rt_put(rt);
                return -ENETUNREACH;
+       }
 #else
        return -EOPNOTSUPP;
 #endif
 
        ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
-       if (ret < 0)
+       if (ret < 0) {
+               ip_rt_put(rt);
                return ret;
+       }
 
        if (!(*out_ttl))
                *out_ttl = ip4_dst_hoplimit(&rt->dst);
@@ -149,8 +153,10 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
                *out_ttl = ip6_dst_hoplimit(dst);
 
        ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
-       if (ret < 0)
+       if (ret < 0) {
+               dst_release(dst);
                return ret;
+       }
 #else
        return -EOPNOTSUPP;
 #endif
index 87be967..7c8796d 100644 (file)
 #else
 /* TLS offload requires additional stop_room for:
  *  - a resync SKB.
- * kTLS offload requires additional stop_room for:
- * - static params WQE,
- * - progress params WQE, and
- * - resync DUMP per frag.
+ * kTLS offload requires fixed additional stop_room for:
+ * - a static params WQE, and a progress params WQE.
+ * The additional MTU-dependent room for the resync DUMP WQEs
+ * is calculated and added at runtime.
  */
 #define MLX5E_SQ_TLS_ROOM  \
        (MLX5_SEND_WQE_MAX_WQEBBS + \
-        MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \
-        MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS)
+        MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
 #endif
 
 #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
@@ -92,7 +91,7 @@ mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
 
        /* fill sq frag edge with nops to avoid wqe wrapping two pages */
        for (; wi < edge_wi; wi++) {
-               wi->skb        = NULL;
+               memset(wi, 0, sizeof(*wi));
                wi->num_wqebbs = 1;
                mlx5e_post_nop(wq, sq->sqn, &sq->pc);
        }
index d2ff74d..46725cd 100644 (file)
@@ -38,7 +38,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
                return -ENOMEM;
 
        tx_priv->expected_seq = start_offload_tcp_sn;
-       tx_priv->crypto_info  = crypto_info;
+       tx_priv->crypto_info  = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
        mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv);
 
        /* tc and underlay_qpn values are not in use for tls tis */
index b7298f9..a3efa29 100644 (file)
         MLX5_ST_SZ_BYTES(tls_progress_params))
 #define MLX5E_KTLS_PROGRESS_WQEBBS \
        (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
-#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2
+
+struct mlx5e_dump_wqe {
+       struct mlx5_wqe_ctrl_seg ctrl;
+       struct mlx5_wqe_data_seg data;
+};
+
+#define MLX5E_KTLS_DUMP_WQEBBS \
+       (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB))
 
 enum {
        MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD     = 0,
@@ -37,7 +44,7 @@ enum {
 
 struct mlx5e_ktls_offload_context_tx {
        struct tls_offload_context_tx *tx_ctx;
-       struct tls_crypto_info *crypto_info;
+       struct tls12_crypto_info_aes_gcm_128 crypto_info;
        u32 expected_seq;
        u32 tisn;
        u32 key_id;
@@ -86,14 +93,28 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
                                         struct mlx5e_tx_wqe **wqe, u16 *pi);
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                           struct mlx5e_tx_wqe_info *wi,
-                                          struct mlx5e_sq_dma *dma);
-
+                                          u32 *dma_fifo_cc);
+static inline u8
+mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
+                           unsigned int sync_len)
+{
+       /* Given the MTU and sync_len, calculates an upper bound for the
+        * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
+        */
+       return MLX5E_KTLS_DUMP_WQEBBS *
+               (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+}
 #else
 
 static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
 {
 }
 
+static inline void
+mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+                                     struct mlx5e_tx_wqe_info *wi,
+                                     u32 *dma_fifo_cc) {}
+
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
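
Note: mlx5e_ktls_dumps_num_wqebbs() bounds the resync DUMP cost of one record. Each fragment is cut into at-most-hw_mtu pieces, and summing ceil(frag_size / mtu) over all fragments is at most nfrags + ceil(sync_len / mtu); each piece costs MLX5E_KTLS_DUMP_WQEBBS. A small runnable check of that bound (the sizes are made up):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Upper bound on DUMP WQEs for one resynced record: every fragment can
     * add one boundary cut, plus one WQE per full MTU of synced data. */
    static unsigned int dump_wqes_max(unsigned int nfrags,
                                      unsigned int sync_len,
                                      unsigned int hw_mtu)
    {
            return nfrags + DIV_ROUND_UP(sync_len, hw_mtu);
    }

    int main(void)
    {
            /* e.g. 3 frags covering 4000 bytes at an HW MTU of 1500:
             * at most 3 + ceil(4000/1500) = 6 DUMP WQEs. */
            printf("%u\n", dump_wqes_max(3, 4000, 1500));
            return 0;
    }
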
index d195366..778dab1 100644 (file)
@@ -24,17 +24,12 @@ enum {
 static void
 fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
 {
-       struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
-       struct tls12_crypto_info_aes_gcm_128 *info;
+       struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
        char *initial_rn, *gcm_iv;
        u16 salt_sz, rec_seq_sz;
        char *salt, *rec_seq;
        u8 tls_version;
 
-       if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128))
-               return;
-
-       info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
        EXTRACT_INFO_FIELDS;
 
        gcm_iv      = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
@@ -108,16 +103,15 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn,
 }
 
 static void tx_fill_wi(struct mlx5e_txqsq *sq,
-                      u16 pi, u8 num_wqebbs,
-                      skb_frag_t *resync_dump_frag,
-                      u32 num_bytes)
+                      u16 pi, u8 num_wqebbs, u32 num_bytes,
+                      struct page *page)
 {
        struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
 
-       wi->skb              = NULL;
-       wi->num_wqebbs       = num_wqebbs;
-       wi->resync_dump_frag = resync_dump_frag;
-       wi->num_bytes        = num_bytes;
+       memset(wi, 0, sizeof(*wi));
+       wi->num_wqebbs = num_wqebbs;
+       wi->num_bytes  = num_bytes;
+       wi->resync_dump_frag_page = page;
 }
 
 void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
@@ -145,7 +139,7 @@ post_static_params(struct mlx5e_txqsq *sq,
 
        umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi);
        build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
-       tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL, 0);
+       tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL);
        sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
 }
 
@@ -159,7 +153,7 @@ post_progress_params(struct mlx5e_txqsq *sq,
 
        wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi);
        build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
-       tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL, 0);
+       tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL);
        sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
 }
 
@@ -169,6 +163,14 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
                              bool skip_static_post, bool fence_first_post)
 {
        bool progress_fence = skip_static_post || !fence_first_post;
+       struct mlx5_wq_cyc *wq = &sq->wq;
+       u16 contig_wqebbs_room, pi;
+
+       pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+       contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+       if (unlikely(contig_wqebbs_room <
+                    MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS))
+               mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 
        if (!skip_static_post)
                post_static_params(sq, priv_tx, fence_first_post);
@@ -180,29 +182,36 @@ struct tx_sync_info {
        u64 rcd_sn;
        s32 sync_len;
        int nr_frags;
-       skb_frag_t *frags[MAX_SKB_FRAGS];
+       skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+enum mlx5e_ktls_sync_retval {
+       MLX5E_KTLS_SYNC_DONE,
+       MLX5E_KTLS_SYNC_FAIL,
+       MLX5E_KTLS_SYNC_SKIP_NO_DATA,
 };
 
-static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
-                            u32 tcp_seq, struct tx_sync_info *info)
+static enum mlx5e_ktls_sync_retval
+tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+                u32 tcp_seq, struct tx_sync_info *info)
 {
        struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx;
+       enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE;
        struct tls_record_info *record;
        int remaining, i = 0;
        unsigned long flags;
-       bool ret = true;
 
        spin_lock_irqsave(&tx_ctx->lock, flags);
        record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn);
 
        if (unlikely(!record)) {
-               ret = false;
+               ret = MLX5E_KTLS_SYNC_FAIL;
                goto out;
        }
 
        if (unlikely(tcp_seq < tls_record_start_seq(record))) {
-               if (!tls_record_is_start_marker(record))
-                       ret = false;
+               ret = tls_record_is_start_marker(record) ?
+                       MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL;
                goto out;
        }
 
@@ -211,13 +220,13 @@ static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
        while (remaining > 0) {
                skb_frag_t *frag = &record->frags[i];
 
-               __skb_frag_ref(frag);
+               get_page(skb_frag_page(frag));
                remaining -= skb_frag_size(frag);
-               info->frags[i++] = frag;
+               info->frags[i++] = *frag;
        }
        /* reduce the part which will be sent with the original SKB */
        if (remaining < 0)
-               skb_frag_size_add(info->frags[i - 1], remaining);
+               skb_frag_size_add(&info->frags[i - 1], remaining);
        info->nr_frags = i;
 out:
        spin_unlock_irqrestore(&tx_ctx->lock, flags);
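
Note: tx_sync_info_get() now copies each skb_frag_t by value into info->frags[] and pins the page with get_page(), so the caller may resize and advance the copies without mutating the record that still belongs to the TLS stack. A toy version of the copy-then-pin pattern (struct page here is a stand-in refcount, not the kernel type):

    #include <stddef.h>

    struct page { int refcount; };
    struct frag { struct page *page; size_t off, size; };

    static void get_page(struct page *p) { p->refcount++; }

    /* Copy a shared frag by value and pin its page: the copy may be
     * resized or advanced freely; only the pin must be released later
     * (the driver drops it in the DUMP completion handler). */
    static struct frag frag_snapshot(const struct frag *shared)
    {
            struct frag copy = *shared;

            get_page(copy.page);
            return copy;
    }
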
@@ -229,17 +238,12 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
                      struct mlx5e_ktls_offload_context_tx *priv_tx,
                      u64 rcd_sn)
 {
-       struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
-       struct tls12_crypto_info_aes_gcm_128 *info;
+       struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
        __be64 rn_be = cpu_to_be64(rcd_sn);
        bool skip_static_post;
        u16 rec_seq_sz;
        char *rec_seq;
 
-       if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128))
-               return;
-
-       info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
        rec_seq = info->rec_seq;
        rec_seq_sz = sizeof(info->rec_seq);
 
@@ -250,11 +254,6 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
        mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true);
 }
 
-struct mlx5e_dump_wqe {
-       struct mlx5_wqe_ctrl_seg ctrl;
-       struct mlx5_wqe_data_seg data;
-};
-
 static int
 tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first)
 {
@@ -262,7 +261,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
        struct mlx5_wqe_data_seg *dseg;
        struct mlx5e_dump_wqe *wqe;
        dma_addr_t dma_addr = 0;
-       u8  num_wqebbs;
        u16 ds_cnt;
        int fsz;
        u16 pi;
@@ -270,7 +268,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
        wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
 
        ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
-       num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 
        cseg = &wqe->ctrl;
        dseg = &wqe->data;
@@ -291,24 +288,27 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
        dseg->byte_count = cpu_to_be32(fsz);
        mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
 
-       tx_fill_wi(sq, pi, num_wqebbs, frag, fsz);
-       sq->pc += num_wqebbs;
-
-       WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS,
-            "unexpected DUMP num_wqebbs, %d > %d",
-            num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS);
+       tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag));
+       sq->pc += MLX5E_KTLS_DUMP_WQEBBS;
 
        return 0;
 }
 
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                           struct mlx5e_tx_wqe_info *wi,
-                                          struct mlx5e_sq_dma *dma)
+                                          u32 *dma_fifo_cc)
 {
-       struct mlx5e_sq_stats *stats = sq->stats;
+       struct mlx5e_sq_stats *stats;
+       struct mlx5e_sq_dma *dma;
+
+       if (!wi->resync_dump_frag_page)
+               return;
+
+       dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+       stats = sq->stats;
 
        mlx5e_tx_dma_unmap(sq->pdev, dma);
-       __skb_frag_unref(wi->resync_dump_frag);
+       put_page(wi->resync_dump_frag_page);
        stats->tls_dump_packets++;
        stats->tls_dump_bytes += wi->num_bytes;
 }
@@ -318,25 +318,31 @@ static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
        struct mlx5_wq_cyc *wq = &sq->wq;
        u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 
-       tx_fill_wi(sq, pi, 1, NULL, 0);
+       tx_fill_wi(sq, pi, 1, 0, NULL);
 
        mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
 }
 
-static struct sk_buff *
+static enum mlx5e_ktls_sync_retval
 mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
                         struct mlx5e_txqsq *sq,
-                        struct sk_buff *skb,
+                        int datalen,
                         u32 seq)
 {
        struct mlx5e_sq_stats *stats = sq->stats;
        struct mlx5_wq_cyc *wq = &sq->wq;
+       enum mlx5e_ktls_sync_retval ret;
        struct tx_sync_info info = {};
        u16 contig_wqebbs_room, pi;
        u8 num_wqebbs;
-       int i;
-
-       if (!tx_sync_info_get(priv_tx, seq, &info)) {
+       int i = 0;
+
+       ret = tx_sync_info_get(priv_tx, seq, &info);
+       if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) {
+               if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) {
+                       stats->tls_skip_no_sync_data++;
+                       return MLX5E_KTLS_SYNC_SKIP_NO_DATA;
+               }
                /* We might get here if a retransmission reaches the driver
                 * after the relevant record is acked.
                 * It should be safe to drop the packet in this case
@@ -346,13 +352,8 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
        }
 
        if (unlikely(info.sync_len < 0)) {
-               u32 payload;
-               int headln;
-
-               headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
-               payload = skb->len - headln;
-               if (likely(payload <= -info.sync_len))
-                       return skb;
+               if (likely(datalen <= -info.sync_len))
+                       return MLX5E_KTLS_SYNC_DONE;
 
                stats->tls_drop_bypass_req++;
                goto err_out;
@@ -360,30 +361,62 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
 
        stats->tls_ooo++;
 
-       num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS +
-               (info.nr_frags ? info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1);
+       tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
+       /* If no dump WQE was sent, we need to have a fence NOP WQE before the
+        * actual data xmit.
+        */
+       if (!info.nr_frags) {
+               tx_post_fence_nop(sq);
+               return MLX5E_KTLS_SYNC_DONE;
+       }
+
+       num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
        pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
        contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+
        if (unlikely(contig_wqebbs_room < num_wqebbs))
                mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 
        tx_post_resync_params(sq, priv_tx, info.rcd_sn);
 
-       for (i = 0; i < info.nr_frags; i++)
-               if (tx_post_resync_dump(sq, info.frags[i], priv_tx->tisn, !i))
-                       goto err_out;
+       for (; i < info.nr_frags; i++) {
+               unsigned int orig_fsz, frag_offset = 0, n = 0;
+               skb_frag_t *f = &info.frags[i];
 
-       /* If no dump WQE was sent, we need to have a fence NOP WQE before the
-        * actual data xmit.
-        */
-       if (!info.nr_frags)
-               tx_post_fence_nop(sq);
+               orig_fsz = skb_frag_size(f);
 
-       return skb;
+               do {
+                       bool fence = !(i || frag_offset);
+                       unsigned int fsz;
+
+                       n++;
+                       fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset);
+                       skb_frag_size_set(f, fsz);
+                       if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) {
+                               page_ref_add(skb_frag_page(f), n - 1);
+                               goto err_out;
+                       }
+
+                       skb_frag_off_add(f, fsz);
+                       frag_offset += fsz;
+               } while (frag_offset < orig_fsz);
+
+               page_ref_add(skb_frag_page(f), n - 1);
+       }
+
+       return MLX5E_KTLS_SYNC_DONE;
 
 err_out:
-       dev_kfree_skb_any(skb);
-       return NULL;
+       for (; i < info.nr_frags; i++)
+               /* The put_page() here undoes the page ref obtained in tx_sync_info_get().
+                * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be
+                * released only upon their completions (or in mlx5e_free_txqsq_descs,
+                * if channel closes).
+                */
+               put_page(skb_frag_page(&info.frags[i]));
+
+       return MLX5E_KTLS_SYNC_FAIL;
 }
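
Note: the rewritten handler posts one DUMP WQE per at-most-hw_mtu slice of a fragment, then calls page_ref_add(page, n - 1): the snapshot already holds one reference and each of the n slice completions drops one. A runnable walk-through of the slicing loop with made-up sizes:

    #include <stdio.h>

    static unsigned int min_u(unsigned int a, unsigned int b)
    {
            return a < b ? a : b;
    }

    int main(void)
    {
            unsigned int orig_fsz = 3800, hw_mtu = 1500;
            unsigned int frag_offset = 0, n = 0;

            /* Mirrors the do/while in mlx5e_ktls_tx_handle_ooo(): each
             * pass posts one DUMP WQE covering at most hw_mtu bytes. */
            do {
                    unsigned int fsz = min_u(hw_mtu, orig_fsz - frag_offset);

                    n++;
                    printf("dump %u: %u bytes at offset %u\n",
                           n, fsz, frag_offset);
                    frag_offset += fsz;
            } while (frag_offset < orig_fsz);

            /* One ref came with the snapshot; add n - 1 more so each of
             * the n completions can drop one. */
            printf("page_ref_add(page, %u)\n", n - 1);
            return 0;
    }
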
 
 struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
@@ -419,10 +452,15 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
 
        seq = ntohl(tcp_hdr(skb)->seq);
        if (unlikely(priv_tx->expected_seq != seq)) {
-               skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq);
-               if (unlikely(!skb))
+               enum mlx5e_ktls_sync_retval ret =
+                       mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq);
+
+               if (likely(ret == MLX5E_KTLS_SYNC_DONE))
+                       *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+               else if (ret == MLX5E_KTLS_SYNC_FAIL)
+                       goto err_out;
+               else /* ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA */
                        goto out;
-               *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
        }
 
        priv_tx->expected_seq = seq + datalen;
index c5a9c20..327c93a 100644 (file)
@@ -1021,7 +1021,7 @@ static bool ext_link_mode_requested(const unsigned long *adver)
 {
 #define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT
        int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT;
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(modes);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,};
 
        bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size);
        return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS);
index eed7101..acd946f 100644 (file)
@@ -399,10 +399,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
                      struct mlx5_flow_table *ft,
                      struct ethtool_rx_flow_spec *fs)
 {
+       struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND };
        struct mlx5_flow_destination *dst = NULL;
-       struct mlx5_flow_act flow_act = {0};
-       struct mlx5_flow_spec *spec;
        struct mlx5_flow_handle *rule;
+       struct mlx5_flow_spec *spec;
        int err = 0;
 
        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
index 7569287..772bfdb 100644 (file)
@@ -1128,6 +1128,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        sq->txq_ix    = txq_ix;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
+       sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
        sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
        sq->stop_room = MLX5E_SQ_STOP_ROOM;
        INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
@@ -1135,10 +1136,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
                set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+#ifdef CONFIG_MLX5_EN_TLS
        if (mlx5_accel_is_tls_device(c->priv->mdev)) {
                set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
-               sq->stop_room += MLX5E_SQ_TLS_ROOM;
+               sq->stop_room += MLX5E_SQ_TLS_ROOM +
+                       mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
+                                                   TLS_MAX_PAYLOAD_SIZE);
        }
+#endif
 
        param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1349,9 +1354,13 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
        /* last doorbell out, godspeed .. */
        if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
                u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+               struct mlx5e_tx_wqe_info *wi;
                struct mlx5e_tx_wqe *nop;
 
-               sq->db.wqe_info[pi].skb = NULL;
+               wi = &sq->db.wqe_info[pi];
+
+               memset(wi, 0, sizeof(*wi));
+               wi->num_wqebbs = 1;
                nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
                mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
        }
index 95892a3..cd9bb7c 100644 (file)
@@ -611,8 +611,8 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
 
        mutex_lock(&esw->offloads.encap_tbl_lock);
        encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
-       if (e->compl_result || (encap_connected == neigh_connected &&
-                               ether_addr_equal(e->h_dest, ha)))
+       if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+                                   ether_addr_equal(e->h_dest, ha)))
                goto unlock;
 
        mlx5e_take_all_encap_flows(e, &flow_list);
index d6a5472..82cffb3 100644 (file)
@@ -1386,8 +1386,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
        if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
                return 0;
 
-       if (rq->cqd.left)
+       if (rq->cqd.left) {
                work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
+               if (rq->cqd.left || work_done >= budget)
+                       goto out;
+       }
 
        cqe = mlx5_cqwq_get_cqe(cqwq);
        if (!cqe) {
index 840ec94..bbff8d8 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/udp.h>
 #include <net/udp.h>
 #include "en.h"
+#include "en/port.h"
 
 enum {
        MLX5E_ST_LINK_STATE,
@@ -80,22 +81,12 @@ static int mlx5e_test_link_state(struct mlx5e_priv *priv)
 
 static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
 {
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-       u32 eth_proto_oper;
-       int i;
+       u32 speed;
 
        if (!netif_carrier_ok(priv->netdev))
                return 1;
 
-       if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1))
-               return 1;
-
-       eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-       for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) {
-               if (eth_proto_oper & MLX5E_PROT_MASK(i))
-                       return 0;
-       }
-       return 1;
+       return mlx5e_port_linkspeed(priv->mdev, &speed);
 }
 
 struct mlx5ehdr {
index ac6fdcd..7e6ebd0 100644 (file)
@@ -52,11 +52,12 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) },
-       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
-       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
 #endif
 
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
@@ -288,11 +289,12 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
                        s->tx_tls_encrypted_bytes   += sq_stats->tls_encrypted_bytes;
                        s->tx_tls_ctx               += sq_stats->tls_ctx;
                        s->tx_tls_ooo               += sq_stats->tls_ooo;
+                       s->tx_tls_dump_bytes        += sq_stats->tls_dump_bytes;
+                       s->tx_tls_dump_packets      += sq_stats->tls_dump_packets;
                        s->tx_tls_resync_bytes      += sq_stats->tls_resync_bytes;
+                       s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data;
                        s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
                        s->tx_tls_drop_bypass_req   += sq_stats->tls_drop_bypass_req;
-                       s->tx_tls_dump_bytes        += sq_stats->tls_dump_bytes;
-                       s->tx_tls_dump_packets      += sq_stats->tls_dump_packets;
 #endif
                        s->tx_cqes              += sq_stats->cqes;
                }
@@ -1472,10 +1474,12 @@ static const struct counter_desc sq_stats_desc[] = {
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
-       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
-       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
 #endif
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
index 79f261b..869f350 100644 (file)
@@ -129,11 +129,12 @@ struct mlx5e_sw_stats {
        u64 tx_tls_encrypted_bytes;
        u64 tx_tls_ctx;
        u64 tx_tls_ooo;
+       u64 tx_tls_dump_packets;
+       u64 tx_tls_dump_bytes;
        u64 tx_tls_resync_bytes;
+       u64 tx_tls_skip_no_sync_data;
        u64 tx_tls_drop_no_sync_data;
        u64 tx_tls_drop_bypass_req;
-       u64 tx_tls_dump_packets;
-       u64 tx_tls_dump_bytes;
 #endif
 
        u64 rx_xsk_packets;
@@ -273,11 +274,12 @@ struct mlx5e_sq_stats {
        u64 tls_encrypted_bytes;
        u64 tls_ctx;
        u64 tls_ooo;
+       u64 tls_dump_packets;
+       u64 tls_dump_bytes;
        u64 tls_resync_bytes;
+       u64 tls_skip_no_sync_data;
        u64 tls_drop_no_sync_data;
        u64 tls_drop_bypass_req;
-       u64 tls_dump_packets;
-       u64 tls_dump_bytes;
 #endif
        /* less likely accessed in data path */
        u64 csum_none;
index da7555f..fda0b37 100644 (file)
@@ -1278,8 +1278,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
        mlx5_eswitch_del_vlan_action(esw, attr);
 
        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
-               if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+               if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
                        mlx5e_detach_encap(priv, flow, out_index);
+                       kfree(attr->parse_attr->tun_info[out_index]);
+               }
        kvfree(attr->parse_attr);
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
@@ -1559,6 +1561,7 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr
                        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
        }
 
+       kfree(e->tun_info);
        kfree(e->encap_header);
        kfree_rcu(e, rcu);
 }
@@ -1664,46 +1667,63 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                return err;
        }
 
-       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
-               struct flow_match_ipv4_addrs match;
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+               struct flow_match_control match;
+               u16 addr_type;
 
-               flow_rule_match_enc_ipv4_addrs(rule, &match);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-                        src_ipv4_src_ipv6.ipv4_layout.ipv4,
-                        ntohl(match.mask->src));
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                        src_ipv4_src_ipv6.ipv4_layout.ipv4,
-                        ntohl(match.key->src));
-
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-                        dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-                        ntohl(match.mask->dst));
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                        dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-                        ntohl(match.key->dst));
-
-               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
-       } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
-               struct flow_match_ipv6_addrs match;
+               flow_rule_match_enc_control(rule, &match);
+               addr_type = match.key->addr_type;
 
-               flow_rule_match_enc_ipv6_addrs(rule, &match);
-               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-                                   src_ipv4_src_ipv6.ipv6_layout.ipv6),
-                      &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-                                   src_ipv4_src_ipv6.ipv6_layout.ipv6),
-                      &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+               /* For tunnels, addr_type uses the same key IDs as non-tunnel flows */
+               if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+                       struct flow_match_ipv4_addrs match;
 
-               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-                                   dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-                      &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-                                   dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-                      &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+                       flow_rule_match_enc_ipv4_addrs(rule, &match);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                src_ipv4_src_ipv6.ipv4_layout.ipv4,
+                                ntohl(match.mask->src));
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                src_ipv4_src_ipv6.ipv4_layout.ipv4,
+                                ntohl(match.key->src));
 
-               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+                                ntohl(match.mask->dst));
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+                                ntohl(match.key->dst));
+
+                       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+                                        ethertype);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+                                ETH_P_IP);
+               } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+                       struct flow_match_ipv6_addrs match;
+
+                       flow_rule_match_enc_ipv6_addrs(rule, &match);
+                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+                                           src_ipv4_src_ipv6.ipv6_layout.ipv6),
+                              &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+                                                                  ipv6));
+                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                                           src_ipv4_src_ipv6.ipv6_layout.ipv6),
+                              &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+                                                                 ipv6));
+
+                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+                                           dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+                              &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+                                                                  ipv6));
+                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                                           dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+                              &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+                                                                 ipv6));
+
+                       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+                                        ethertype);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+                                ETH_P_IPV6);
+               }
        }
 
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
@@ -2955,6 +2975,13 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
        return NULL;
 }
 
+static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+       size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+       return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow,
                              struct net_device *mirred_dev,
@@ -3011,13 +3038,15 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
        refcount_set(&e->refcnt, 1);
        init_completion(&e->res_ready);
 
+       tun_info = dup_tun_info(tun_info);
+       if (!tun_info) {
+               err = -ENOMEM;
+               goto out_err_init;
+       }
        e->tun_info = tun_info;
        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
-       if (err) {
-               kfree(e);
-               e = NULL;
-               goto out_err;
-       }
+       if (err)
+               goto out_err_init;
 
        INIT_LIST_HEAD(&e->flows);
        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
@@ -3058,6 +3087,12 @@ out_err:
        if (e)
                mlx5e_encap_put(priv, e);
        return err;
+
+out_err_init:
+       mutex_unlock(&esw->offloads.encap_tbl_lock);
+       kfree(tun_info);
+       kfree(e);
+       return err;
 }
 
 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
@@ -3143,7 +3178,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv,
                               struct mlx5_esw_flow_attr *attr,
                               u32 *action)
 {
-       int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev);
+       int nest_level = attr->parse_attr->filter_dev->lower_level;
        struct flow_action_entry vlan_act = {
                .id = FLOW_ACTION_VLAN_POP,
        };
@@ -3278,7 +3313,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                        } else if (encap) {
                                parse_attr->mirred_ifindex[attr->out_count] =
                                        out_dev->ifindex;
-                               parse_attr->tun_info[attr->out_count] = info;
+                               parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
+                               if (!parse_attr->tun_info[attr->out_count])
+                                       return -ENOMEM;
                                encap = false;
                                attr->dests[attr->out_count].flags |=
                                        MLX5_ESW_DEST_ENCAP;
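
Both en_tc.c hunks above hinge on the same deep-copy pattern: ip_tunnel_info carries options_len trailing bytes after the fixed struct, so a plain struct assignment would lose the options, and a borrowed pointer could outlive the caller's copy. dup_tun_info() therefore sizes a single kmemdup() for header plus options. A minimal userspace sketch of the pattern, with malloc/memcpy standing in for kmemdup (the struct below is a hypothetical stand-in, not the kernel's ip_tunnel_info):

#include <stdlib.h>
#include <string.h>

/* Hypothetical analogue of ip_tunnel_info: fixed header plus a
 * variable-length options area declared as a flexible array member.
 */
struct tun_info {
	unsigned int id;
	size_t options_len;
	unsigned char options[];
};

/* One allocation sized for header + options, then a flat copy; this
 * mirrors kmemdup(tun_info, sizeof(*tun_info) + options_len, GFP_KERNEL).
 */
static struct tun_info *dup_tun_info_sketch(const struct tun_info *src)
{
	size_t size = sizeof(*src) + src->options_len;
	struct tun_info *copy = malloc(size);

	if (!copy)
		return NULL;
	memcpy(copy, src, size);
	return copy;
}
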
index d3a67a9..67dc4f0 100644 (file)
@@ -403,7 +403,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
                                 struct mlx5_err_cqe *err_cqe)
 {
-       u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
+       struct mlx5_cqwq *wq = &sq->cq.wq;
+       u32 ci;
+
+       ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
 
        netdev_err(sq->channel->netdev,
                   "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
@@ -479,14 +482,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                        skb = wi->skb;
 
                        if (unlikely(!skb)) {
-#ifdef CONFIG_MLX5_EN_TLS
-                               if (wi->resync_dump_frag) {
-                                       struct mlx5e_sq_dma *dma =
-                                               mlx5e_dma_get(sq, dma_fifo_cc++);
-
-                                       mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma);
-                               }
-#endif
+                               mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
                                sqcc += wi->num_wqebbs;
                                continue;
                        }
@@ -542,29 +538,38 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
 {
        struct mlx5e_tx_wqe_info *wi;
        struct sk_buff *skb;
+       u32 dma_fifo_cc;
+       u16 sqcc;
        u16 ci;
        int i;
 
-       while (sq->cc != sq->pc) {
-               ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+       sqcc = sq->cc;
+       dma_fifo_cc = sq->dma_fifo_cc;
+
+       while (sqcc != sq->pc) {
+               ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
                wi = &sq->db.wqe_info[ci];
                skb = wi->skb;
 
-               if (!skb) { /* nop */
-                       sq->cc++;
+               if (!skb) {
+                       mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+                       sqcc += wi->num_wqebbs;
                        continue;
                }
 
                for (i = 0; i < wi->num_dma; i++) {
                        struct mlx5e_sq_dma *dma =
-                               mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+                               mlx5e_dma_get(sq, dma_fifo_cc++);
 
                        mlx5e_tx_dma_unmap(sq->pdev, dma);
                }
 
                dev_kfree_skb_any(skb);
-               sq->cc += wi->num_wqebbs;
+               sqcc += wi->num_wqebbs;
        }
+
+       sq->dma_fifo_cc = dma_fifo_cc;
+       sq->cc = sqcc;
 }
 
 #ifdef CONFIG_MLX5_CORE_IPOIB
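
mlx5e_free_txqsq_descs() now follows the same discipline as mlx5e_poll_tx_cq(): walk the queue with local copies of the consumer counters (sqcc, dma_fifo_cc), publish them back to the shared structure once after the loop, and account ktls resync-dump WQEs identically on both paths. A generic sketch of that snapshot-and-commit idiom (the ring type here is illustrative, not the mlx5 one):

#include <stdint.h>

struct ring {
	uint32_t cc;		/* consumer counter, shared */
	uint32_t pc;		/* producer counter */
	/* ... entries ... */
};

static void drain(struct ring *r)
{
	uint32_t cc = r->cc;	/* snapshot once */

	while (cc != r->pc) {
		/* process entry (cc & (ring_size - 1)) ... */
		cc++;
	}

	r->cc = cc;		/* commit once, after the loop */
}
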
index 00d71db..369499e 100644 (file)
@@ -285,7 +285,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 
        mlx5_eswitch_set_rule_source_port(esw, spec, attr);
 
-       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
        if (attr->outer_match_level != MLX5_MATCH_NONE)
                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 
index 1d55a32..7879e17 100644 (file)
@@ -177,22 +177,32 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
        memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
 }
 
+static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
+                                               const struct mlx5_flow_spec *spec)
+{
+       u32 port_mask, port_value;
+
+       if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+               return spec->flow_context.flow_source == MLX5_VPORT_UPLINK;
+
+       port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
+                            misc_parameters.source_port);
+       port_value = MLX5_GET(fte_match_param, spec->match_value,
+                             misc_parameters.source_port);
+       return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK;
+}
+
 bool
 mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
                              struct mlx5_flow_act *flow_act,
                              struct mlx5_flow_spec *spec)
 {
-       u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
-                                misc_parameters.source_port);
-       u32 port_value = MLX5_GET(fte_match_param, spec->match_value,
-                                 misc_parameters.source_port);
-
        if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table))
                return false;
 
        /* push vlan on RX */
        return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) &&
-               ((port_mask & port_value) == MLX5_VPORT_UPLINK);
+               mlx5_eswitch_offload_is_uplink_port(esw, spec);
 }
 
 struct mlx5_flow_handle *
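
mlx5_eswitch_termtbl_required() now defers to the new helper, which prefers the device-reported flow_source when the flow_source capability is present and only falls back to inferring the port from the masked source_port match. Note the fallback tests mask & value, so a field that is not matched at all (mask 0) can never look like the uplink. A reduced sketch of that capability-with-fallback shape (all names below are illustrative):

#include <stdbool.h>
#include <stdint.h>

static bool is_uplink(bool has_flow_source_cap, uint32_t flow_source,
		      uint16_t port_mask, uint16_t port_value,
		      uint16_t uplink_id)
{
	/* Trust the explicit tag when hardware provides it. */
	if (has_flow_source_cap)
		return flow_source == uplink_id;

	/* Otherwise infer from the masked match: only a fully masked,
	 * matching port qualifies.
	 */
	return (uint16_t)(port_mask & port_value) == uplink_id;
}
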
index 4c50efe..6102113 100644 (file)
@@ -464,8 +464,10 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
        }
 
        err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
-       if (err)
+       if (err) {
+               kvfree(in);
                goto err_cqwq;
+       }
 
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
index 579c306..3c816e8 100644 (file)
@@ -507,7 +507,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
                                MLX5_SET(dest_format_struct, in_dests,
                                         destination_eswitch_owner_vhca_id,
                                         dst->dest_attr.vport.vhca_id);
-                               if (extended_dest) {
+                               if (extended_dest &&
+                                   dst->dest_attr.vport.pkt_reformat) {
                                        MLX5_SET(dest_format_struct, in_dests,
                                                 packet_reformat,
                                                 !!(dst->dest_attr.vport.flags &
index d685122..c07f315 100644 (file)
@@ -572,7 +572,7 @@ mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
                return -ENOMEM;
        err = mlx5_crdump_collect(dev, cr_data);
        if (err)
-               return err;
+               goto free_data;
 
        if (priv_ctx) {
                struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
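
The kvfree(in) fix in the FPGA CQ-create path and the goto free_data fix just above are the same bug class: an early return on an error path that leaks a buffer allocated earlier in the function. The usual kernel cure is a single unwind ladder reached by goto, so each allocation has exactly one matching free. A self-contained sketch (the allocations are illustrative):

#include <stdlib.h>

static int do_work(void)
{
	char *a, *b;
	int err = 0;

	a = malloc(64);
	if (!a)
		return -1;

	b = malloc(64);
	if (!b) {
		err = -1;
		goto free_a;	/* unwind only what exists so far */
	}

	/* ... work that may fail sets err and falls through ... */

	free(b);
free_a:
	free(a);
	return err;
}
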
index 9648c22..e47dd7c 100644 (file)
@@ -1568,6 +1568,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
        { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF},   /* ConnectX Family mlx5Gen Virtual Function */
        { PCI_VDEVICE(MELLANOX, 0xa2d2) },                      /* BlueField integrated ConnectX-5 network controller */
        { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},   /* BlueField integrated ConnectX-5 network controller VF */
+       { PCI_VDEVICE(MELLANOX, 0xa2d6) },                      /* BlueField-2 integrated ConnectX-6 Dx network controller */
        { 0, }
 };
 
index 9231b39..c501bf2 100644 (file)
@@ -112,17 +112,11 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
        u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)]   = {0};
        struct xarray *mkeys = &dev->priv.mkey_table;
-       struct mlx5_core_mkey *deleted_mkey;
        unsigned long flags;
 
        xa_lock_irqsave(mkeys, flags);
-       deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+       __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
        xa_unlock_irqrestore(mkeys, flags);
-       if (!deleted_mkey) {
-               mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
-                             mlx5_base_mkey(mkey->key));
-               return -ENOENT;
-       }
 
        MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
        MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
index 7d81a77..b74b7d0 100644 (file)
@@ -615,7 +615,7 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
                 * that recalculates the CS and forwards to the vport.
                 */
                ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn,
-                                                               dest_action->vport.num,
+                                                               dest_action->vport.caps->num,
                                                                final_icm_addr);
                if (ret) {
                        mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n");
@@ -744,7 +744,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                        dest_action = action;
                        if (rx_rule) {
                                /* Loopback on WIRE vport is not supported */
-                               if (action->vport.num == WIRE_PORT)
+                               if (action->vport.caps->num == WIRE_PORT)
                                        goto out_invalid_arg;
 
                                attr.final_icm_addr = action->vport.caps->icm_address_rx;
index 913f1e5..d7c7467 100644 (file)
@@ -137,7 +137,8 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
 
        icm_mr->icm_start_addr = icm_mr->dm.addr;
 
-       align_diff = icm_mr->icm_start_addr % align_base;
+       /* align_base is always a power of 2 */
+       align_diff = icm_mr->icm_start_addr & (align_base - 1);
        if (align_diff)
                icm_mr->used_length = align_base - align_diff;
 
index 01008cd..67dea76 100644 (file)
@@ -230,8 +230,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
                    (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
                     dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) {
                        ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask,
-                                                           &dmn->info.caps,
-                                                           inner, rx);
+                                                           dmn, inner, rx);
                        if (ret)
                                return ret;
                }
@@ -458,13 +457,11 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
 
        prev_matcher = NULL;
        if (next_matcher && !first)
-               prev_matcher = list_entry(next_matcher->matcher_list.prev,
-                                         struct mlx5dr_matcher,
-                                         matcher_list);
+               prev_matcher = list_prev_entry(next_matcher, matcher_list);
        else if (!first)
-               prev_matcher = list_entry(tbl->matcher_list.prev,
-                                         struct mlx5dr_matcher,
-                                         matcher_list);
+               prev_matcher = list_last_entry(&tbl->matcher_list,
+                                              struct mlx5dr_matcher,
+                                              matcher_list);
 
        if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
            dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
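
This matcher hunk, and the dr_rule/dr_ste hunks below, replace open-coded list_entry(...->prev, ...) with the purpose-built helpers, which expand to the same pointer arithmetic but name the intent: previous sibling of an entry versus first/last element off the list head. Simplified from include/linux/list.h, the helpers in play are roughly:

/* last element, given the list head */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)

/* first element, given the list head */
#define list_first_entry(ptr, type, member) \
	list_entry((ptr)->next, type, member)

/* entry preceding/following pos, given an entry */
#define list_prev_entry(pos, member) \
	list_entry((pos)->member.prev, typeof(*(pos)), member)
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)
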
index 3bc3f66..e8b6560 100644 (file)
@@ -18,7 +18,7 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste,
        struct mlx5dr_ste *last_ste;
 
        /* The new entry will be inserted after the last */
-       last_ste = list_entry(miss_list->prev, struct mlx5dr_ste, miss_list_node);
+       last_ste = list_last_entry(miss_list, struct mlx5dr_ste, miss_list_node);
        WARN_ON(!last_ste);
 
        ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL);
@@ -788,12 +788,10 @@ again:
                         * it means that all the previous STEs are the same;
                         * if so, this rule is duplicated.
                         */
-                       if (mlx5dr_ste_is_last_in_rule(nic_matcher,
-                                                      matched_ste->ste_chain_location)) {
-                               mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n");
-                               return NULL;
-                       }
-                       return matched_ste;
+                       if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location))
+                               return matched_ste;
+
+                       mlx5dr_dbg(dmn, "Duplicate rule inserted\n");
                }
 
                if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) {
index 6b0af64..4efe1b0 100644 (file)
@@ -429,12 +429,9 @@ static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste,
        struct mlx5dr_ste *prev_ste;
        u64 miss_addr;
 
-       prev_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->prev, struct mlx5dr_ste,
-                             miss_list_node);
-       if (!prev_ste) {
-               WARN_ON(true);
+       prev_ste = list_prev_entry(ste, miss_list_node);
+       if (WARN_ON(!prev_ste))
                return;
-       }
 
        miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste);
        mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr);
@@ -461,8 +458,8 @@ void mlx5dr_ste_free(struct mlx5dr_ste *ste,
        struct mlx5dr_ste_htbl *stats_tbl;
        LIST_HEAD(send_ste_list);
 
-       first_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->next,
-                              struct mlx5dr_ste, miss_list_node);
+       first_ste = list_first_entry(mlx5dr_ste_get_miss_list(ste),
+                                    struct mlx5dr_ste, miss_list_node);
        stats_tbl = first_ste->htbl;
 
        /* Two options:
@@ -479,8 +476,7 @@ void mlx5dr_ste_free(struct mlx5dr_ste *ste,
                if (last_ste == first_ste)
                        next_ste = NULL;
                else
-                       next_ste = list_entry(ste->miss_list_node.next,
-                                             struct mlx5dr_ste, miss_list_node);
+                       next_ste = list_next_entry(ste, miss_list_node);
 
                if (!next_ste) {
                        /* One and only entry in the list */
@@ -841,6 +837,8 @@ static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec)
        spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn);
 
        spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port);
+       spec->source_eswitch_owner_vhca_id = MLX5_GET(fte_match_set_misc, mask,
+                                                     source_eswitch_owner_vhca_id);
 
        spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio);
        spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi);
@@ -2254,11 +2252,18 @@ static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
 {
        struct mlx5dr_match_misc *misc_mask = &value->misc;
 
-       if (misc_mask->source_port != 0xffff)
+       /* Partial misc source_port is not supported */
+       if (misc_mask->source_port && misc_mask->source_port != 0xffff)
+               return -EINVAL;
+
+       /* Partial misc source_eswitch_owner_vhca_id is not supported */
+       if (misc_mask->source_eswitch_owner_vhca_id &&
+           misc_mask->source_eswitch_owner_vhca_id != 0xffff)
                return -EINVAL;
 
        DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
        DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
+       misc_mask->source_eswitch_owner_vhca_id = 0;
 
        return 0;
 }
@@ -2270,17 +2275,33 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
        struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
        struct mlx5dr_match_misc *misc = &value->misc;
        struct mlx5dr_cmd_vport_cap *vport_cap;
+       struct mlx5dr_domain *dmn = sb->dmn;
+       struct mlx5dr_cmd_caps *caps;
        u8 *tag = hw_ste->tag;
 
        DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
 
-       vport_cap = mlx5dr_get_vport_cap(sb->caps, misc->source_port);
+       if (sb->vhca_id_valid) {
+               /* Find port GVMI based on the eswitch_owner_vhca_id */
+               if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+                       caps = &dmn->info.caps;
+               else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
+                                          dmn->peer_dmn->info.caps.gvmi))
+                       caps = &dmn->peer_dmn->info.caps;
+               else
+                       return -EINVAL;
+       } else {
+               caps = &dmn->info.caps;
+       }
+
+       vport_cap = mlx5dr_get_vport_cap(caps, misc->source_port);
        if (!vport_cap)
                return -EINVAL;
 
        if (vport_cap->vport_gvmi)
                MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
 
+       misc->source_eswitch_owner_vhca_id = 0;
        misc->source_port = 0;
 
        return 0;
@@ -2288,17 +2309,20 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
 
 int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
                                  struct mlx5dr_match_param *mask,
-                                 struct mlx5dr_cmd_caps *caps,
+                                 struct mlx5dr_domain *dmn,
                                  bool inner, bool rx)
 {
        int ret;
 
+       /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */
+       sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id;
+
        ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
        if (ret)
                return ret;
 
        sb->rx = rx;
-       sb->caps = caps;
+       sb->dmn = dmn;
        sb->inner = inner;
        sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP;
        sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
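
dr_ste_build_src_gvmi_qpn_bit_mask() now rejects partial masks: source_port and source_eswitch_owner_vhca_id must be either unmatched (0) or fully matched (0xffff), since the STE layout cannot express a partial match on these fields. The check generalizes to any 16-bit field, as in this standalone sketch (the function name is illustrative):

#include <stdint.h>

/* Accept only "don't care" (0) or an exact full match (0xffff). */
static int check_full_mask16(uint16_t mask)
{
	if (mask && mask != 0xffff)
		return -1;	/* partial mask: unsupported */
	return 0;
}
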
index a37ee63..1cb3769 100644 (file)
@@ -180,6 +180,8 @@ void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
 struct mlx5dr_ste_build {
        u8 inner:1;
        u8 rx:1;
+       u8 vhca_id_valid:1;
+       struct mlx5dr_domain *dmn;
        struct mlx5dr_cmd_caps *caps;
        u8 lu_type;
        u16 byte_mask;
@@ -331,7 +333,7 @@ void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
                                 bool inner, bool rx);
 int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
                                  struct mlx5dr_match_param *mask,
-                                 struct mlx5dr_cmd_caps *caps,
+                                 struct mlx5dr_domain *dmn,
                                  bool inner, bool rx);
 void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
 
@@ -453,7 +455,7 @@ struct mlx5dr_match_misc {
        u32 gre_c_present:1;
        /* Source port. 0xffff denotes the wire port */
        u32 source_port:16;
-       u32 reserved_auto2:16;
+       u32 source_eswitch_owner_vhca_id:16;
        /* VLAN ID of the first VLAN tag in the inner header of the packet.
         * Valid only when inner_second_cvlan_tag == 1 or inner_second_svlan_tag == 1
         */
@@ -745,7 +747,6 @@ struct mlx5dr_action {
                struct {
                        struct mlx5dr_domain *dmn;
                        struct mlx5dr_cmd_vport_cap *caps;
-                       u32 num;
                } vport;
                struct {
                        u32 vlan_hdr; /* tpid_pcp_dei_vid */
index 14dcc78..4421ab2 100644 (file)
@@ -1186,7 +1186,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
        if (err)
                goto err_thermal_init;
 
-       if (mlxsw_driver->params_register && !reload)
+       if (mlxsw_driver->params_register)
                devlink_params_publish(devlink);
 
        return 0;
@@ -1259,7 +1259,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
                        return;
        }
 
-       if (mlxsw_core->driver->params_unregister && !reload)
+       if (mlxsw_core->driver->params_unregister)
                devlink_params_unpublish(devlink);
        mlxsw_thermal_fini(mlxsw_core->thermal);
        mlxsw_hwmon_fini(mlxsw_core->hwmon);
index dd234cf..dcf9562 100644 (file)
@@ -3771,6 +3771,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_port_qdiscs_init;
        }
 
+       err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 0, VLAN_N_VID - 1, false,
+                                    false);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to clear VLAN filter\n",
+                       mlxsw_sp_port->local_port);
+               goto err_port_vlan_clear;
+       }
+
        err = mlxsw_sp_port_nve_init(mlxsw_sp_port);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n",
@@ -3818,6 +3826,7 @@ err_port_vlan_create:
 err_port_pvid_set:
        mlxsw_sp_port_nve_fini(mlxsw_sp_port);
 err_port_nve_init:
+err_port_vlan_clear:
        mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
 err_port_qdiscs_init:
        mlxsw_sp_port_fids_fini(mlxsw_sp_port);
index 0ad1a24..b607919 100644 (file)
@@ -21,6 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                                         struct netlink_ext_ack *extack)
 {
        const struct flow_action_entry *act;
+       int mirror_act_count = 0;
        int err, i;
 
        if (!flow_action_has_entries(flow_action))
@@ -105,6 +106,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                case FLOW_ACTION_MIRRED: {
                        struct net_device *out_dev = act->dev;
 
+                       if (mirror_act_count++) {
+                               NL_SET_ERR_MSG_MOD(extack, "Multiple mirror actions per rule are not supported");
+                               return -EOPNOTSUPP;
+                       }
+
                        err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei,
                                                            block, out_dev,
                                                            extack);
index 899450b..7c03b66 100644 (file)
@@ -99,6 +99,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
        devlink = priv_to_devlink(mlxsw_sp->core);
        in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
                                                           local_port);
+       skb_push(skb, ETH_HLEN);
        devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port);
        consume_skb(skb);
 }
index 4d1bce4..344539c 100644 (file)
@@ -261,8 +261,15 @@ static int ocelot_vlan_vid_add(struct net_device *dev, u16 vid, bool pvid,
                port->pvid = vid;
 
        /* Untagged egress VLAN classification */
-       if (untagged)
+       if (untagged && port->vid != vid) {
+               if (port->vid) {
+                       dev_err(ocelot->dev,
+                               "Port already has a native VLAN: %d\n",
+                               port->vid);
+                       return -EBUSY;
+               }
                port->vid = vid;
+       }
 
        ocelot_vlan_port_apply(ocelot, port);
 
@@ -934,7 +941,7 @@ end:
 static int ocelot_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
                                  u16 vid)
 {
-       return ocelot_vlan_vid_add(dev, vid, false, true);
+       return ocelot_vlan_vid_add(dev, vid, false, false);
 }
 
 static int ocelot_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
index b063eb7..aac1151 100644 (file)
@@ -388,13 +388,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
                        continue;
 
                phy = of_phy_find_device(phy_node);
+               of_node_put(phy_node);
                if (!phy)
                        continue;
 
                err = ocelot_probe_port(ocelot, port, regs, phy);
                if (err) {
                        of_node_put(portnp);
-                       return err;
+                       goto out_put_ports;
                }
 
                phy_mode = of_get_phy_mode(portnp);
@@ -422,7 +423,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
                                "invalid phy mode for port%d, (Q)SGMII only\n",
                                port);
                        of_node_put(portnp);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out_put_ports;
                }
 
                serdes = devm_of_phy_get(ocelot->dev, portnp, NULL);
@@ -435,7 +437,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
                                        "missing SerDes phys for port%d\n",
                                        port);
 
-                       goto err_probe_ports;
+                       of_node_put(portnp);
+                       goto out_put_ports;
                }
 
                ocelot->ports[port]->serdes = serdes;
@@ -447,9 +450,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 
        dev_info(&pdev->dev, "Ocelot switch probed\n");
 
-       return 0;
-
-err_probe_ports:
+out_put_ports:
+       of_node_put(ports);
        return err;
 }
 
index 23ebddf..9f8a1f6 100644 (file)
@@ -176,8 +176,10 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
        u8 mask, val;
        int err;
 
-       if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack))
+       if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) {
+               err = -EOPNOTSUPP;
                goto err_delete;
+       }
 
        tos_off = proto == htons(ETH_P_IP) ? 16 : 20;
 
@@ -198,14 +200,18 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
                if ((iter->val & cmask) == (val & cmask) &&
                    iter->band != knode->res->classid) {
                        NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter");
+                       err = -EOPNOTSUPP;
                        goto err_delete;
                }
        }
 
        if (!match) {
                match = kzalloc(sizeof(*match), GFP_KERNEL);
-               if (!match)
-                       return -ENOMEM;
+               if (!match) {
+                       err = -ENOMEM;
+                       goto err_delete;
+               }
+
                list_add(&match->list, &alink->dscp_map);
        }
        match->handle = knode->handle;
@@ -221,7 +227,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
 
 err_delete:
        nfp_abm_u32_knode_delete(alink, knode);
-       return -EOPNOTSUPP;
+       return err;
 }
 
 static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type,
index 7a20447..d8ad934 100644 (file)
@@ -400,6 +400,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
                repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
                if (!repr_priv) {
                        err = -ENOMEM;
+                       nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
 
@@ -413,6 +414,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
                port = nfp_port_alloc(app, port_type, repr);
                if (IS_ERR(port)) {
                        err = PTR_ERR(port);
+                       kfree(repr_priv);
                        nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
@@ -433,6 +435,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
                err = nfp_repr_init(app, repr,
                                    port_id, port, priv->nn->dp.netdev);
                if (err) {
+                       kfree(repr_priv);
                        nfp_port_free(port);
                        nfp_repr_free(repr);
                        goto err_reprs_clean;
@@ -515,6 +518,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
                repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
                if (!repr_priv) {
                        err = -ENOMEM;
+                       nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
 
@@ -525,11 +529,13 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
                port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr);
                if (IS_ERR(port)) {
                        err = PTR_ERR(port);
+                       kfree(repr_priv);
                        nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
                err = nfp_port_init_phy_port(app->pf, app, port, i);
                if (err) {
+                       kfree(repr_priv);
                        nfp_port_free(port);
                        nfp_repr_free(repr);
                        goto err_reprs_clean;
@@ -542,6 +548,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
                err = nfp_repr_init(app, repr,
                                    cmsg_port_id, port, priv->nn->dp.netdev);
                if (err) {
+                       kfree(repr_priv);
                        nfp_port_free(port);
                        nfp_repr_free(repr);
                        goto err_reprs_clean;
index 1eef446..79d72c8 100644 (file)
@@ -299,22 +299,6 @@ static void nfp_repr_clean(struct nfp_repr *repr)
        nfp_port_free(repr->port);
 }
 
-static struct lock_class_key nfp_repr_netdev_xmit_lock_key;
-static struct lock_class_key nfp_repr_netdev_addr_lock_key;
-
-static void nfp_repr_set_lockdep_class_one(struct net_device *dev,
-                                          struct netdev_queue *txq,
-                                          void *_unused)
-{
-       lockdep_set_class(&txq->_xmit_lock, &nfp_repr_netdev_xmit_lock_key);
-}
-
-static void nfp_repr_set_lockdep_class(struct net_device *dev)
-{
-       lockdep_set_class(&dev->addr_list_lock, &nfp_repr_netdev_addr_lock_key);
-       netdev_for_each_tx_queue(dev, nfp_repr_set_lockdep_class_one, NULL);
-}
-
 int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
                  u32 cmsg_port_id, struct nfp_port *port,
                  struct net_device *pf_netdev)
@@ -324,8 +308,6 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
        u32 repr_cap = nn->tlv_caps.repr_cap;
        int err;
 
-       nfp_repr_set_lockdep_class(netdev);
-
        repr->port = port;
        repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);
        if (!repr->dst)
diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c
deleted file mode 100644 (file)
index cf6e7eb..0000000
+++ /dev/null
@@ -1,497 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * drivers/net/ethernet/netx-eth.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/mii.h>
-
-#include <asm/io.h>
-#include <mach/hardware.h>
-#include <mach/netx-regs.h>
-#include <mach/pfifo.h>
-#include <mach/xc.h>
-#include <linux/platform_data/eth-netx.h>
-
-/* XC Fifo Offsets */
-#define EMPTY_PTR_FIFO(xcno)    (0 + ((xcno) << 3))    /* Index of the empty pointer FIFO */
-#define IND_FIFO_PORT_HI(xcno)  (1 + ((xcno) << 3))    /* Index of the FIFO where received */
-                                                       /* Data packages are indicated by XC */
-#define IND_FIFO_PORT_LO(xcno)  (2 + ((xcno) << 3))    /* Index of the FIFO where received */
-                                                       /* Data packages are indicated by XC */
-#define REQ_FIFO_PORT_HI(xcno)  (3 + ((xcno) << 3))    /* Index of the FIFO where Data packages */
-                                                       /* have to be indicated by ARM which */
-                                                       /* shall be sent */
-#define REQ_FIFO_PORT_LO(xcno)  (4 + ((xcno) << 3))    /* Index of the FIFO where Data packages */
-                                                       /* have to be indicated by ARM which shall */
-                                                       /* be sent */
-#define CON_FIFO_PORT_HI(xcno)  (5 + ((xcno) << 3))    /* Index of the FIFO where sent Data packages */
-                                                       /* are confirmed */
-#define CON_FIFO_PORT_LO(xcno)  (6 + ((xcno) << 3))    /* Index of the FIFO where sent Data */
-                                                       /* packages are confirmed */
-#define PFIFO_MASK(xcno)        (0x7f << (xcno*8))
-
-#define FIFO_PTR_FRAMELEN_SHIFT 0
-#define FIFO_PTR_FRAMELEN_MASK  (0x7ff << 0)
-#define FIFO_PTR_FRAMELEN(len)  (((len) << 0) & FIFO_PTR_FRAMELEN_MASK)
-#define FIFO_PTR_TIMETRIG       (1<<11)
-#define FIFO_PTR_MULTI_REQ
-#define FIFO_PTR_ORIGIN         (1<<14)
-#define FIFO_PTR_VLAN           (1<<15)
-#define FIFO_PTR_FRAMENO_SHIFT  16
-#define FIFO_PTR_FRAMENO_MASK   (0x3f << 16)
-#define FIFO_PTR_FRAMENO(no)    (((no) << 16) & FIFO_PTR_FRAMENO_MASK)
-#define FIFO_PTR_SEGMENT_SHIFT  22
-#define FIFO_PTR_SEGMENT_MASK   (0xf << 22)
-#define FIFO_PTR_SEGMENT(seg)   (((seg) & 0xf) << 22)
-#define FIFO_PTR_ERROR_SHIFT    28
-#define FIFO_PTR_ERROR_MASK     (0xf << 28)
-
-#define ISR_LINK_STATUS_CHANGE (1<<4)
-#define ISR_IND_LO             (1<<3)
-#define ISR_CON_LO             (1<<2)
-#define ISR_IND_HI             (1<<1)
-#define ISR_CON_HI             (1<<0)
-
-#define ETH_MAC_LOCAL_CONFIG 0x1560
-#define ETH_MAC_4321         0x1564
-#define ETH_MAC_65           0x1568
-
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT 16
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK (0xf<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT)
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT(x) (((x)<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT) & MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK)
-#define LOCAL_CONFIG_LINK_STATUS_IRQ_EN (1<<24)
-#define LOCAL_CONFIG_CON_LO_IRQ_EN (1<<23)
-#define LOCAL_CONFIG_CON_HI_IRQ_EN (1<<22)
-#define LOCAL_CONFIG_IND_LO_IRQ_EN (1<<21)
-#define LOCAL_CONFIG_IND_HI_IRQ_EN (1<<20)
-
-#define CARDNAME "netx-eth"
-
-/* LSB must be zero */
-#define INTERNAL_PHY_ADR 0x1c
-
-struct netx_eth_priv {
-       void                    __iomem *sram_base, *xpec_base, *xmac_base;
-       int                     id;
-       struct mii_if_info      mii;
-       u32                     msg_enable;
-       struct xc               *xc;
-       spinlock_t              lock;
-};
-
-static void netx_eth_set_multicast_list(struct net_device *ndev)
-{
-       /* implement me */
-}
-
-static int
-netx_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-       unsigned char *buf = skb->data;
-       unsigned int len = skb->len;
-
-       spin_lock_irq(&priv->lock);
-       memcpy_toio(priv->sram_base + 1560, (void *)buf, len);
-       if (len < 60) {
-               memset_io(priv->sram_base + 1560 + len, 0, 60 - len);
-               len = 60;
-       }
-
-       pfifo_push(REQ_FIFO_PORT_LO(priv->id),
-                  FIFO_PTR_SEGMENT(priv->id) |
-                  FIFO_PTR_FRAMENO(1) |
-                  FIFO_PTR_FRAMELEN(len));
-
-       ndev->stats.tx_packets++;
-       ndev->stats.tx_bytes += skb->len;
-
-       netif_stop_queue(ndev);
-       spin_unlock_irq(&priv->lock);
-       dev_kfree_skb(skb);
-
-       return NETDEV_TX_OK;
-}
-
-static void netx_eth_receive(struct net_device *ndev)
-{
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-       unsigned int val, frameno, seg, len;
-       unsigned char *data;
-       struct sk_buff *skb;
-
-       val = pfifo_pop(IND_FIFO_PORT_LO(priv->id));
-
-       frameno = (val & FIFO_PTR_FRAMENO_MASK) >> FIFO_PTR_FRAMENO_SHIFT;
-       seg = (val & FIFO_PTR_SEGMENT_MASK) >> FIFO_PTR_SEGMENT_SHIFT;
-       len = (val & FIFO_PTR_FRAMELEN_MASK) >> FIFO_PTR_FRAMELEN_SHIFT;
-
-       skb = netdev_alloc_skb(ndev, len);
-       if (unlikely(skb == NULL)) {
-               ndev->stats.rx_dropped++;
-               return;
-       }
-
-       data = skb_put(skb, len);
-
-       memcpy_fromio(data, priv->sram_base + frameno * 1560, len);
-
-       pfifo_push(EMPTY_PTR_FIFO(priv->id),
-               FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno));
-
-       skb->protocol = eth_type_trans(skb, ndev);
-       netif_rx(skb);
-       ndev->stats.rx_packets++;
-       ndev->stats.rx_bytes += len;
-}
-
-static irqreturn_t
-netx_eth_interrupt(int irq, void *dev_id)
-{
-       struct net_device *ndev = dev_id;
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-       int status;
-       unsigned long flags;
-
-       spin_lock_irqsave(&priv->lock, flags);
-
-       status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
-       while (status) {
-               int fill_level;
-               writel(status, NETX_PFIFO_XPEC_ISR(priv->id));
-
-               if ((status & ISR_CON_HI) || (status & ISR_IND_HI))
-                       printk("%s: unexpected status: 0x%08x\n",
-                           __func__, status);
-
-               fill_level =
-                   readl(NETX_PFIFO_FILL_LEVEL(IND_FIFO_PORT_LO(priv->id)));
-               while (fill_level--)
-                       netx_eth_receive(ndev);
-
-               if (status & ISR_CON_LO)
-                       netif_wake_queue(ndev);
-
-               if (status & ISR_LINK_STATUS_CHANGE)
-                       mii_check_media(&priv->mii, netif_msg_link(priv), 1);
-
-               status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
-       }
-       spin_unlock_irqrestore(&priv->lock, flags);
-       return IRQ_HANDLED;
-}
-
-static int netx_eth_open(struct net_device *ndev)
-{
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-
-       if (request_irq
-           (ndev->irq, netx_eth_interrupt, IRQF_SHARED, ndev->name, ndev))
-               return -EAGAIN;
-
-       writel(ndev->dev_addr[0] |
-              ndev->dev_addr[1]<<8 |
-              ndev->dev_addr[2]<<16 |
-              ndev->dev_addr[3]<<24,
-              priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
-       writel(ndev->dev_addr[4] |
-              ndev->dev_addr[5]<<8,
-              priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
-       writel(LOCAL_CONFIG_LINK_STATUS_IRQ_EN |
-               LOCAL_CONFIG_CON_LO_IRQ_EN |
-               LOCAL_CONFIG_CON_HI_IRQ_EN |
-               LOCAL_CONFIG_IND_LO_IRQ_EN |
-               LOCAL_CONFIG_IND_HI_IRQ_EN,
-               priv->xpec_base + NETX_XPEC_RAM_START_OFS +
-               ETH_MAC_LOCAL_CONFIG);
-
-       mii_check_media(&priv->mii, netif_msg_link(priv), 1);
-       netif_start_queue(ndev);
-
-       return 0;
-}
-
-static int netx_eth_close(struct net_device *ndev)
-{
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-
-       netif_stop_queue(ndev);
-
-       writel(0,
-           priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_LOCAL_CONFIG);
-
-       free_irq(ndev->irq, ndev);
-
-       return 0;
-}
-
-static void netx_eth_timeout(struct net_device *ndev)
-{
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-       int i;
-
-       printk(KERN_ERR "%s: transmit timed out, resetting\n", ndev->name);
-
-       spin_lock_irq(&priv->lock);
-
-       xc_reset(priv->xc);
-       xc_start(priv->xc);
-
-       for (i=2; i<=18; i++)
-               pfifo_push(EMPTY_PTR_FIFO(priv->id),
-                       FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
-       spin_unlock_irq(&priv->lock);
-
-       netif_wake_queue(ndev);
-}
-
-static int
-netx_eth_phy_read(struct net_device *ndev, int phy_id, int reg)
-{
-       unsigned int val;
-
-       val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
-             MIIMU_REGADDR(reg) | MIIMU_PHY_NRES;
-
-       writel(val, NETX_MIIMU);
-       while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-
-       return readl(NETX_MIIMU) >> 16;
-
-}
-
-static void
-netx_eth_phy_write(struct net_device *ndev, int phy_id, int reg, int value)
-{
-       unsigned int val;
-
-       val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
-             MIIMU_REGADDR(reg) | MIIMU_PHY_NRES | MIIMU_OPMODE_WRITE |
-             MIIMU_DATA(value);
-
-       writel(val, NETX_MIIMU);
-       while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-}
-
-static const struct net_device_ops netx_eth_netdev_ops = {
-       .ndo_open               = netx_eth_open,
-       .ndo_stop               = netx_eth_close,
-       .ndo_start_xmit         = netx_eth_hard_start_xmit,
-       .ndo_tx_timeout         = netx_eth_timeout,
-       .ndo_set_rx_mode        = netx_eth_set_multicast_list,
-       .ndo_validate_addr      = eth_validate_addr,
-       .ndo_set_mac_address    = eth_mac_addr,
-};
-
-static int netx_eth_enable(struct net_device *ndev)
-{
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-       unsigned int mac4321, mac65;
-       int running, i, ret;
-       bool inv_mac_addr = false;
-
-       ndev->netdev_ops = &netx_eth_netdev_ops;
-       ndev->watchdog_timeo = msecs_to_jiffies(5000);
-
-       priv->msg_enable       = NETIF_MSG_LINK;
-       priv->mii.phy_id_mask  = 0x1f;
-       priv->mii.reg_num_mask = 0x1f;
-       priv->mii.force_media  = 0;
-       priv->mii.full_duplex  = 0;
-       priv->mii.dev        = ndev;
-       priv->mii.mdio_read    = netx_eth_phy_read;
-       priv->mii.mdio_write   = netx_eth_phy_write;
-       priv->mii.phy_id = INTERNAL_PHY_ADR + priv->id;
-
-       running = xc_running(priv->xc);
-       xc_stop(priv->xc);
-
-       /* if the xc engine is already running, assume the bootloader has
-        * loaded the firmware for us
-        */
-       if (running) {
-               /* get Node Address from hardware */
-               mac4321 = readl(priv->xpec_base +
-                       NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
-               mac65 = readl(priv->xpec_base +
-                       NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
-               ndev->dev_addr[0] = mac4321 & 0xff;
-               ndev->dev_addr[1] = (mac4321 >> 8) & 0xff;
-               ndev->dev_addr[2] = (mac4321 >> 16) & 0xff;
-               ndev->dev_addr[3] = (mac4321 >> 24) & 0xff;
-               ndev->dev_addr[4] = mac65 & 0xff;
-               ndev->dev_addr[5] = (mac65 >> 8) & 0xff;
-       } else {
-               if (xc_request_firmware(priv->xc)) {
-                       printk(CARDNAME ": requesting firmware failed\n");
-                       return -ENODEV;
-               }
-       }
-
-       xc_reset(priv->xc);
-       xc_start(priv->xc);
-
-       if (!is_valid_ether_addr(ndev->dev_addr))
-               inv_mac_addr = true;
-
-       for (i=2; i<=18; i++)
-               pfifo_push(EMPTY_PTR_FIFO(priv->id),
-                       FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
-       ret = register_netdev(ndev);
-       if (inv_mac_addr)
-               printk("%s: Invalid ethernet MAC address. Please set using ip\n",
-                      ndev->name);
-
-       return ret;
-}
-
-static int netx_eth_drv_probe(struct platform_device *pdev)
-{
-       struct netx_eth_priv *priv;
-       struct net_device *ndev;
-       struct netxeth_platform_data *pdata;
-       int ret;
-
-       ndev = alloc_etherdev(sizeof (struct netx_eth_priv));
-       if (!ndev) {
-               ret = -ENOMEM;
-               goto exit;
-       }
-       SET_NETDEV_DEV(ndev, &pdev->dev);
-
-       platform_set_drvdata(pdev, ndev);
-
-       priv = netdev_priv(ndev);
-
-       pdata = dev_get_platdata(&pdev->dev);
-       priv->xc = request_xc(pdata->xcno, &pdev->dev);
-       if (!priv->xc) {
-               dev_err(&pdev->dev, "unable to request xc engine\n");
-               ret = -ENODEV;
-               goto exit_free_netdev;
-       }
-
-       ndev->irq = priv->xc->irq;
-       priv->id = pdev->id;
-       priv->xpec_base = priv->xc->xpec_base;
-       priv->xmac_base = priv->xc->xmac_base;
-       priv->sram_base = priv->xc->sram_base;
-
-       spin_lock_init(&priv->lock);
-
-       ret = pfifo_request(PFIFO_MASK(priv->id));
-       if (ret) {
-               printk("unable to request PFIFO\n");
-               goto exit_free_xc;
-       }
-
-       ret = netx_eth_enable(ndev);
-       if (ret)
-               goto exit_free_pfifo;
-
-       return 0;
-exit_free_pfifo:
-       pfifo_free(PFIFO_MASK(priv->id));
-exit_free_xc:
-       free_xc(priv->xc);
-exit_free_netdev:
-       free_netdev(ndev);
-exit:
-       return ret;
-}
-
-static int netx_eth_drv_remove(struct platform_device *pdev)
-{
-       struct net_device *ndev = platform_get_drvdata(pdev);
-       struct netx_eth_priv *priv = netdev_priv(ndev);
-
-       unregister_netdev(ndev);
-       xc_stop(priv->xc);
-       free_xc(priv->xc);
-       free_netdev(ndev);
-       pfifo_free(PFIFO_MASK(priv->id));
-
-       return 0;
-}
-
-static int netx_eth_drv_suspend(struct platform_device *pdev, pm_message_t state)
-{
-       dev_err(&pdev->dev, "suspend not implemented\n");
-       return 0;
-}
-
-static int netx_eth_drv_resume(struct platform_device *pdev)
-{
-       dev_err(&pdev->dev, "resume not implemented\n");
-       return 0;
-}
-
-static struct platform_driver netx_eth_driver = {
-       .probe          = netx_eth_drv_probe,
-       .remove         = netx_eth_drv_remove,
-       .suspend        = netx_eth_drv_suspend,
-       .resume         = netx_eth_drv_resume,
-       .driver         = {
-               .name   = CARDNAME,
-       },
-};
-
-static int __init netx_eth_init(void)
-{
-       unsigned int phy_control, val;
-
-       printk("NetX Ethernet driver\n");
-
-       phy_control = PHY_CONTROL_PHY_ADDRESS(INTERNAL_PHY_ADR>>1) |
-                     PHY_CONTROL_PHY1_MODE(PHY_MODE_ALL) |
-                     PHY_CONTROL_PHY1_AUTOMDIX |
-                     PHY_CONTROL_PHY1_EN |
-                     PHY_CONTROL_PHY0_MODE(PHY_MODE_ALL) |
-                     PHY_CONTROL_PHY0_AUTOMDIX |
-                     PHY_CONTROL_PHY0_EN |
-                     PHY_CONTROL_CLK_XLATIN;
-
-       val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
-       writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
-       writel(phy_control | PHY_CONTROL_RESET, NETX_SYSTEM_PHY_CONTROL);
-       udelay(100);
-
-       val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
-       writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
-       writel(phy_control, NETX_SYSTEM_PHY_CONTROL);
-
-       return platform_driver_register(&netx_eth_driver);
-}
-
-static void __exit netx_eth_cleanup(void)
-{
-       platform_driver_unregister(&netx_eth_driver);
-}
-
-module_init(netx_eth_init);
-module_exit(netx_eth_cleanup);
-
-MODULE_AUTHOR("Sascha Hauer, Pengutronix");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" CARDNAME);
-MODULE_FIRMWARE("xc0.bin");
-MODULE_FIRMWARE("xc1.bin");
-MODULE_FIRMWARE("xc2.bin");
index 0b384f9..2761f3a 100644 (file)
@@ -1347,7 +1347,7 @@ static int nixge_probe(struct platform_device *pdev)
        }
 
        priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-       if (priv->phy_mode < 0) {
+       if ((int)priv->phy_mode < 0) {
                netdev_err(ndev, "not find \"phy-mode\" property\n");
                err = -EINVAL;
                goto unregister_mdio;
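
of_get_phy_mode() returns a negative errno on failure, but priv->phy_mode is an enum (phy_interface_t); if the compiler gives that enum an unsigned underlying type, phy_mode < 0 is always false and the failure is never caught. The (int) cast restores a signed comparison. A minimal sketch of the pitfall, with a hypothetical enum:

enum mode { MODE_A, MODE_B };	/* the compiler may pick an unsigned type */

/* If "enum mode" is unsigned, "m < 0" is always false and the error
 * check silently disappears; casting to int forces a signed compare,
 * which is what the nixge fix above does for phy_interface_t.
 */
static int mode_is_error(enum mode m)
{
	return (int)m < 0;
}
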
index 418afb8..ee83a71 100644 (file)
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config LPC_ENET
-        tristate "NXP ethernet MAC on LPC devices"
-        depends on ARCH_LPC32XX || COMPILE_TEST
-        select PHYLIB
-        help
+       tristate "NXP ethernet MAC on LPC devices"
+       depends on ARCH_LPC32XX || COMPILE_TEST
+       select PHYLIB
+       help
          Say Y or M here if you want to use the NXP ethernet MAC included on
          some NXP LPC devices. You can safely enable this option for LPC32xx
          SoC. Also available as a module.
index 141571e..544012a 100644 (file)
@@ -1356,9 +1356,6 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
        if (!is_valid_ether_addr(ndev->dev_addr))
                eth_hw_addr_random(ndev);
 
-       /* Reset the ethernet controller */
-       __lpc_eth_reset(pldat);
-
        /* then shut everything down to save power */
        __lpc_eth_shutdown(pldat);
 
index 5ea570b..d25b88f 100644 (file)
@@ -20,13 +20,14 @@ if NET_VENDOR_PENSANDO
 config IONIC
        tristate "Pensando Ethernet IONIC Support"
        depends on 64BIT && PCI
+       select NET_DEVLINK
        help
          This enables the support for the Pensando family of Ethernet
          adapters.  More specific information on this driver can be
          found in
          <file:Documentation/networking/device_drivers/pensando/ionic.rst>.
 
-          To compile this driver as a module, choose M here. The module
-          will be called ionic.
+         To compile this driver as a module, choose M here. The module
+         will be called ionic.
 
 endif # NET_VENDOR_PENSANDO
index 7afc4a3..bc03cec 100644 (file)
@@ -57,7 +57,7 @@ DEFINE_SHOW_ATTRIBUTE(identity);
 void ionic_debugfs_add_ident(struct ionic *ionic)
 {
        debugfs_create_file("identity", 0400, ionic->dentry,
-                           ionic, &identity_fops) ? 0 : -EOPNOTSUPP;
+                           ionic, &identity_fops);
 }
 
 void ionic_debugfs_add_sizes(struct ionic *ionic)
index db7c827..20faa8d 100644 (file)
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
 
+#include <linux/printk.h>
+#include <linux/dynamic_debug.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/rtnetlink.h>
@@ -1704,6 +1706,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
                                              GFP_KERNEL);
 
        if (!lif->rss_ind_tbl) {
+               err = -ENOMEM;
                dev_err(dev, "Failed to allocate rss indirection table, aborting\n");
                goto err_out_free_qcqs;
        }
index 812190e..6a95b42 100644 (file)
@@ -182,6 +182,8 @@ struct ionic_lif {
 
 #define lif_to_txqcq(lif, i)   ((lif)->txqcqs[i].qcq)
 #define lif_to_rxqcq(lif, i)   ((lif)->rxqcqs[i].qcq)
+#define lif_to_txstats(lif, i) ((lif)->txqcqs[i].stats->tx)
+#define lif_to_rxstats(lif, i) ((lif)->rxqcqs[i].stats->rx)
 #define lif_to_txq(lif, i)     (&lif_to_txqcq((lif), i)->q)
 #define lif_to_rxq(lif, i)     (&lif_to_txqcq((lif), i)->q)
 
index 15e4323..aab3114 100644 (file)
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
 
+#include <linux/printk.h>
+#include <linux/dynamic_debug.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/utsname.h>
index e290788..03916b6 100644 (file)
@@ -117,7 +117,8 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
        /* rx stats */
        total += MAX_Q(lif) * IONIC_NUM_RX_STATS;
 
-       if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+       if (test_bit(IONIC_LIF_UP, lif->state) &&
+           test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
                /* tx debug stats */
                total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
                                      IONIC_NUM_TX_Q_STATS +
@@ -149,7 +150,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
                        *buf += ETH_GSTRING_LEN;
                }
 
-               if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+               if (test_bit(IONIC_LIF_UP, lif->state) &&
+                   test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
                        for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
                                snprintf(*buf, ETH_GSTRING_LEN,
                                         "txq_%d_%s",
@@ -187,7 +189,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
                        *buf += ETH_GSTRING_LEN;
                }
 
-               if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+               if (test_bit(IONIC_LIF_UP, lif->state) &&
+                   test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
                        for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
                                snprintf(*buf, ETH_GSTRING_LEN,
                                         "rxq_%d_cq_%s",
@@ -223,6 +226,8 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
 {
        struct ionic_lif_sw_stats lif_stats;
        struct ionic_qcq *txqcq, *rxqcq;
+       struct ionic_tx_stats *txstats;
+       struct ionic_rx_stats *rxstats;
        int i, q_num;
 
        ionic_get_lif_stats(lif, &lif_stats);
@@ -233,15 +238,17 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
        }
 
        for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-               txqcq = lif_to_txqcq(lif, q_num);
+               txstats = &lif_to_txstats(lif, q_num);
 
                for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
-                       **buf = IONIC_READ_STAT64(&txqcq->stats->tx,
+                       **buf = IONIC_READ_STAT64(txstats,
                                                  &ionic_tx_stats_desc[i]);
                        (*buf)++;
                }
 
-               if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+               if (test_bit(IONIC_LIF_UP, lif->state) &&
+                   test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+                       txqcq = lif_to_txqcq(lif, q_num);
                        for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
                                **buf = IONIC_READ_STAT64(&txqcq->q,
                                                      &ionic_txq_stats_desc[i]);
@@ -258,22 +265,24 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
                                (*buf)++;
                        }
                        for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
-                               **buf = txqcq->stats->tx.sg_cntr[i];
+                               **buf = txstats->sg_cntr[i];
                                (*buf)++;
                        }
                }
        }
 
        for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-               rxqcq = lif_to_rxqcq(lif, q_num);
+               rxstats = &lif_to_rxstats(lif, q_num);
 
                for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
-                       **buf = IONIC_READ_STAT64(&rxqcq->stats->rx,
+                       **buf = IONIC_READ_STAT64(rxstats,
                                                  &ionic_rx_stats_desc[i]);
                        (*buf)++;
                }
 
-               if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+               if (test_bit(IONIC_LIF_UP, lif->state) &&
+                   test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+                       rxqcq = lif_to_rxqcq(lif, q_num);
                        for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
                                **buf = IONIC_READ_STAT64(&rxqcq->cq,
                                                   &ionic_dbg_cq_stats_desc[i]);
index 2ce7009..38f7f40 100644 (file)
 #define QED_ROCE_QPS                   (8192)
 #define QED_ROCE_DPIS                  (8)
 #define QED_RDMA_SRQS                   QED_ROCE_QPS
-#define QED_NVM_CFG_SET_FLAGS          0xE
-#define QED_NVM_CFG_SET_PF_FLAGS       0x1E
 #define QED_NVM_CFG_GET_FLAGS          0xA
 #define QED_NVM_CFG_GET_PF_FLAGS       0x1A
+#define QED_NVM_CFG_MAX_ATTRS          50
 
 static char version[] =
        "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
@@ -2255,6 +2254,7 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
 {
        struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
        u8 entity_id, len, buf[32];
+       bool need_nvm_init = true;
        struct qed_ptt *ptt;
        u16 cfg_id, count;
        int rc = 0, i;
@@ -2271,8 +2271,10 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
 
        DP_VERBOSE(cdev, NETIF_MSG_DRV,
                   "Read config ids: num_attrs = %0d\n", count);
-       /* NVM CFG ID attributes */
-       for (i = 0; i < count; i++) {
+       /* NVM CFG ID attributes. Start loop index from 1 to avoid additional
+        * arithmetic operations in the implementation.
+        */
+       for (i = 1; i <= count; i++) {
                cfg_id = *((u16 *)*data);
                *data += 2;
                entity_id = **data;
@@ -2282,8 +2284,21 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
                memcpy(buf, *data, len);
                *data += len;
 
-               flags = entity_id ? QED_NVM_CFG_SET_PF_FLAGS :
-                       QED_NVM_CFG_SET_FLAGS;
+               flags = 0;
+               if (need_nvm_init) {
+                       flags |= QED_NVM_CFG_OPTION_INIT;
+                       need_nvm_init = false;
+               }
+
+               /* Commit to flash and free the resources */
+               if (!(i % QED_NVM_CFG_MAX_ATTRS) || i == count) {
+                       flags |= QED_NVM_CFG_OPTION_COMMIT |
+                                QED_NVM_CFG_OPTION_FREE;
+                       need_nvm_init = true;
+               }
+
+               if (entity_id)
+                       flags |= QED_NVM_CFG_OPTION_ENTITY_SEL;
 
                DP_VERBOSE(cdev, NETIF_MSG_DRV,
                           "cfg_id = %d entity = %d len = %d\n", cfg_id,
index 78f77b7..dcb5c91 100644 (file)
@@ -2005,7 +2005,7 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn,
            (qed_iov_validate_active_txq(p_hwfn, vf))) {
                vf->b_malicious = true;
                DP_NOTICE(p_hwfn,
-                         "VF [%02x] - considered malicious; Unable to stop RX/TX queuess\n",
+                         "VF [%02x] - considered malicious; Unable to stop RX/TX queues\n",
                          vf->abs_vf_id);
                status = PFVF_STATUS_MALICIOUS;
                goto out;
index 0ae28f0..004c0bf 100644 (file)
@@ -779,8 +779,7 @@ qede_rx_build_skb(struct qede_dev *edev,
                        return NULL;
 
                skb_reserve(skb, pad);
-               memcpy(skb_put(skb, len),
-                      page_address(bd->data) + offset, len);
+               skb_put_data(skb, page_address(bd->data) + offset, len);
                qede_reuse_page(rxq, bd);
                goto out;
        }
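
skb_put_data() is the helper form of the open-coded memcpy(skb_put(...), ...) idiom: it reserves len bytes at the skb tail and copies the source buffer in one call. A one-function sketch of the equivalence:

#include <linux/skbuff.h>

static void copy_into_skb(struct sk_buff *skb, const void *src,
			  unsigned int len)
{
	/* Older open-coded form:
	 *	memcpy(skb_put(skb, len), src, len);
	 * Equivalent helper (caller must still guarantee tailroom):
	 */
	skb_put_data(skb, src, len);
}
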
index 4574448..b4b8ba0 100644 (file)
@@ -2787,6 +2787,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev)
                                netdev_err(qdev->ndev,
                                           "PCI mapping failed with error: %d\n",
                                           err);
+                               dev_kfree_skb_irq(skb);
                                ql_free_large_buffers(qdev);
                                return -ENOMEM;
                        }
index 0ef01db..5064c29 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/prefetch.h>
-#include <linux/pci-aspm.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
 
@@ -1030,6 +1029,10 @@ static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
 {
        int value;
 
+       /* Work around issue with chip reporting wrong PHY ID */
+       if (reg == MII_PHYSID2)
+               return 0xc912;
+
        r8168dp_2_mdio_start(tp);
 
        value = r8169_mdio_read(tp, reg);
@@ -4147,6 +4150,14 @@ static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
        rtl_lock_config_regs(tp);
 }
 
+static void rtl_jumbo_config(struct rtl8169_private *tp, int mtu)
+{
+       if (mtu > ETH_DATA_LEN)
+               rtl_hw_jumbo_enable(tp);
+       else
+               rtl_hw_jumbo_disable(tp);
+}
+
 DECLARE_RTL_COND(rtl_chipcmd_cond)
 {
        return RTL_R8(tp, ChipCmd) & CmdReset;
@@ -4443,11 +4454,6 @@ static void rtl8168g_set_pause_thresholds(struct rtl8169_private *tp,
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
-       if (tp->dev->mtu <= ETH_DATA_LEN) {
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B |
-                                        PCI_EXP_DEVCTL_NOSNOOP_EN);
-       }
 }
 
 static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
@@ -4463,9 +4469,6 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-       if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_disable_clock_request(tp);
 }
 
@@ -4491,9 +4494,6 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
        rtl_set_def_aspm_entry_latency(tp);
 
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
-       if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 }
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
@@ -4504,9 +4504,6 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 
        /* Magic. */
        RTL_W8(tp, DBG_REG, 0x20);
-
-       if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 }
 
 static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
@@ -4612,9 +4609,6 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 
        rtl_ephy_init(tp, e_info_8168e_1);
 
-       if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_disable_clock_request(tp);
 
        /* Reset tx FIFO pointer */
@@ -4637,9 +4631,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 
        rtl_ephy_init(tp, e_info_8168e_2);
 
-       if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
        rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
@@ -5486,6 +5477,8 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
        rtl_set_rx_tx_desc_registers(tp);
        rtl_lock_config_regs(tp);
 
+       rtl_jumbo_config(tp, tp->dev->mtu);
+
        /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
        RTL_R16(tp, CPlusCmd);
        RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
@@ -5499,10 +5492,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       if (new_mtu > ETH_DATA_LEN)
-               rtl_hw_jumbo_enable(tp);
-       else
-               rtl_hw_jumbo_disable(tp);
+       rtl_jumbo_config(tp, new_mtu);
 
        dev->mtu = new_mtu;
        netdev_update_features(dev);
index 1502fe8..f9e6744 100644 (file)
@@ -282,7 +282,6 @@ struct netsec_desc_ring {
        void *vaddr;
        u16 head, tail;
        u16 xdp_xmit; /* netsec_xdp_xmit packets */
-       bool is_xdp;
        struct page_pool *page_pool;
        struct xdp_rxq_info xdp_rxq;
        spinlock_t lock; /* XDP tx queue locking */
@@ -634,8 +633,7 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv)
        unsigned int bytes;
        int cnt = 0;
 
-       if (dring->is_xdp)
-               spin_lock(&dring->lock);
+       spin_lock(&dring->lock);
 
        bytes = 0;
        entry = dring->vaddr + DESC_SZ * tail;
@@ -682,8 +680,8 @@ next:
                entry = dring->vaddr + DESC_SZ * tail;
                cnt++;
        }
-       if (dring->is_xdp)
-               spin_unlock(&dring->lock);
+
+       spin_unlock(&dring->lock);
 
        if (!cnt)
                return false;
@@ -799,9 +797,6 @@ static void netsec_set_tx_de(struct netsec_priv *priv,
        de->data_buf_addr_lw = lower_32_bits(desc->dma_addr);
        de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len;
        de->attr = attr;
-       /* under spin_lock if using XDP */
-       if (!dring->is_xdp)
-               dma_wmb();
 
        dring->desc[idx] = *desc;
        if (desc->buf_type == TYPE_NETSEC_SKB)
@@ -1123,12 +1118,10 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
        u16 tso_seg_len = 0;
        int filled;
 
-       if (dring->is_xdp)
-               spin_lock_bh(&dring->lock);
+       spin_lock_bh(&dring->lock);
        filled = netsec_desc_used(dring);
        if (netsec_check_stop_tx(priv, filled)) {
-               if (dring->is_xdp)
-                       spin_unlock_bh(&dring->lock);
+               spin_unlock_bh(&dring->lock);
                net_warn_ratelimited("%s %s Tx queue full\n",
                                     dev_name(priv->dev), ndev->name);
                return NETDEV_TX_BUSY;
@@ -1161,8 +1154,7 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
        tx_desc.dma_addr = dma_map_single(priv->dev, skb->data,
                                          skb_headlen(skb), DMA_TO_DEVICE);
        if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) {
-               if (dring->is_xdp)
-                       spin_unlock_bh(&dring->lock);
+               spin_unlock_bh(&dring->lock);
                netif_err(priv, drv, priv->ndev,
                          "%s: DMA mapping failed\n", __func__);
                ndev->stats.tx_dropped++;
@@ -1177,8 +1169,7 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
        netdev_sent_queue(priv->ndev, skb->len);
 
        netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb);
-       if (dring->is_xdp)
-               spin_unlock_bh(&dring->lock);
+       spin_unlock_bh(&dring->lock);
        netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */
 
        return NETDEV_TX_OK;
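
Locking the TX ring only when dring->is_xdp was set is racy: the flag is sampled when the ring is (re)configured, so swapping an XDP program at runtime can leave one path taking the lock while a concurrent path skips it. An uncontended spinlock is cheap, so the fix locks unconditionally. Reduced sketch of the anti-pattern versus the fix, with hypothetical names:

#include <linux/spinlock.h>

struct ring {
	spinlock_t lock;	/* init with spin_lock_init() */
	bool is_xdp;		/* sampled at setup -- can go stale */
};

static void xmit_racy(struct ring *r)
{
	if (r->is_xdp)		/* BAD: a peer path may disagree */
		spin_lock_bh(&r->lock);
	/* ... modify ring state ... */
	if (r->is_xdp)
		spin_unlock_bh(&r->lock);
}

static void xmit_fixed(struct ring *r)
{
	spin_lock_bh(&r->lock);	/* always serialize ring access */
	/* ... modify ring state ... */
	spin_unlock_bh(&r->lock);
}
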
@@ -1262,7 +1253,6 @@ err:
 static void netsec_setup_tx_dring(struct netsec_priv *priv)
 {
        struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
-       struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
        int i;
 
        for (i = 0; i < DESC_NUM; i++) {
@@ -1275,12 +1265,6 @@ static void netsec_setup_tx_dring(struct netsec_priv *priv)
                 */
                de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
        }
-
-       if (xdp_prog)
-               dring->is_xdp = true;
-       else
-               dring->is_xdp = false;
-
 }
 
 static int netsec_setup_rx_dring(struct netsec_priv *priv)
@@ -2007,7 +1991,7 @@ static int netsec_probe(struct platform_device *pdev)
                           NETIF_MSG_LINK | NETIF_MSG_PROBE;
 
        priv->phy_interface = device_get_phy_mode(&pdev->dev);
-       if (priv->phy_interface < 0) {
+       if ((int)priv->phy_interface < 0) {
                dev_err(&pdev->dev, "missing required property 'phy-mode'\n");
                ret = -ENODEV;
                goto free_ndev;
index 10d0c3e..6e984d5 100644 (file)
@@ -1566,7 +1566,7 @@ static int ave_probe(struct platform_device *pdev)
 
        np = dev->of_node;
        phy_mode = of_get_phy_mode(np);
-       if (phy_mode < 0) {
+       if ((int)phy_mode < 0) {
                dev_err(dev, "phy-mode not found\n");
                return -EINVAL;
        }
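
These (int) casts address a real bug class: the phy-mode getters return negative errno values as plain ints, but once the result is stored in a phy_interface_t (an enum the compiler may treat as unsigned) a direct `< 0` test is always false and the error is silently accepted. Casting back to int restores the sign test; upstream later reworked of_get_phy_mode() to return an int and hand the mode back through a pointer argument. Sketch of the hazard:

#include <linux/errno.h>

enum phy_interface { PHY_MODE_NA, PHY_MODE_RGMII };

static int check_mode(enum phy_interface mode)
{
	/* If the enum is unsigned, "mode < 0" is always false, so an
	 * error such as -ENODEV stored into it would slip through.
	 */
	if ((int)mode < 0)
		return -EINVAL;
	return 0;
}
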
@@ -1662,19 +1662,19 @@ static int ave_probe(struct platform_device *pdev)
                                               "socionext,syscon-phy-mode",
                                               1, 0, &args);
        if (ret) {
-               netdev_err(ndev, "can't get syscon-phy-mode property\n");
+               dev_err(dev, "can't get syscon-phy-mode property\n");
                goto out_free_netdev;
        }
        priv->regmap = syscon_node_to_regmap(args.np);
        of_node_put(args.np);
        if (IS_ERR(priv->regmap)) {
-               netdev_err(ndev, "can't map syscon-phy-mode\n");
+               dev_err(dev, "can't map syscon-phy-mode\n");
                ret = PTR_ERR(priv->regmap);
                goto out_free_netdev;
        }
        ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]);
        if (ret) {
-               netdev_err(ndev, "invalid phy-mode setting\n");
+               dev_err(dev, "invalid phy-mode setting\n");
                goto out_free_netdev;
        }
 
index 2c6d7c6..0d21082 100644 (file)
@@ -191,7 +191,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
        struct device *dev = &gmac->pdev->dev;
 
        gmac->phy_mode = of_get_phy_mode(dev->of_node);
-       if (gmac->phy_mode < 0) {
+       if ((int)gmac->phy_mode < 0) {
                dev_err(dev, "missing phy mode property\n");
                return -EINVAL;
        }
index 9cda29e..306da8f 100644 (file)
@@ -339,7 +339,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 
        dwmac->dev = &pdev->dev;
        dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-       if (dwmac->phy_mode < 0) {
+       if ((int)dwmac->phy_mode < 0) {
                dev_err(&pdev->dev, "missing phy-mode property\n");
                ret = -EINVAL;
                goto err_remove_config_dt;
index f97a409..ddcc191 100644 (file)
@@ -651,7 +651,8 @@ static void sun8i_dwmac_set_filter(struct mac_device_info *hw,
                        }
                }
        } else {
-               netdev_info(dev, "Too many address, switching to promiscuous\n");
+               if (!(readl(ioaddr + EMAC_RX_FRM_FLT) & EMAC_FRM_FLT_RXALL))
+                       netdev_info(dev, "Too many address, switching to promiscuous\n");
                v = EMAC_FRM_FLT_RXALL;
        }
 
index 9b4b5f6..5a7b0ac 100644 (file)
@@ -401,8 +401,11 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
        int numhashregs = (hw->multicast_filter_bins >> 5);
        int mcbitslog2 = hw->mcast_bits_log2;
        unsigned int value;
+       u32 mc_filter[8];
        int i;
 
+       memset(mc_filter, 0, sizeof(mc_filter));
+
        value = readl(ioaddr + GMAC_PACKET_FILTER);
        value &= ~GMAC_PACKET_FILTER_HMC;
        value &= ~GMAC_PACKET_FILTER_HPF;
@@ -416,16 +419,13 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
                /* Pass all multi */
                value |= GMAC_PACKET_FILTER_PM;
                /* Set all the bits of the HASH tab */
-               for (i = 0; i < numhashregs; i++)
-                       writel(0xffffffff, ioaddr + GMAC_HASH_TAB(i));
+               memset(mc_filter, 0xff, sizeof(mc_filter));
        } else if (!netdev_mc_empty(dev)) {
                struct netdev_hw_addr *ha;
-               u32 mc_filter[8];
 
                /* Hash filter for multicast */
                value |= GMAC_PACKET_FILTER_HMC;
 
-               memset(mc_filter, 0, sizeof(mc_filter));
                netdev_for_each_mc_addr(ha, dev) {
                        /* The upper n bits of the calculated CRC are used to
                         * index the contents of the hash table. The number of
@@ -440,14 +440,15 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
                         */
                        mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1f));
                }
-               for (i = 0; i < numhashregs; i++)
-                       writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i));
        }
 
+       for (i = 0; i < numhashregs; i++)
+               writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i));
+
        value |= GMAC_PACKET_FILTER_HPF;
 
        /* Handle multiple unicast addresses */
-       if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) {
+       if (netdev_uc_count(dev) > hw->unicast_filter_entries) {
                /* Switch to promiscuous mode if more than 128 addrs
                 * are required
                 */
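
Hoisting mc_filter out of the branches and writing every hash register on every call ensures stale bins are cleared whenever the address list shrinks or the filter mode changes; previously the registers were only written on the paths that computed fresh contents. As the surrounding comment describes, the bin for an address comes from the upper bits of the CRC over the MAC address; sketched as a helper:

#include <linux/bitrev.h>
#include <linux/crc32.h>
#include <linux/etherdevice.h>

static void hash_set_addr(u32 *mc_filter, const u8 *addr, int mcbitslog2)
{
	/* Upper mcbitslog2 bits of the bit-reversed CRC select the bin */
	u32 bit_nr = bitrev32(~crc32_le(~0, addr, ETH_ALEN)) >>
		     (32 - mcbitslog2);

	/* word = bin / 32, bit = bin % 32 */
	mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 0x1f);
}
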
index 3f4f313..e436fa1 100644 (file)
@@ -515,6 +515,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
 
        if (!enable) {
                val |= PPSCMDx(index, 0x5);
+               val |= PPSEN0;
                writel(val, ioaddr + MAC_PPS_CONTROL);
                return 0;
        }
index 5923ca6..9903738 100644 (file)
@@ -84,7 +84,7 @@
 #define XGMAC_TSIE                     BIT(12)
 #define XGMAC_LPIIE                    BIT(5)
 #define XGMAC_PMTIE                    BIT(4)
-#define XGMAC_INT_DEFAULT_EN           (XGMAC_LPIIE | XGMAC_PMTIE | XGMAC_TSIE)
+#define XGMAC_INT_DEFAULT_EN           (XGMAC_LPIIE | XGMAC_PMTIE)
 #define XGMAC_Qx_TX_FLOW_CTRL(x)       (0x00000070 + (x) * 4)
 #define XGMAC_PT                       GENMASK(31, 16)
 #define XGMAC_PT_SHIFT                 16
 #define XGMAC_HWFEAT_GMIISEL           BIT(1)
 #define XGMAC_HW_FEATURE1              0x00000120
 #define XGMAC_HWFEAT_L3L4FNUM          GENMASK(30, 27)
+#define XGMAC_HWFEAT_HASHTBLSZ         GENMASK(25, 24)
 #define XGMAC_HWFEAT_RSSEN             BIT(20)
 #define XGMAC_HWFEAT_TSOEN             BIT(18)
 #define XGMAC_HWFEAT_SPHEN             BIT(17)
index d5173dd..5031398 100644 (file)
@@ -472,7 +472,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw,
        dwxgmac2_set_mchash(ioaddr, mc_filter, mcbitslog2);
 
        /* Handle multiple unicast addresses */
-       if (netdev_uc_count(dev) > XGMAC_ADDR_MAX) {
+       if (netdev_uc_count(dev) > hw->unicast_filter_entries) {
                value |= XGMAC_FILTER_PR;
        } else {
                struct netdev_hw_addr *ha;
@@ -523,19 +523,19 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw,
                                  struct stmmac_rss *cfg, u32 num_rxq)
 {
        void __iomem *ioaddr = hw->pcsr;
-       u32 *key = (u32 *)cfg->key;
+       u32 value, *key;
        int i, ret;
-       u32 value;
 
        value = readl(ioaddr + XGMAC_RSS_CTRL);
-       if (!cfg->enable) {
+       if (!cfg || !cfg->enable) {
                value &= ~XGMAC_RSSE;
                writel(value, ioaddr + XGMAC_RSS_CTRL);
                return 0;
        }
 
-       for (i = 0; i < (sizeof(cfg->key) / sizeof(u32)); i++) {
-               ret = dwxgmac2_rss_write_reg(ioaddr, true, i, *key++);
+       key = (u32 *)cfg->key;
+       for (i = 0; i < (ARRAY_SIZE(cfg->key) / sizeof(u32)); i++) {
+               ret = dwxgmac2_rss_write_reg(ioaddr, true, i, key[i]);
                if (ret)
                        return ret;
        }
index 53c4a40..965cbe3 100644 (file)
@@ -380,6 +380,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
        /* MAC HW feature 1 */
        hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
        dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27;
+       dma_cap->hash_tb_sz = (hw_cap & XGMAC_HWFEAT_HASHTBLSZ) >> 24;
        dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
        dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
        dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17;
index a6cb2aa..4e9c848 100644 (file)
@@ -629,6 +629,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
                        config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
                        ptp_v2 = PTP_TCR_TSVER2ENA;
                        snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+                       ts_event_en = PTP_TCR_TSEVNTENA;
                        ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
                        ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
                        ptp_over_ethernet = PTP_TCR_TSIPENA;
@@ -1557,13 +1558,15 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
        for (queue = 0; queue < rx_count; queue++) {
                struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
                struct page_pool_params pp_params = { 0 };
+               unsigned int num_pages;
 
                rx_q->queue_index = queue;
                rx_q->priv_data = priv;
 
                pp_params.flags = PP_FLAG_DMA_MAP;
                pp_params.pool_size = DMA_RX_SIZE;
-               pp_params.order = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
+               num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
+               pp_params.order = ilog2(num_pages);
                pp_params.nid = dev_to_node(priv->device);
                pp_params.dev = priv->device;
                pp_params.dma_dir = DMA_FROM_DEVICE;
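
page_pool's pp_params.order is a page order (the pool hands out blocks of 2^order contiguous pages), so assigning it the page count over-allocates dramatically as soon as a buffer spans more than one page; the fix converts the count to an order with ilog2(). Sketch of the conversion:

#include <linux/log2.h>
#include <linux/mm.h>

static unsigned int buf_order(unsigned int buf_sz)
{
	unsigned int num_pages = DIV_ROUND_UP(buf_sz, PAGE_SIZE);

	/* ilog2() maps a page count to its order: 1 -> 0, 2 -> 1,
	 * 4 -> 2 (rounding down for non-powers-of-two; get_order()
	 * rounds up). Using num_pages itself as the order would
	 * request 2^num_pages pages per buffer.
	 */
	return ilog2(num_pages);
}
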
@@ -2607,7 +2610,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        }
 
        if (priv->hw->pcs)
-               stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0);
+               stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
 
        /* set TX and RX rings length */
        stmmac_set_rings_length(priv);
@@ -2992,6 +2995,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        } else {
                stmmac_set_desc_addr(priv, first, des);
                tmp_pay_len = pay_len;
+               des += proto_hdr_len;
        }
 
        stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
@@ -4713,11 +4717,9 @@ int stmmac_suspend(struct device *dev)
        if (!ndev || !netif_running(ndev))
                return 0;
 
-       mutex_lock(&priv->lock);
+       phylink_mac_change(priv->phylink, false);
 
-       rtnl_lock();
-       phylink_stop(priv->phylink);
-       rtnl_unlock();
+       mutex_lock(&priv->lock);
 
        netif_device_detach(ndev);
        stmmac_stop_all_queues(priv);
@@ -4732,11 +4734,19 @@ int stmmac_suspend(struct device *dev)
                stmmac_pmt(priv, priv->hw, priv->wolopts);
                priv->irq_wake = 1;
        } else {
+               mutex_unlock(&priv->lock);
+               rtnl_lock();
+               phylink_stop(priv->phylink);
+               rtnl_unlock();
+               mutex_lock(&priv->lock);
+
                stmmac_mac_set(priv, priv->ioaddr, false);
                pinctrl_pm_select_sleep_state(priv->device);
                /* Disable clock in case of PWM is off */
-               clk_disable(priv->plat->pclk);
-               clk_disable(priv->plat->stmmac_clk);
+               if (priv->plat->clk_ptp_ref)
+                       clk_disable_unprepare(priv->plat->clk_ptp_ref);
+               clk_disable_unprepare(priv->plat->pclk);
+               clk_disable_unprepare(priv->plat->stmmac_clk);
        }
        mutex_unlock(&priv->lock);
 
@@ -4799,8 +4809,10 @@ int stmmac_resume(struct device *dev)
        } else {
                pinctrl_pm_select_default_state(priv->device);
                /* enable the clk previously disabled */
-               clk_enable(priv->plat->stmmac_clk);
-               clk_enable(priv->plat->pclk);
+               clk_prepare_enable(priv->plat->stmmac_clk);
+               clk_prepare_enable(priv->plat->pclk);
+               if (priv->plat->clk_ptp_ref)
+                       clk_prepare_enable(priv->plat->clk_ptp_ref);
                /* reset the phy so that it's ready */
                if (priv->mii)
                        stmmac_mdio_reset(priv->mii);
@@ -4822,12 +4834,16 @@ int stmmac_resume(struct device *dev)
 
        stmmac_start_all_queues(priv);
 
-       rtnl_lock();
-       phylink_start(priv->phylink);
-       rtnl_unlock();
-
        mutex_unlock(&priv->lock);
 
+       if (!device_may_wakeup(priv->device)) {
+               rtnl_lock();
+               phylink_start(priv->phylink);
+               rtnl_unlock();
+       }
+
+       phylink_mac_change(priv->phylink, true);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(stmmac_resume);
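
The suspend path now pairs clk_disable_unprepare() with clk_prepare_enable() on resume instead of the bare clk_disable()/clk_enable() calls: the clock framework keeps separate prepare and enable counts, and only a full unprepare releases the clock (and potentially its parent chain) across suspend. The optional PTP reference clock gets the same treatment behind a presence check. A sketch of the balanced pattern with illustrative clock fields:

#include <linux/clk.h>

struct example_priv {
	struct clk *bus_clk;
	struct clk *ref_clk;	/* optional; may be NULL */
};

static void example_suspend_clks(struct example_priv *p)
{
	if (p->ref_clk)
		clk_disable_unprepare(p->ref_clk);
	clk_disable_unprepare(p->bus_clk);
}

static int example_resume_clks(struct example_priv *p)
{
	int ret;

	ret = clk_prepare_enable(p->bus_clk);	/* reverse order */
	if (ret)
		return ret;
	if (p->ref_clk)
		ret = clk_prepare_enable(p->ref_clk);
	return ret;
}
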
index 173493d..df638b1 100644 (file)
@@ -164,7 +164,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
 /* structure describing a PTP hardware clock */
 static struct ptp_clock_info stmmac_ptp_clock_ops = {
        .owner = THIS_MODULE,
-       .name = "stmmac_ptp_clock",
+       .name = "stmmac ptp",
        .max_adj = 62500000,
        .n_alarm = 0,
        .n_ext_ts = 0,
index c56e89e..e4ac3c4 100644 (file)
@@ -487,8 +487,8 @@ static int stmmac_filter_check(struct stmmac_priv *priv)
 
 static int stmmac_test_hfilt(struct stmmac_priv *priv)
 {
-       unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd};
-       unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb};
+       unsigned char gd_addr[ETH_ALEN] = {0x01, 0xee, 0xdd, 0xcc, 0xbb, 0xaa};
+       unsigned char bd_addr[ETH_ALEN] = {0x01, 0x01, 0x02, 0x03, 0x04, 0x05};
        struct stmmac_packet_attrs attr = { };
        int ret;
 
@@ -496,6 +496,9 @@ static int stmmac_test_hfilt(struct stmmac_priv *priv)
        if (ret)
                return ret;
 
+       if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
+               return -EOPNOTSUPP;
+
        ret = dev_mc_add(priv->dev, gd_addr);
        if (ret)
                return ret;
@@ -573,6 +576,8 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv)
 
        if (stmmac_filter_check(priv))
                return -EOPNOTSUPP;
+       if (!priv->hw->multicast_filter_bins)
+               return -EOPNOTSUPP;
 
        /* Remove all MC addresses */
        __dev_mc_unsync(priv->dev, NULL);
@@ -611,6 +616,8 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv)
 
        if (stmmac_filter_check(priv))
                return -EOPNOTSUPP;
+       if (!priv->hw->multicast_filter_bins)
+               return -EOPNOTSUPP;
 
        /* Remove all UC addresses */
        __dev_uc_unsync(priv->dev, NULL);
@@ -670,7 +677,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv)
        unsigned int pkt_count;
        int i, ret = 0;
 
-       if (!phydev || !phydev->pause)
+       if (!phydev || (!phydev->pause && !phydev->asym_pause))
                return -EOPNOTSUPP;
 
        tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
@@ -1233,12 +1240,9 @@ static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
                return -EOPNOTSUPP;
        if (!priv->dma_cap.l3l4fnum)
                return -EOPNOTSUPP;
-       if (priv->rss.enable) {
-               struct stmmac_rss rss = { .enable = false, };
-
-               stmmac_rss_configure(priv, priv->hw, &rss,
+       if (priv->rss.enable)
+               stmmac_rss_configure(priv, priv->hw, NULL,
                                     priv->plat->rx_queues_to_use);
-       }
 
        dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
        if (!dissector) {
@@ -1357,12 +1361,9 @@ static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
                return -EOPNOTSUPP;
        if (!priv->dma_cap.l3l4fnum)
                return -EOPNOTSUPP;
-       if (priv->rss.enable) {
-               struct stmmac_rss rss = { .enable = false, };
-
-               stmmac_rss_configure(priv, priv->hw, &rss,
+       if (priv->rss.enable)
+               stmmac_rss_configure(priv, priv->hw, NULL,
                                     priv->plat->rx_queues_to_use);
-       }
 
        dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
        if (!dissector) {
@@ -1570,10 +1571,6 @@ static int __stmmac_test_jumbo(struct stmmac_priv *priv, u16 queue)
        struct stmmac_packet_attrs attr = { };
        int size = priv->dma_buf_sz;
 
-       /* Only XGMAC has SW support for multiple RX descs in same packet */
-       if (priv->plat->has_xgmac)
-               size = priv->dev->max_mtu;
-
        attr.dst = priv->dev->dev_addr;
        attr.max_size = size - ETH_FCS_LEN;
        attr.queue_mapping = queue;
index e231098..f9a9a9d 100644 (file)
@@ -510,7 +510,7 @@ static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv,
        return NULL;
 }
 
-struct {
+static struct {
        int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls,
                  struct stmmac_flow_entry *entry);
 } tc_flow_parsers[] = {
index a65edd2..37ba708 100644 (file)
@@ -722,7 +722,7 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
  * cpdma_chan_split_pool - Splits ctrl pool between all channels.
  * Has to be called under ctlr lock
  */
-int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
 {
        int tx_per_ch_desc = 0, rx_per_ch_desc = 0;
        int free_rx_num = 0, free_tx_num = 0;
index 4fc627f..676006f 100644 (file)
@@ -1762,7 +1762,7 @@ static int axienet_probe(struct platform_device *pdev)
                }
        } else {
                lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-               if (lp->phy_mode < 0) {
+               if ((int)lp->phy_mode < 0) {
                        ret = -EINVAL;
                        goto free_netdev;
                }
index bbbc1dc..b517c1a 100644 (file)
@@ -1237,8 +1237,17 @@ static int fjes_probe(struct platform_device *plat_dev)
        adapter->open_guard = false;
 
        adapter->txrx_wq = alloc_workqueue(DRV_NAME "/txrx", WQ_MEM_RECLAIM, 0);
+       if (unlikely(!adapter->txrx_wq)) {
+               err = -ENOMEM;
+               goto err_free_netdev;
+       }
+
        adapter->control_wq = alloc_workqueue(DRV_NAME "/control",
                                              WQ_MEM_RECLAIM, 0);
+       if (unlikely(!adapter->control_wq)) {
+               err = -ENOMEM;
+               goto err_free_txrx_wq;
+       }
 
        INIT_WORK(&adapter->tx_stall_task, fjes_tx_stall_task);
        INIT_WORK(&adapter->raise_intr_rxdata_task,
@@ -1255,7 +1264,7 @@ static int fjes_probe(struct platform_device *plat_dev)
        hw->hw_res.irq = platform_get_irq(plat_dev, 0);
        err = fjes_hw_init(&adapter->hw);
        if (err)
-               goto err_free_netdev;
+               goto err_free_control_wq;
 
        /* setup MAC address (02:00:00:00:00:[epid])*/
        netdev->dev_addr[0] = 2;
@@ -1277,6 +1286,10 @@ static int fjes_probe(struct platform_device *plat_dev)
 
 err_hw_exit:
        fjes_hw_exit(&adapter->hw);
+err_free_control_wq:
+       destroy_workqueue(adapter->control_wq);
+err_free_txrx_wq:
+       destroy_workqueue(adapter->txrx_wq);
 err_free_netdev:
        free_netdev(netdev);
 err_out:
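
alloc_workqueue() returns NULL on failure (not an ERR_PTR), and the fjes fix both checks each allocation and extends the unwind ladder so every label destroys exactly what was acquired before the failing step, in reverse order. The skeleton of the ladder, with hypothetical names:

#include <linux/workqueue.h>

static int example_probe_wqs(struct workqueue_struct **a,
			     struct workqueue_struct **b)
{
	int err;

	*a = alloc_workqueue("ex/a", WQ_MEM_RECLAIM, 0);
	if (!*a)
		return -ENOMEM;

	*b = alloc_workqueue("ex/b", WQ_MEM_RECLAIM, 0);
	if (!*b) {
		err = -ENOMEM;
		goto err_free_a;	/* undo only what succeeded */
	}
	return 0;

err_free_a:
	destroy_workqueue(*a);
	return err;
}
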
index fbec711..fbea6f2 100644 (file)
@@ -107,27 +107,6 @@ struct bpqdev {
 
 static LIST_HEAD(bpq_devices);
 
-/*
- * bpqether network devices are paired with ethernet devices below them, so
- * form a special "super class" of normal ethernet devices; split their locks
- * off into a separate class since they always nest.
- */
-static struct lock_class_key bpq_netdev_xmit_lock_key;
-static struct lock_class_key bpq_netdev_addr_lock_key;
-
-static void bpq_set_lockdep_class_one(struct net_device *dev,
-                                     struct netdev_queue *txq,
-                                     void *_unused)
-{
-       lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key);
-}
-
-static void bpq_set_lockdep_class(struct net_device *dev)
-{
-       lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key);
-       netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL);
-}
-
 /* ------------------------------------------------------------------------ */
 
 
@@ -498,7 +477,6 @@ static int bpq_new_device(struct net_device *edev)
        err = register_netdevice(ndev);
        if (err)
                goto error;
-       bpq_set_lockdep_class(ndev);
 
        /* List protected by RTNL */
        list_add_rcu(&bpq->bpq_list, &bpq_devices);
index 39dddcd..963509a 100644 (file)
@@ -982,7 +982,7 @@ static int netvsc_attach(struct net_device *ndev,
        if (netif_running(ndev)) {
                ret = rndis_filter_open(nvdev);
                if (ret)
-                       return ret;
+                       goto err;
 
                rdev = nvdev->extension;
                if (!rdev->link_state)
@@ -990,6 +990,13 @@ static int netvsc_attach(struct net_device *ndev,
        }
 
        return 0;
+
+err:
+       netif_device_detach(ndev);
+
+       rndis_filter_device_remove(hdev, nvdev);
+
+       return ret;
 }
 
 static int netvsc_set_channels(struct net_device *net,
@@ -1807,8 +1814,10 @@ static int netvsc_set_features(struct net_device *ndev,
 
        ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
 
-       if (ret)
+       if (ret) {
                features ^= NETIF_F_LRO;
+               ndev->features = features;
+       }
 
 syncvf:
        if (!vf_netdev)
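
When the RNDIS offload update fails, flipping NETIF_F_LRO back in the local copy alone is not enough; the fix also stores the corrected mask into ndev->features so the state user space sees (e.g. via ethtool -k) matches what the device is actually doing. Reduced sketch of the rule, with a hypothetical failure:

#include <linux/netdevice.h>

static int example_set_features(struct net_device *ndev,
				netdev_features_t features)
{
	int ret = -EIO;		/* pretend the hardware update failed */

	if (ret) {
		/* Undo the bit that could not be enabled and publish
		 * the result so the reported features stay truthful.
		 */
		features ^= NETIF_F_LRO;
		ndev->features = features;
	}
	return ret;
}
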
@@ -2335,8 +2344,6 @@ static int netvsc_probe(struct hv_device *dev,
                NETIF_F_HW_VLAN_CTAG_RX;
        net->vlan_features = net->features;
 
-       netdev_lockdep_set_classes(net);
-
        /* MTU range: 68 - 1500 or 65521 */
        net->min_mtu = NETVSC_MTU_MIN;
        if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
index ceddb42..0dd0ba9 100644 (file)
@@ -1137,10 +1137,11 @@ static void atusb_disconnect(struct usb_interface *interface)
 
        ieee802154_unregister_hw(atusb->hw);
 
+       usb_put_dev(atusb->usb_dev);
+
        ieee802154_free_hw(atusb->hw);
 
        usb_set_intfdata(interface, NULL);
-       usb_put_dev(atusb->usb_dev);
 
        pr_debug("%s done\n", __func__);
 }
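
Moving usb_put_dev() ahead of ieee802154_free_hw() closes a use-after-free: the atusb private data holding the usb_dev pointer is embedded in the hw allocation, so the reference must be dropped while that memory is still valid. The general ordering rule, sketched with hypothetical types:

#include <linux/slab.h>

struct child {
	int ref;		/* stands in for a refcounted object */
};

struct owner {
	struct child *dev;	/* reference held inside the owner */
};

static void put_child(struct child *c)
{
	c->ref--;
}

static void teardown(struct owner *o)
{
	put_child(o->dev);	/* drop refs stored in 'o' first... */
	kfree(o);		/* ...then free the memory holding them;
				 * the reverse order reads freed memory */
}
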
index 11402dc..430c937 100644 (file)
@@ -3145,12 +3145,12 @@ static int ca8210_probe(struct spi_device *spi_device)
                goto error;
        }
 
+       priv->spi->dev.platform_data = pdata;
        ret = ca8210_get_platform_data(priv->spi, pdata);
        if (ret) {
                dev_crit(&spi_device->dev, "ca8210_get_platform_data failed\n");
                goto error;
        }
-       priv->spi->dev.platform_data = pdata;
 
        ret = ca8210_dev_com_init(priv);
        if (ret) {
index 17f2300..8dc04e2 100644 (file)
@@ -800,7 +800,7 @@ mcr20a_handle_rx_read_buf_complete(void *context)
        if (!skb)
                return;
 
-       memcpy(skb_put(skb, len), lp->rx_buf, len);
+       __skb_put_data(skb, lp->rx_buf, len);
        ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
 
        print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
index 887bbba..ba3dfac 100644 (file)
@@ -131,8 +131,6 @@ static int ipvlan_init(struct net_device *dev)
        dev->gso_max_segs = phy_dev->gso_max_segs;
        dev->hard_header_len = phy_dev->hard_header_len;
 
-       netdev_lockdep_set_classes(dev);
-
        ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
        if (!ipvlan->pcpu_stats)
                return -ENOMEM;
index 8f46aa1..afd8b2a 100644 (file)
@@ -267,7 +267,6 @@ struct macsec_dev {
        struct pcpu_secy_stats __percpu *stats;
        struct list_head secys;
        struct gro_cells gro_cells;
-       unsigned int nest_level;
 };
 
 /**
@@ -1235,6 +1234,7 @@ deliver:
                macsec_rxsa_put(rx_sa);
        macsec_rxsc_put(rx_sc);
 
+       skb_orphan(skb);
        ret = gro_cells_receive(&macsec->gro_cells, skb);
        if (ret == NET_RX_SUCCESS)
                count_rx(dev, skb->len);
@@ -2749,7 +2749,6 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 
 #define MACSEC_FEATURES \
        (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
-static struct lock_class_key macsec_netdev_addr_lock_key;
 
 static int macsec_dev_init(struct net_device *dev)
 {
@@ -2957,11 +2956,6 @@ static int macsec_get_iflink(const struct net_device *dev)
        return macsec_priv(dev)->real_dev->ifindex;
 }
 
-static int macsec_get_nest_level(struct net_device *dev)
-{
-       return macsec_priv(dev)->nest_level;
-}
-
 static const struct net_device_ops macsec_netdev_ops = {
        .ndo_init               = macsec_dev_init,
        .ndo_uninit             = macsec_dev_uninit,
@@ -2975,7 +2969,6 @@ static const struct net_device_ops macsec_netdev_ops = {
        .ndo_start_xmit         = macsec_start_xmit,
        .ndo_get_stats64        = macsec_get_stats64,
        .ndo_get_iflink         = macsec_get_iflink,
-       .ndo_get_lock_subclass  = macsec_get_nest_level,
 };
 
 static const struct device_type macsec_type = {
@@ -3000,12 +2993,10 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
 static void macsec_free_netdev(struct net_device *dev)
 {
        struct macsec_dev *macsec = macsec_priv(dev);
-       struct net_device *real_dev = macsec->real_dev;
 
        free_percpu(macsec->stats);
        free_percpu(macsec->secy.tx_sc.stats);
 
-       dev_put(real_dev);
 }
 
 static void macsec_setup(struct net_device *dev)
@@ -3260,14 +3251,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
        if (err < 0)
                return err;
 
-       dev_hold(real_dev);
-
-       macsec->nest_level = dev_get_nest_level(real_dev) + 1;
-       netdev_lockdep_set_classes(dev);
-       lockdep_set_class_and_subclass(&dev->addr_list_lock,
-                                      &macsec_netdev_addr_lock_key,
-                                      macsec_get_nest_level(dev));
-
        err = netdev_upper_dev_link(real_dev, dev, extack);
        if (err < 0)
                goto unregister;
index 940192c..34fc59b 100644 (file)
@@ -852,8 +852,6 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
  * "super class" of normal network devices; split their locks off into a
  * separate class since they always nest.
  */
-static struct lock_class_key macvlan_netdev_addr_lock_key;
-
 #define ALWAYS_ON_OFFLOADS \
        (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
         NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
@@ -869,19 +867,6 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
 #define MACVLAN_STATE_MASK \
        ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
 
-static int macvlan_get_nest_level(struct net_device *dev)
-{
-       return ((struct macvlan_dev *)netdev_priv(dev))->nest_level;
-}
-
-static void macvlan_set_lockdep_class(struct net_device *dev)
-{
-       netdev_lockdep_set_classes(dev);
-       lockdep_set_class_and_subclass(&dev->addr_list_lock,
-                                      &macvlan_netdev_addr_lock_key,
-                                      macvlan_get_nest_level(dev));
-}
-
 static int macvlan_init(struct net_device *dev)
 {
        struct macvlan_dev *vlan = netdev_priv(dev);
@@ -900,8 +885,6 @@ static int macvlan_init(struct net_device *dev)
        dev->gso_max_segs       = lowerdev->gso_max_segs;
        dev->hard_header_len    = lowerdev->hard_header_len;
 
-       macvlan_set_lockdep_class(dev);
-
        vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
        if (!vlan->pcpu_stats)
                return -ENOMEM;
@@ -1161,7 +1144,6 @@ static const struct net_device_ops macvlan_netdev_ops = {
        .ndo_fdb_add            = macvlan_fdb_add,
        .ndo_fdb_del            = macvlan_fdb_del,
        .ndo_fdb_dump           = ndo_dflt_fdb_dump,
-       .ndo_get_lock_subclass  = macvlan_get_nest_level,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = macvlan_dev_poll_controller,
        .ndo_netpoll_setup      = macvlan_dev_netpoll_setup,
@@ -1445,7 +1427,6 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
        vlan->dev      = dev;
        vlan->port     = port;
        vlan->set_features = MACVLAN_FEATURES;
-       vlan->nest_level = dev_get_nest_level(lowerdev) + 1;
 
        vlan->mode     = MACVLAN_MODE_VEPA;
        if (data && data[IFLA_MACVLAN_MODE])
index 56576d4..54ca668 100644 (file)
@@ -806,9 +806,11 @@ static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev)
 {
        struct nsim_dev_port *nsim_dev_port, *tmp;
 
+       mutex_lock(&nsim_dev->port_list_lock);
        list_for_each_entry_safe(nsim_dev_port, tmp,
                                 &nsim_dev->port_list, list)
                __nsim_dev_port_del(nsim_dev_port);
+       mutex_unlock(&nsim_dev->port_list_lock);
 }
 
 int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
@@ -822,14 +824,17 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
                return PTR_ERR(nsim_dev);
        dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev);
 
+       mutex_lock(&nsim_dev->port_list_lock);
        for (i = 0; i < nsim_bus_dev->port_count; i++) {
                err = __nsim_dev_port_add(nsim_dev, i);
                if (err)
                        goto err_port_del_all;
        }
+       mutex_unlock(&nsim_dev->port_list_lock);
        return 0;
 
 err_port_del_all:
+       mutex_unlock(&nsim_dev->port_list_lock);
        nsim_dev_port_del_all(nsim_dev);
        nsim_dev_destroy(nsim_dev);
        return err;
index f61d094..1a251f7 100644 (file)
@@ -241,8 +241,8 @@ static struct pernet_operations nsim_fib_net_ops = {
 
 void nsim_fib_exit(void)
 {
-       unregister_pernet_subsys(&nsim_fib_net_ops);
        unregister_fib_notifier(&nsim_fib_nb);
+       unregister_pernet_subsys(&nsim_fib_net_ops);
 }
 
 int nsim_fib_init(void)
@@ -258,6 +258,7 @@ int nsim_fib_init(void)
        err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent);
        if (err < 0) {
                pr_err("Failed to register fib notifier\n");
+               unregister_pernet_subsys(&nsim_fib_net_ops);
                goto err_out;
        }
 
index 03be30c..fe60264 100644 (file)
@@ -460,9 +460,9 @@ config RENESAS_PHY
          Supports the Renesas PHYs uPD60620 and uPD60620A.
 
 config ROCKCHIP_PHY
-        tristate "Driver for Rockchip Ethernet PHYs"
-        ---help---
-          Currently supports the integrated Ethernet PHY.
+       tristate "Driver for Rockchip Ethernet PHYs"
+       ---help---
+         Currently supports the integrated Ethernet PHY.
 
 config SMSC_PHY
        tristate "SMSC PHYs"
index 2aa7b2e..1eb5d4f 100644 (file)
 #include <linux/of_gpio.h>
 #include <linux/gpio/consumer.h>
 
+#define AT803X_SPECIFIC_STATUS                 0x11
+#define AT803X_SS_SPEED_MASK                   (3 << 14)
+#define AT803X_SS_SPEED_1000                   (2 << 14)
+#define AT803X_SS_SPEED_100                    (1 << 14)
+#define AT803X_SS_SPEED_10                     (0 << 14)
+#define AT803X_SS_DUPLEX                       BIT(13)
+#define AT803X_SS_SPEED_DUPLEX_RESOLVED                BIT(11)
+#define AT803X_SS_MDIX                         BIT(6)
+
 #define AT803X_INTR_ENABLE                     0x12
 #define AT803X_INTR_ENABLE_AUTONEG_ERR         BIT(15)
 #define AT803X_INTR_ENABLE_SPEED_CHANGED       BIT(14)
@@ -357,6 +366,64 @@ static int at803x_aneg_done(struct phy_device *phydev)
        return aneg_done;
 }
 
+static int at803x_read_status(struct phy_device *phydev)
+{
+       int ss, err, old_link = phydev->link;
+
+       /* Update the link, but return if there was an error */
+       err = genphy_update_link(phydev);
+       if (err)
+               return err;
+
+       /* why bother the PHY if nothing can have changed */
+       if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link)
+               return 0;
+
+       phydev->speed = SPEED_UNKNOWN;
+       phydev->duplex = DUPLEX_UNKNOWN;
+       phydev->pause = 0;
+       phydev->asym_pause = 0;
+
+       err = genphy_read_lpa(phydev);
+       if (err < 0)
+               return err;
+
+       /* Read the AT8035 PHY-Specific Status register, which indicates the
+        * speed and duplex that the PHY is actually using, irrespective of
+        * whether we are in autoneg mode or not.
+        */
+       ss = phy_read(phydev, AT803X_SPECIFIC_STATUS);
+       if (ss < 0)
+               return ss;
+
+       if (ss & AT803X_SS_SPEED_DUPLEX_RESOLVED) {
+               switch (ss & AT803X_SS_SPEED_MASK) {
+               case AT803X_SS_SPEED_10:
+                       phydev->speed = SPEED_10;
+                       break;
+               case AT803X_SS_SPEED_100:
+                       phydev->speed = SPEED_100;
+                       break;
+               case AT803X_SS_SPEED_1000:
+                       phydev->speed = SPEED_1000;
+                       break;
+               }
+               if (ss & AT803X_SS_DUPLEX)
+                       phydev->duplex = DUPLEX_FULL;
+               else
+                       phydev->duplex = DUPLEX_HALF;
+               if (ss & AT803X_SS_MDIX)
+                       phydev->mdix = ETH_TP_MDI_X;
+               else
+                       phydev->mdix = ETH_TP_MDI;
+       }
+
+       if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete)
+               phy_resolve_aneg_pause(phydev);
+
+       return 0;
+}
+
 static struct phy_driver at803x_driver[] = {
 {
        /* ATHEROS 8035 */
@@ -370,6 +437,7 @@ static struct phy_driver at803x_driver[] = {
        .suspend                = at803x_suspend,
        .resume                 = at803x_resume,
        /* PHY_GBIT_FEATURES */
+       .read_status            = at803x_read_status,
        .ack_interrupt          = at803x_ack_interrupt,
        .config_intr            = at803x_config_intr,
 }, {
@@ -399,6 +467,7 @@ static struct phy_driver at803x_driver[] = {
        .suspend                = at803x_suspend,
        .resume                 = at803x_resume,
        /* PHY_GBIT_FEATURES */
+       .read_status            = at803x_read_status,
        .aneg_done              = at803x_aneg_done,
        .ack_interrupt          = &at803x_ack_interrupt,
        .config_intr            = &at803x_config_intr,
index 8fc3386..af8eabe 100644 (file)
@@ -572,6 +572,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
        .name           = _name,                                        \
        /* PHY_BASIC_FEATURES */                                        \
        .flags          = PHY_IS_INTERNAL,                              \
+       .soft_reset     = genphy_soft_reset,                            \
        .config_init    = bcm7xxx_config_init,                          \
        .suspend        = bcm7xxx_suspend,                              \
        .resume         = bcm7xxx_config_init,                          \
index e282600..c1d345c 100644 (file)
@@ -121,7 +121,7 @@ void mdio_device_reset(struct mdio_device *mdiodev, int value)
                return;
 
        if (mdiodev->reset_gpio)
-               gpiod_set_value(mdiodev->reset_gpio, value);
+               gpiod_set_value_cansleep(mdiodev->reset_gpio, value);
 
        if (mdiodev->reset_ctrl) {
                if (value)
index 3c8186f..63dedec 100644 (file)
@@ -341,6 +341,35 @@ static int ksz8041_config_aneg(struct phy_device *phydev)
        return genphy_config_aneg(phydev);
 }
 
+static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
+                                           const u32 ksz_phy_id)
+{
+       int ret;
+
+       if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id)
+               return 0;
+
+       ret = phy_read(phydev, MII_BMSR);
+       if (ret < 0)
+               return ret;
+
+       /* KSZ8051 PHY and KSZ8794/KSZ8795/KSZ8765 switch share the same
+        * exact PHY ID. However, they can be told apart by the extended
+        * capability registers presence. The KSZ8051 PHY has them while
+        * the switch does not.
+        */
+       ret &= BMSR_ERCAP;
+       if (ksz_phy_id == PHY_ID_KSZ8051)
+               return ret;
+       else
+               return !ret;
+}
+
+static int ksz8051_match_phy_device(struct phy_device *phydev)
+{
+       return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051);
+}
+
 static int ksz8081_config_init(struct phy_device *phydev)
 {
        /* KSZPHY_OMSO_FACTORY_TEST is set at de-assertion of the reset line
@@ -364,6 +393,11 @@ static int ksz8061_config_init(struct phy_device *phydev)
        return kszphy_config_init(phydev);
 }
 
+static int ksz8795_match_phy_device(struct phy_device *phydev)
+{
+       return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX);
+}
+
 static int ksz9021_load_values_from_of(struct phy_device *phydev,
                                       const struct device_node *of_node,
                                       u16 reg,
@@ -763,6 +797,8 @@ static int ksz9031_get_features(struct phy_device *phydev)
         * Whenever the device's Asymmetric Pause capability is set to 1,
         * link-up may fail after a link-up to link-down transition.
         *
+        * The Errata Sheet is for ksz9031, but ksz9021 has the same issue
+        *
         * Workaround:
         * Do not enable the Asymmetric Pause capability bit.
         */
@@ -1015,8 +1051,6 @@ static struct phy_driver ksphy_driver[] = {
        .suspend        = genphy_suspend,
        .resume         = genphy_resume,
 }, {
-       .phy_id         = PHY_ID_KSZ8051,
-       .phy_id_mask    = MICREL_PHY_ID_MASK,
        .name           = "Micrel KSZ8051",
        /* PHY_BASIC_FEATURES */
        .driver_data    = &ksz8051_type,
@@ -1027,6 +1061,7 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
+       .match_phy_device = ksz8051_match_phy_device,
        .suspend        = genphy_suspend,
        .resume         = genphy_resume,
 }, {
@@ -1076,6 +1111,7 @@ static struct phy_driver ksphy_driver[] = {
        /* PHY_GBIT_FEATURES */
        .driver_data    = &ksz9021_type,
        .probe          = kszphy_probe,
+       .get_features   = ksz9031_get_features,
        .config_init    = ksz9021_config_init,
        .ack_interrupt  = kszphy_ack_interrupt,
        .config_intr    = kszphy_config_intr,
@@ -1138,13 +1174,12 @@ static struct phy_driver ksphy_driver[] = {
        .suspend        = genphy_suspend,
        .resume         = genphy_resume,
 }, {
-       .phy_id         = PHY_ID_KSZ8795,
-       .phy_id_mask    = MICREL_PHY_ID_MASK,
-       .name           = "Micrel KSZ8795",
+       .name           = "Micrel KSZ87XX Switch",
        /* PHY_BASIC_FEATURES */
        .config_init    = kszphy_config_init,
        .config_aneg    = ksz8873mll_config_aneg,
        .read_status    = ksz8873mll_read_status,
+       .match_phy_device = ksz8795_match_phy_device,
        .suspend        = genphy_suspend,
        .resume         = genphy_resume,
 }, {
index a221dd5..a5bf087 100644 (file)
@@ -105,14 +105,17 @@ static void ns_giga_speed_fallback(struct phy_device *phydev, int mode)
 
 static void ns_10_base_t_hdx_loopack(struct phy_device *phydev, int disable)
 {
+       u16 lb_dis = BIT(1);
+
        if (disable)
-               ns_exp_write(phydev, 0x1c0, ns_exp_read(phydev, 0x1c0) | 1);
+               ns_exp_write(phydev, 0x1c0,
+                            ns_exp_read(phydev, 0x1c0) | lb_dis);
        else
                ns_exp_write(phydev, 0x1c0,
-                            ns_exp_read(phydev, 0x1c0) & 0xfffe);
+                            ns_exp_read(phydev, 0x1c0) & ~lb_dis);
 
        pr_debug("10BASE-T HDX loopback %s\n",
-                (ns_exp_read(phydev, 0x1c0) & 0x0001) ? "off" : "on");
+                (ns_exp_read(phydev, 0x1c0) & lb_dis) ? "off" : "on");
 }
 
 static int ns_config_init(struct phy_device *phydev)
index 7935593..a1caeee 100644 (file)
@@ -323,6 +323,8 @@ int genphy_c45_read_pma(struct phy_device *phydev)
 {
        int val;
 
+       linkmode_zero(phydev->lp_advertising);
+
        val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1);
        if (val < 0)
                return val;
index 369903d..9412669 100644 (file)
@@ -283,6 +283,18 @@ void of_set_phy_eee_broken(struct phy_device *phydev)
        phydev->eee_broken_modes = broken;
 }
 
+void phy_resolve_aneg_pause(struct phy_device *phydev)
+{
+       if (phydev->duplex == DUPLEX_FULL) {
+               phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+                                                 phydev->lp_advertising);
+               phydev->asym_pause = linkmode_test_bit(
+                       ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+                       phydev->lp_advertising);
+       }
+}
+EXPORT_SYMBOL_GPL(phy_resolve_aneg_pause);
+
 /**
  * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
  * @phydev: The phy_device struct
@@ -305,13 +317,7 @@ void phy_resolve_aneg_linkmode(struct phy_device *phydev)
                        break;
                }
 
-       if (phydev->duplex == DUPLEX_FULL) {
-               phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-                                                 phydev->lp_advertising);
-               phydev->asym_pause = linkmode_test_bit(
-                       ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-                       phydev->lp_advertising);
-       }
+       phy_resolve_aneg_pause(phydev);
 }
 EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode);
 
index 7c92afd..105d389 100644 (file)
@@ -457,6 +457,11 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
                                                           val);
                                change_autoneg = true;
                                break;
+                       case MII_CTRL1000:
+                               mii_ctrl1000_mod_linkmode_adv_t(phydev->advertising,
+                                                               val);
+                               change_autoneg = true;
+                               break;
                        default:
                                /* do nothing */
                                break;
@@ -567,9 +572,6 @@ int phy_start_aneg(struct phy_device *phydev)
        if (AUTONEG_DISABLE == phydev->autoneg)
                phy_sanitize_settings(phydev);
 
-       /* Invalidate LP advertising flags */
-       linkmode_zero(phydev->lp_advertising);
-
        err = phy_config_aneg(phydev);
        if (err < 0)
                goto out_unlock;
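Two independent changes in phy.c: SIOCSMIIREG writes to MII_CTRL1000 now fold the 1000BASE-T control bits into phydev->advertising and trigger renegotiation, mirroring the existing MII_ADVERTISE case, and the lp_advertising invalidation leaves phy_start_aneg() because the read paths shown earlier now own it. A userspace sketch of the ioctl this enables (a real tool would first issue SIOCGMIIPHY to fill phy_id; "eth0" is a placeholder and error handling is trimmed):

#include <linux/mii.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

int advertise_1000full(void)
{
        struct ifreq ifr = {0};
        struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        mii->reg_num = MII_CTRL1000;
        mii->val_in = ADVERTISE_1000FULL;
        return ioctl(fd, SIOCSMIIREG, &ifr);    /* reaches the case above */
}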
index d347ddc..adb66a2 100644 (file)
@@ -1783,34 +1783,18 @@ done:
 }
 EXPORT_SYMBOL(genphy_update_link);
 
-/**
- * genphy_read_status - check the link status and update current link state
- * @phydev: target phy_device struct
- *
- * Description: Check the link, then figure out the current state
- *   by comparing what we advertise with what the link partner
- *   advertises.  Start by checking the gigabit possibilities,
- *   then move on to 10/100.
- */
-int genphy_read_status(struct phy_device *phydev)
+int genphy_read_lpa(struct phy_device *phydev)
 {
-       int lpa, lpagb, err, old_link = phydev->link;
-
-       /* Update the link, but return if there was an error */
-       err = genphy_update_link(phydev);
-       if (err)
-               return err;
-
-       /* why bother the PHY if nothing can have changed */
-       if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link)
-               return 0;
+       int lpa, lpagb;
 
-       phydev->speed = SPEED_UNKNOWN;
-       phydev->duplex = DUPLEX_UNKNOWN;
-       phydev->pause = 0;
-       phydev->asym_pause = 0;
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               if (!phydev->autoneg_complete) {
+                       mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising,
+                                                       0);
+                       mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, 0);
+                       return 0;
+               }
 
-       if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
                if (phydev->is_gigabit_capable) {
                        lpagb = phy_read(phydev, MII_STAT1000);
                        if (lpagb < 0)
@@ -1838,6 +1822,46 @@ int genphy_read_status(struct phy_device *phydev)
                        return lpa;
 
                mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa);
+       } else {
+               linkmode_zero(phydev->lp_advertising);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(genphy_read_lpa);
+
+/**
+ * genphy_read_status - check the link status and update current link state
+ * @phydev: target phy_device struct
+ *
+ * Description: Check the link, then figure out the current state
+ *   by comparing what we advertise with what the link partner
+ *   advertises.  Start by checking the gigabit possibilities,
+ *   then move on to 10/100.
+ */
+int genphy_read_status(struct phy_device *phydev)
+{
+       int err, old_link = phydev->link;
+
+       /* Update the link, but return if there was an error */
+       err = genphy_update_link(phydev);
+       if (err)
+               return err;
+
+       /* why bother the PHY if nothing can have changed */
+       if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link)
+               return 0;
+
+       phydev->speed = SPEED_UNKNOWN;
+       phydev->duplex = DUPLEX_UNKNOWN;
+       phydev->pause = 0;
+       phydev->asym_pause = 0;
+
+       err = genphy_read_lpa(phydev);
+       if (err < 0)
+               return err;
+
+       if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
                phy_resolve_aneg_linkmode(phydev);
        } else if (phydev->autoneg == AUTONEG_DISABLE) {
                int bmcr = phy_read(phydev, MII_BMCR);
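genphy_read_status() is split so the STAT1000/LPA register reads become a reusable genphy_read_lpa(); the status path is now update link, early out, reset state, read LPA, resolve. A driver with a vendor-specific speed/duplex register can reuse the generic part, roughly like this (foo_read_hw_status() is hypothetical):

static int foo_read_status(struct phy_device *phydev)
{
        int err;

        err = genphy_update_link(phydev);       /* BMSR: link up/down */
        if (err)
                return err;

        err = genphy_read_lpa(phydev);          /* standard LPA registers */
        if (err < 0)
                return err;

        return foo_read_hw_status(phydev);      /* vendor speed/duplex */
}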
index a5a57ca..a578f7e 100644 (file)
@@ -87,8 +87,24 @@ struct phylink {
        phylink_printk(KERN_WARNING, pl, fmt, ##__VA_ARGS__)
 #define phylink_info(pl, fmt, ...) \
        phylink_printk(KERN_INFO, pl, fmt, ##__VA_ARGS__)
+#if defined(CONFIG_DYNAMIC_DEBUG)
 #define phylink_dbg(pl, fmt, ...) \
+do {                                                                   \
+       if ((pl)->config->type == PHYLINK_NETDEV)                       \
+               netdev_dbg((pl)->netdev, fmt, ##__VA_ARGS__);           \
+       else if ((pl)->config->type == PHYLINK_DEV)                     \
+               dev_dbg((pl)->dev, fmt, ##__VA_ARGS__);                 \
+} while (0)
+#elif defined(DEBUG)
+#define phylink_dbg(pl, fmt, ...)                                      \
        phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__)
+#else
+#define phylink_dbg(pl, fmt, ...)                                      \
+({                                                                     \
+       if (0)                                                          \
+               phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__);     \
+})
+#endif
 
 /**
  * phylink_set_port_modes() - set the port type modes in the ethtool mask
@@ -576,7 +592,7 @@ static int phylink_register_sfp(struct phylink *pl,
 
 /**
  * phylink_create() - create a phylink instance
- * @ndev: a pointer to the &struct net_device
+ * @config: a pointer to the target &struct phylink_config
  * @fwnode: a pointer to a &struct fwnode_handle describing the network
  *     interface
  * @iface: the desired link mode defined by &typedef phy_interface_t
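phylink_dbg() now has three builds: with dynamic debug it routes through netdev_dbg()/dev_dbg() so per-callsite enabling works, with DEBUG it always prints, and otherwise it compiles to nothing while the if (0) keeps the arguments type-checked against the format string. The compile-away idiom in isolation:

/* generates no code, but printk's format checking still runs */
#define my_dbg(fmt, ...)                                \
({                                                      \
        if (0)                                          \
                printk(KERN_DEBUG fmt, ##__VA_ARGS__);  \
})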
index dc3d92d..b732982 100644 (file)
@@ -327,6 +327,7 @@ static struct phy_driver smsc_phy_driver[] = {
        .name           = "SMSC LAN8740",
 
        /* PHY_BASIC_FEATURES */
+       .flags          = PHY_RST_AFTER_CLK_EN,
 
        .probe          = smsc_phy_probe,
 
index a30e41a..61824bb 100644 (file)
@@ -1324,8 +1324,6 @@ static int ppp_dev_init(struct net_device *dev)
 {
        struct ppp *ppp;
 
-       netdev_lockdep_set_classes(dev);
-
        ppp = netdev_priv(dev);
        /* Let the netdevice take a reference on the ppp file. This ensures
         * that ppp_destroy_interface() won't run before the device gets
@@ -1415,6 +1413,8 @@ static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
                        netif_wake_queue(ppp->dev);
                else
                        netif_stop_queue(ppp->dev);
+       } else {
+               kfree_skb(skb);
        }
        ppp_xmit_unlock(ppp);
 }
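The new else branch plugs a leak: when the ppp unit is not accepting transmits, the skb was dropped without ever being freed. The ownership rule being restored, schematically (queue_skb() is a placeholder):

static void xmit_one(struct sk_buff *skb, bool closing)
{
        if (!closing)
                queue_skb(skb);         /* ownership handed off */
        else
                kfree_skb(skb);         /* every other exit must free */
}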
index 734de7d..e1fabb3 100644 (file)
@@ -238,7 +238,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        skb->ip_summed = CHECKSUM_NONE;
        ip_select_ident(net, skb, NULL);
@@ -358,7 +358,7 @@ static int pptp_rcv(struct sk_buff *skb)
        po = lookup_chan(htons(header->call_id), iph->saddr);
        if (po) {
                skb_dst_drop(skb);
-               nf_reset(skb);
+               nf_reset_ct(skb);
                return sk_receive_skb(sk_pppox(po), skb, 0);
        }
 drop:
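The nf_reset() to nf_reset_ct() rename recurs throughout this series (pptp above; tun, virtio_net and vrf below): the old name also cleared bridge-netfilter state, while the renamed helper is scoped to dropping only the conntrack reference. Roughly its shape (paraphrased from the skbuff.h of this era; treat as illustrative):

static inline void nf_reset_ct(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
        nf_conntrack_put(skb_nfct(skb));        /* drop the ct reference */
        skb->_nfct = 0;
#endif
}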
index dd614c2..3ae70c7 100644 (file)
@@ -1200,7 +1200,7 @@ err_kfree:
        kfree_skb(skb);
 err:
        rcu_read_lock();
-               tap = rcu_dereference(q->tap);
+       tap = rcu_dereference(q->tap);
        if (tap && tap->count_tx_dropped)
                tap->count_tx_dropped(tap);
        rcu_read_unlock();
index e8089de..8156b33 100644 (file)
@@ -1615,7 +1615,6 @@ static int team_init(struct net_device *dev)
        int err;
 
        team->dev = dev;
-       mutex_init(&team->lock);
        team_set_no_mode(team);
 
        team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats);
@@ -1642,7 +1641,8 @@ static int team_init(struct net_device *dev)
                goto err_options_register;
        netif_carrier_off(dev);
 
-       netdev_lockdep_set_classes(dev);
+       lockdep_register_key(&team->team_lock_key);
+       __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key);
 
        return 0;
 
@@ -1673,6 +1673,7 @@ static void team_uninit(struct net_device *dev)
        team_queue_override_fini(team);
        mutex_unlock(&team->lock);
        netdev_change_features(dev);
+       lockdep_unregister_key(&team->team_lock_key);
 }
 
 static void team_destructor(struct net_device *dev)
@@ -1976,8 +1977,15 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
        err = team_port_del(team, port_dev);
        mutex_unlock(&team->lock);
 
-       if (!err)
-               netdev_change_features(dev);
+       if (err)
+               return err;
+
+       if (netif_is_team_master(port_dev)) {
+               lockdep_unregister_key(&team->team_lock_key);
+               lockdep_register_key(&team->team_lock_key);
+               lockdep_set_class(&team->lock, &team->team_lock_key);
+       }
+       netdev_change_features(dev);
 
        return err;
 }
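team previously shared one static lockdep class across every instance, so nesting a team device under another produced false deadlock reports; each device now registers its own key at init, re-registers it in team_del_slave() when a nested team master leaves (so the class reflects the new topology), and unregisters it at uninit. The per-instance key pattern in isolation:

struct foo {
        struct mutex lock;
        struct lock_class_key key;      /* one lock class per instance */
};

static void foo_init(struct foo *f)
{
        lockdep_register_key(&f->key);
        __mutex_init(&f->lock, "foo->lock", &f->key);
}

static void foo_fini(struct foo *f)
{
        mutex_destroy(&f->lock);
        lockdep_unregister_key(&f->key);
}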
index aab0be4..a8d3141 100644 (file)
@@ -526,8 +526,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
        e = tun_flow_find(head, rxhash);
        if (likely(e)) {
                /* TODO: keep queueing to old queue until it's empty? */
-               if (e->queue_index != queue_index)
-                       e->queue_index = queue_index;
+               if (READ_ONCE(e->queue_index) != queue_index)
+                       WRITE_ONCE(e->queue_index, queue_index);
                if (e->updated != jiffies)
                        e->updated = jiffies;
                sock_rps_record_flow_hash(e->rps_rxhash);
@@ -1104,7 +1104,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
         */
        skb_orphan(skb);
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        if (ptr_ring_produce(&tfile->tx_ring, skb))
                goto drop;
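tun_flow_update() can race with lockless readers of e->queue_index, so the plain load and store were a data race; READ_ONCE()/WRITE_ONCE() make both accesses single and untorn and document the lockless design (this is also what keeps KCSAN quiet). The annotation pattern, with an illustrative struct:

struct flow_entry {
        u16 queue_index;
};

static void flow_set_queue(struct flow_entry *e, u16 qi)
{
        if (READ_ONCE(e->queue_index) != qi)    /* one untorn load */
                WRITE_ONCE(e->queue_index, qi); /* one untorn store */
}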
index 32f53de..fe63043 100644 (file)
@@ -787,6 +787,13 @@ static const struct usb_device_id  products[] = {
        .driver_info = 0,
 },
 
+/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */
+{
+       USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM,
+                       USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+       .driver_info = 0,
+},
+
 /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */
 {
        USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM,
index 50c05d0..00cab3f 100644 (file)
@@ -681,8 +681,12 @@ cdc_ncm_find_endpoints(struct usbnet *dev, struct usb_interface *intf)
        u8 ep;
 
        for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) {
-
                e = intf->cur_altsetting->endpoint + ep;
+
+               /* ignore endpoints which cannot transfer data */
+               if (!usb_endpoint_maxp(&e->desc))
+                       continue;
+
                switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
                case USB_ENDPOINT_XFER_INT:
                        if (usb_endpoint_dir_in(&e->desc)) {
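An endpoint whose wMaxPacketSize is zero can never carry data, and malformed or malicious descriptors may contain such endpoints; skipping them keeps cdc_ncm from binding an unusable pipe (usbnet below gains the identical guard). The scan pattern in isolation:

struct usb_host_interface *alt = intf->cur_altsetting;
int i;

for (i = 0; i < alt->desc.bNumEndpoints; i++) {
        struct usb_host_endpoint *e = alt->endpoint + i;

        if (!usb_endpoint_maxp(&e->desc))
                continue;       /* zero maxpacket: cannot transfer */
        /* ... classify by e->desc.bmAttributes ... */
}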
index ce78714..74849da 100644 (file)
@@ -186,7 +186,7 @@ struct hso_tiocmget {
        int    intr_completed;
        struct usb_endpoint_descriptor *endp;
        struct urb *urb;
-       struct hso_serial_state_notification serial_state_notification;
+       struct hso_serial_state_notification *serial_state_notification;
        u16    prev_UART_state_bitmap;
        struct uart_icount icount;
 };
@@ -1432,7 +1432,7 @@ static int tiocmget_submit_urb(struct hso_serial *serial,
                         usb_rcvintpipe(usb,
                                        tiocmget->endp->
                                        bEndpointAddress & 0x7F),
-                        &tiocmget->serial_state_notification,
+                        tiocmget->serial_state_notification,
                         sizeof(struct hso_serial_state_notification),
                         tiocmget_intr_callback, serial,
                         tiocmget->endp->bInterval);
@@ -1479,7 +1479,7 @@ static void tiocmget_intr_callback(struct urb *urb)
        /* wIndex should be the USB interface number of the port to which the
         * notification applies, which should always be the Modem port.
         */
-       serial_state_notification = &tiocmget->serial_state_notification;
+       serial_state_notification = tiocmget->serial_state_notification;
        if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE ||
            serial_state_notification->bNotification != B_NOTIFICATION ||
            le16_to_cpu(serial_state_notification->wValue) != W_VALUE ||
@@ -2565,6 +2565,8 @@ static void hso_free_tiomget(struct hso_serial *serial)
                usb_free_urb(tiocmget->urb);
                tiocmget->urb = NULL;
                serial->tiocmget = NULL;
+               kfree(tiocmget->serial_state_notification);
+               tiocmget->serial_state_notification = NULL;
                kfree(tiocmget);
        }
 }
@@ -2615,19 +2617,26 @@ static struct hso_device *hso_create_bulk_serial_device(
                num_urbs = 2;
                serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
                                           GFP_KERNEL);
+               serial->tiocmget->serial_state_notification
+                       = kzalloc(sizeof(struct hso_serial_state_notification),
+                                          GFP_KERNEL);
                /* it isn't going to break our heart if serial->tiocmget
                 *  allocation fails don't bother checking this.
                 */
-               if (serial->tiocmget) {
+               if (serial->tiocmget && serial->tiocmget->serial_state_notification) {
                        tiocmget = serial->tiocmget;
+                       tiocmget->endp = hso_get_ep(interface,
+                                                   USB_ENDPOINT_XFER_INT,
+                                                   USB_DIR_IN);
+                       if (!tiocmget->endp) {
+                               dev_err(&interface->dev, "Failed to find INT IN ep\n");
+                               goto exit;
+                       }
+
                        tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
                        if (tiocmget->urb) {
                                mutex_init(&tiocmget->mutex);
                                init_waitqueue_head(&tiocmget->waitq);
-                               tiocmget->endp = hso_get_ep(
-                                       interface,
-                                       USB_ENDPOINT_XFER_INT,
-                                       USB_DIR_IN);
                        } else
                                hso_free_tiomget(serial);
                }
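The interrupt URB's transfer buffer used to be embedded in struct hso_tiocmget; USB transfer buffers must be DMA-safe, and on non-coherent platforms DMA into a buffer that shares cache lines with neighbouring fields can corrupt them, so the buffer is now kzalloc'd separately (and freed in hso_free_tiomget()). The INT-IN endpoint lookup also gains an explicit failure path. The general shape (struct foo_priv and foo_intr_cb are hypothetical):

static int foo_setup_notify(struct foo_priv *priv, struct usb_device *udev,
                            struct urb *urb, unsigned int pipe, int interval)
{
        priv->notify = kzalloc(sizeof(*priv->notify), GFP_KERNEL);
        if (!priv->notify)
                return -ENOMEM;

        /* a dedicated heap allocation is safe for DMA; an embedded
         * struct member may not be */
        usb_fill_int_urb(urb, udev, pipe, priv->notify,
                         sizeof(*priv->notify), foo_intr_cb, priv, interval);
        return 0;
}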
index 58f5a21..f24a1b0 100644 (file)
@@ -1264,8 +1264,11 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
                netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
                lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
 
-               if (dev->domain_data.phyirq > 0)
+               if (dev->domain_data.phyirq > 0) {
+                       local_irq_disable();
                        generic_handle_irq(dev->domain_data.phyirq);
+                       local_irq_enable();
+               }
        } else
                netdev_warn(dev->net,
                            "unexpected interrupt: 0x%08x\n", intdata);
@@ -3782,10 +3785,14 @@ static int lan78xx_probe(struct usb_interface *intf,
        /* driver requires remote-wakeup capability during autosuspend. */
        intf->needs_remote_wakeup = 1;
 
+       ret = lan78xx_phy_init(dev);
+       if (ret < 0)
+               goto out4;
+
        ret = register_netdev(netdev);
        if (ret != 0) {
                netif_err(dev, probe, netdev, "couldn't register the device\n");
-               goto out4;
+               goto out5;
        }
 
        usb_set_intfdata(intf, dev);
@@ -3798,14 +3805,10 @@ static int lan78xx_probe(struct usb_interface *intf,
        pm_runtime_set_autosuspend_delay(&udev->dev,
                                         DEFAULT_AUTOSUSPEND_DELAY);
 
-       ret = lan78xx_phy_init(dev);
-       if (ret < 0)
-               goto out5;
-
        return 0;
 
 out5:
-       unregister_netdev(netdev);
+       phy_disconnect(netdev->phydev);
 out4:
        usb_free_urb(dev->urb_intr);
 out3:
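lan78xx_phy_init() moves ahead of register_netdev(): once the netdev is registered, userspace may open it immediately, so everything it depends on must already exist. The unwind labels are reordered to match, with out5 now disconnecting the PHY instead of unregistering the netdev. Schematically (helper names and labels are placeholders):

ret = foo_phy_init(dev);                /* finish all setup first ... */
if (ret < 0)
        goto err_free_urb;

ret = register_netdev(netdev);          /* ... then go live */
if (ret)
        goto err_phy;                   /* unwind in reverse order */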
index b6dc5d7..596428e 100644 (file)
@@ -1327,6 +1327,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x2357, 0x0201, 4)},    /* TP-LINK HSUPA Modem MA180 */
        {QMI_FIXED_INTF(0x2357, 0x9000, 4)},    /* TP-LINK MA260 */
        {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
+       {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */
        {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},    /* Telit ME910 */
        {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)},    /* Telit ME910 dual modem */
        {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},    /* Telit LE920 */
@@ -1350,6 +1351,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)},    /* Cinterion PHxx,PXxx (2 RmNet) */
        {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)},    /* Cinterion PHxx,PXxx (2 RmNet) */
        {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)},    /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/
+       {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */
        {QMI_FIXED_INTF(0x413c, 0x81a2, 8)},    /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81a3, 8)},    /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81a4, 8)},    /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
index 0872609..d4a95b5 100644 (file)
@@ -4799,10 +4799,9 @@ static int rtl8152_reset_resume(struct usb_interface *intf)
        struct r8152 *tp = usb_get_intfdata(intf);
 
        clear_bit(SELECTIVE_SUSPEND, &tp->flags);
-       mutex_lock(&tp->control);
        tp->rtl_ops.init(tp);
        queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0);
-       mutex_unlock(&tp->control);
+       set_ethernet_addr(tp);
        return rtl8152_resume(intf);
 }
 
@@ -5756,6 +5755,7 @@ static const struct usb_device_id rtl8152_table[] = {
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x720c)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7214)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0xa387)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)},
        {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA,  0x09ff)},
        {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK,  0x0601)},
index c5d4a00..681e0de 100644 (file)
@@ -335,7 +335,7 @@ static void sr_set_multicast(struct net_device *net)
 static int sr_mdio_read(struct net_device *net, int phy_id, int loc)
 {
        struct usbnet *dev = netdev_priv(net);
-       __le16 res;
+       __le16 res = 0;
 
        mutex_lock(&dev->phy_mutex);
        sr_set_sw_mii(dev);
index 58952a7..dde05e2 100644 (file)
@@ -100,6 +100,11 @@ int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf)
                        int                             intr = 0;
 
                        e = alt->endpoint + ep;
+
+                       /* ignore endpoints which cannot transfer data */
+                       if (!usb_endpoint_maxp(&e->desc))
+                               continue;
+
                        switch (e->desc.bmAttributes) {
                        case USB_ENDPOINT_XFER_INT:
                                if (!usb_endpoint_dir_in(&e->desc))
@@ -339,6 +344,8 @@ void usbnet_update_max_qlen(struct usbnet *dev)
 {
        enum usb_device_speed speed = dev->udev->speed;
 
+       if (!dev->rx_urb_size || !dev->hard_mtu)
+               goto insanity;
        switch (speed) {
        case USB_SPEED_HIGH:
                dev->rx_qlen = MAX_QUEUE_MEMORY / dev->rx_urb_size;
@@ -355,6 +362,7 @@ void usbnet_update_max_qlen(struct usbnet *dev)
                dev->tx_qlen = 5 * MAX_QUEUE_MEMORY / dev->hard_mtu;
                break;
        default:
+insanity:
                dev->rx_qlen = dev->tx_qlen = 4;
        }
 }
index ba98e09..5a635f0 100644 (file)
@@ -1585,7 +1585,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
        /* Don't wait up for transmitted skbs to be freed. */
        if (!use_napi) {
                skb_orphan(skb);
-               nf_reset(skb);
+               nf_reset_ct(skb);
        }
 
        /* If running out of space, stop queue to avoid getting packets that we
index 6e84328..b8228f5 100644 (file)
@@ -366,7 +366,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
        struct neighbour *neigh;
        int ret;
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;
@@ -459,7 +459,7 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
 
        /* reset skb device */
        if (likely(err == 1))
-               nf_reset(skb);
+               nf_reset_ct(skb);
        else
                skb = NULL;
 
@@ -560,7 +560,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        bool is_v6gw = false;
        int ret = -EINVAL;
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -670,7 +670,7 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
 
        /* reset skb device */
        if (likely(err == 1))
-               nf_reset(skb);
+               nf_reset_ct(skb);
        else
                skb = NULL;
 
@@ -865,7 +865,6 @@ static int vrf_dev_init(struct net_device *dev)
 
        /* similarly, oper state is irrelevant; set to up to avoid confusion */
        dev->operstate = IF_OPER_UP;
-       netdev_lockdep_set_classes(dev);
        return 0;
 
 out_rth:
@@ -1154,7 +1153,8 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
        struct sk_buff *skb;
        int err;
 
-       if (family == AF_INET6 && !ipv6_mod_enabled())
+       if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) &&
+           !ipv6_mod_enabled())
                return 0;
 
        skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL);
index 3d9bcc9..8869154 100644 (file)
@@ -2487,9 +2487,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                vni = tunnel_id_to_key32(info->key.tun_id);
                ifindex = 0;
                dst_cache = &info->dst_cache;
-               if (info->options_len &&
-                   info->key.tun_flags & TUNNEL_VXLAN_OPT)
+               if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+                       if (info->options_len < sizeof(*md))
+                               goto drop;
                        md = ip_tunnel_info_opts(info);
+               }
                ttl = info->key.ttl;
                tos = info->key.tos;
                label = info->key.label;
@@ -3566,10 +3568,13 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 {
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
        struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct net_device *remote_dev = NULL;
        struct vxlan_fdb *f = NULL;
        bool unregister = false;
+       struct vxlan_rdst *dst;
        int err;
 
+       dst = &vxlan->default_dst;
        err = vxlan_dev_configure(net, dev, conf, false, extack);
        if (err)
                return err;
@@ -3577,14 +3582,14 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
        dev->ethtool_ops = &vxlan_ethtool_ops;
 
        /* create an fdb entry for a valid default destination */
-       if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+       if (!vxlan_addr_any(&dst->remote_ip)) {
                err = vxlan_fdb_create(vxlan, all_zeros_mac,
-                                      &vxlan->default_dst.remote_ip,
+                                      &dst->remote_ip,
                                       NUD_REACHABLE | NUD_PERMANENT,
                                       vxlan->cfg.dst_port,
-                                      vxlan->default_dst.remote_vni,
-                                      vxlan->default_dst.remote_vni,
-                                      vxlan->default_dst.remote_ifindex,
+                                      dst->remote_vni,
+                                      dst->remote_vni,
+                                      dst->remote_ifindex,
                                       NTF_SELF, &f);
                if (err)
                        return err;
@@ -3595,26 +3600,41 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
                goto errout;
        unregister = true;
 
+       if (dst->remote_ifindex) {
+               remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
+               if (!remote_dev)
+                       goto errout;
+
+               err = netdev_upper_dev_link(remote_dev, dev, extack);
+               if (err)
+                       goto errout;
+       }
+
        err = rtnl_configure_link(dev, NULL);
        if (err)
-               goto errout;
+               goto unlink;
 
        if (f) {
-               vxlan_fdb_insert(vxlan, all_zeros_mac,
-                                vxlan->default_dst.remote_vni, f);
+               vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
 
                /* notify default fdb entry */
                err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
                                       RTM_NEWNEIGH, true, extack);
                if (err) {
                        vxlan_fdb_destroy(vxlan, f, false, false);
+                       if (remote_dev)
+                               netdev_upper_dev_unlink(remote_dev, dev);
                        goto unregister;
                }
        }
 
        list_add(&vxlan->next, &vn->vxlan_list);
+       if (remote_dev)
+               dst->remote_dev = remote_dev;
        return 0;
-
+unlink:
+       if (remote_dev)
+               netdev_upper_dev_unlink(remote_dev, dev);
 errout:
        /* unregister_netdevice() destroys the default FDB entry with deletion
         * notification. But the addition notification was not sent yet, so
@@ -3932,11 +3952,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
                            struct netlink_ext_ack *extack)
 {
        struct vxlan_dev *vxlan = netdev_priv(dev);
-       struct vxlan_rdst *dst = &vxlan->default_dst;
        struct net_device *lowerdev;
        struct vxlan_config conf;
+       struct vxlan_rdst *dst;
        int err;
 
+       dst = &vxlan->default_dst;
        err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
        if (err)
                return err;
@@ -3946,6 +3967,14 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
        if (err)
                return err;
 
+       if (dst->remote_dev == lowerdev)
+               lowerdev = NULL;
+
+       err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
+                                            extack);
+       if (err)
+               return err;
+
        /* handle default dst entry */
        if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
                u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
@@ -3962,6 +3991,8 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
                                               NTF_SELF, true, extack);
                        if (err) {
                                spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+                               netdev_adjacent_change_abort(dst->remote_dev,
+                                                            lowerdev, dev);
                                return err;
                        }
                }
@@ -3979,6 +4010,11 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
        if (conf.age_interval != vxlan->cfg.age_interval)
                mod_timer(&vxlan->age_timer, jiffies);
 
+       netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
+       if (lowerdev && lowerdev != dst->remote_dev) {
+               dst->remote_dev = lowerdev;
+               netdev_update_lockdep_key(lowerdev);
+       }
        vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
        return 0;
 }
@@ -3991,6 +4027,8 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
 
        list_del(&vxlan->next);
        unregister_netdevice_queue(dev, head);
+       if (vxlan->default_dst.remote_dev)
+               netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
 }
 
 static size_t vxlan_get_size(const struct net_device *dev)
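vxlan now treats its underlay as a proper lower device: __vxlan_dev_create() links it with netdev_upper_dev_link() and unwinds the link on every failure path, vxlan_changelink() swaps lowerdevs through the prepare/commit/abort adjacency API so a failed FDB update leaves the topology untouched, and dellink drops the pairing. The core pairing in isolation (lower/upper are placeholders):

err = netdev_upper_dev_link(lower, upper, extack);
if (err)
        return err;
/* ... on teardown ... */
netdev_upper_dev_unlink(lower, upper);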
index 8efb493..5c79f05 100644 (file)
@@ -127,12 +127,12 @@ int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev,
                        "%d\n", result);
        result = 0;
 error_cmd:
-       kfree(cmd);
        kfree_skb(ack_skb);
 error_msg_to_dev:
 error_alloc:
        d_fnend(4, dev, "(wimax_dev %p state %d) = %d\n",
                wimax_dev, state, result);
+       kfree(cmd);
        return result;
 }
 
index ebd64e0..1255302 100644 (file)
@@ -654,8 +654,7 @@ void i2400m_tx_close(struct i2400m *i2400m)
        padding = aligned_size - tx_msg_moved->size;
        if (padding > 0) {
                pad_buf = i2400m_tx_fifo_push(i2400m, padding, 0, 0);
-               if (unlikely(WARN_ON(pad_buf == NULL
-                                    || pad_buf == TAIL_FULL))) {
+               if (WARN_ON(pad_buf == NULL || pad_buf == TAIL_FULL)) {
                        /* This should not happen -- append should verify
                         * there is always space left at least to append
                         * tx_block_size */
index d98d6ac..56616d9 100644 (file)
@@ -34,7 +34,7 @@ config ATH_TRACEPOINTS
        depends on ATH_DEBUG
        depends on EVENT_TRACING
        ---help---
-         This option enables tracepoints for atheros wireless drivers.
+        This option enables tracepoints for Atheros wireless drivers.
         Currently, ath9k makes use of this facility.
 
 config ATH_REG_DYNAMIC_USER_REG_HINTS
index 41d3c9a..65b39c7 100644 (file)
@@ -5,5 +5,5 @@ config AR5523
        select ATH_COMMON
        select FW_LOADER
        ---help---
-         This module add support for AR5523 based USB dongles such as D-Link
-         DWL-G132, Netgear WPN111 and many more.
+        This module adds support for AR5523 based USB dongles such as D-Link
+        DWL-G132, Netgear WPN111 and many more.
index dc45d16..383d4fa 100644 (file)
@@ -2118,12 +2118,15 @@ static int ath10k_init_uart(struct ath10k *ar)
                return ret;
        }
 
-       if (!uart_print && ar->hw_params.uart_pin_workaround) {
-               ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin,
-                                        ar->hw_params.uart_pin);
-               if (ret) {
-                       ath10k_warn(ar, "failed to set UART TX pin: %d", ret);
-                       return ret;
+       if (!uart_print) {
+               if (ar->hw_params.uart_pin_workaround) {
+                       ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin,
+                                                ar->hw_params.uart_pin);
+                       if (ret) {
+                               ath10k_warn(ar, "failed to set UART TX pin: %d",
+                                           ret);
+                               return ret;
+                       }
                }
 
                return 0;
index c6156cc..d5ee32c 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <linux/nl80211.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/etherdevice.h>
 #include <linux/module.h>
 #include "../ath.h"
index dcf8ca0..62c22fd 100644 (file)
@@ -2,7 +2,7 @@
 config ATH6KL
        tristate "Atheros mobile chipsets support"
        depends on CFG80211
-        ---help---
+       ---help---
          This module adds core support for wireless adapters based on
          Atheros AR6003 and AR6004 chipsets. You still need separate
          bus drivers for USB and SDIO to be able to use real devices.
index 2d1247f..c99f422 100644 (file)
@@ -148,7 +148,7 @@ config ATH9K_CHANNEL_CONTEXT
        depends on ATH9K
        default n
        ---help---
-         This option enables channel context support in ath9k, which is needed
+        This option enables channel context support in ath9k, which is needed
         for multi-channel concurrency. Enable this if P2P PowerSave support
         is required.
 
index 757eb76..b1bce7a 100644 (file)
@@ -41,9 +41,9 @@ config CARL9170_WPC
        default y
 
 config CARL9170_HWRNG
-        bool "Random number generator"
-        depends on CARL9170 && (HW_RANDOM = y || HW_RANDOM = CARL9170)
-        default n
+       bool "Random number generator"
+       depends on CARL9170 && (HW_RANDOM = y || HW_RANDOM = CARL9170)
+       default n
        help
          Provides a hardware random number generator to the kernel.
 
index cb13652..598c1fb 100644 (file)
@@ -1012,11 +1012,11 @@ void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev)
        skb_orphan(skb);
 
        if (security && (wil->txrx_ops.rx_crypto_check(wil, skb) != 0)) {
+               wil_dbg_txrx(wil, "Rx drop %d bytes\n", skb->len);
                dev_kfree_skb(skb);
                ndev->stats.rx_dropped++;
                stats->rx_replay++;
                stats->rx_dropped++;
-               wil_dbg_txrx(wil, "Rx drop %d bytes\n", skb->len);
                return;
        }
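Reordering the wil_dbg_txrx() line above dev_kfree_skb() is a use-after-free fix: the message reads skb->len, and after dev_kfree_skb() the skb may already have been recycled. The rule, reduced to its essentials:

pr_debug("dropping %u bytes\n", skb->len);      /* read fields first */
kfree_skb(skb);                                 /* then free; never touch after */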
 
index 809bdf3..4c0556b 100644 (file)
@@ -20,22 +20,22 @@ config ATMEL
       select FW_LOADER
       select CRC32
        ---help---
-        A driver 802.11b wireless cards based on the Atmel fast-vnet
-        chips. This driver supports standard Linux wireless extensions.
+       A driver for 802.11b wireless cards based on the Atmel fast-vnet
+       chips. This driver supports standard Linux wireless extensions.
 
-        Many  cards based on this chipset do not have flash memory
-        and need their firmware loaded at start-up. If yours is
-        one of these, you will need to provide a firmware image
-        to be loaded into the card by the driver. The Atmel
-        firmware package can be downloaded from
-        <http://www.thekelleys.org.uk/atmel>
+       Many cards based on this chipset do not have flash memory
+       and need their firmware loaded at start-up. If yours is
+       one of these, you will need to provide a firmware image
+       to be loaded into the card by the driver. The Atmel
+       firmware package can be downloaded from
+       <http://www.thekelleys.org.uk/atmel>
 
 config PCI_ATMEL
       tristate "Atmel at76c506 PCI cards"
       depends on ATMEL && PCI
        ---help---
-        Enable support for PCI and mini-PCI cards containing the
-        Atmel at76c506 chip.
+       Enable support for PCI and mini-PCI cards containing the
+       Atmel at76c506 chip.
 
 config PCMCIA_ATMEL
        tristate "Atmel at76c502/at76c504 PCMCIA cards"
@@ -48,11 +48,11 @@ config PCMCIA_ATMEL
          Atmel at76c502 and at76c504 chips.
 
 config AT76C50X_USB
-        tristate "Atmel at76c503/at76c505/at76c505a USB cards"
-        depends on MAC80211 && USB
-        select FW_LOADER
-        ---help---
-          Enable support for USB Wireless devices using Atmel at76c503,
-          at76c505 or at76c505a chips.
+       tristate "Atmel at76c503/at76c505/at76c505a USB cards"
+       depends on MAC80211 && USB
+       select FW_LOADER
+       ---help---
+         Enable support for USB Wireless devices using Atmel at76c503,
+         at76c505 or at76c505a chips.
 
 endif # WLAN_VENDOR_ATMEL
index 5d2878a..ab17903 100644 (file)
@@ -13,37 +13,37 @@ config IPW2100
        select LIB80211
        select LIBIPW
        ---help---
-          A driver for the Intel PRO/Wireless 2100 Network 
+         A driver for the Intel PRO/Wireless 2100 Network
          Connection 802.11b wireless network adapter.
 
-          See <file:Documentation/networking/device_drivers/intel/ipw2100.txt>
+         See <file:Documentation/networking/device_drivers/intel/ipw2100.txt>
          for information on the capabilities currently enabled in this driver
          and for tips for debugging issues and problems.
 
          In order to use this driver, you will need a firmware image for it.
-          You can obtain the firmware from
-         <http://ipw2100.sf.net/>.  Once you have the firmware image, you 
+         You can obtain the firmware from
+         <http://ipw2100.sf.net/>.  Once you have the firmware image, you
          will need to place it in /lib/firmware.
 
-          You will also very likely need the Wireless Tools in order to
-          configure your card:
+         You will also very likely need the Wireless Tools in order to
+         configure your card:
 
-          <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+         <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+
+         It is recommended that you compile this driver as a module (M)
+         rather than built-in (Y). This driver requires firmware at device
+         initialization time, and when built-in this typically happens
+         before the filesystem is accessible (hence firmware will be
+         unavailable and initialization will fail). If you do choose to build
+         this driver into your kernel image, you can avoid this problem by
+         including the firmware and a firmware loader in an initramfs.
 
-          It is recommended that you compile this driver as a module (M)
-          rather than built-in (Y). This driver requires firmware at device
-          initialization time, and when built-in this typically happens
-          before the filesystem is accessible (hence firmware will be
-          unavailable and initialization will fail). If you do choose to build
-          this driver into your kernel image, you can avoid this problem by
-          including the firmware and a firmware loader in an initramfs.
 config IPW2100_MONITOR
-        bool "Enable promiscuous mode"
-        depends on IPW2100
-        ---help---
+       bool "Enable promiscuous mode"
+       depends on IPW2100
+       ---help---
          Enables promiscuous/monitor mode support for the ipw2100 driver.
-         With this feature compiled into the driver, you can switch to 
+         With this feature compiled into the driver, you can switch to
          promiscuous mode via the Wireless Tool's Monitor mode.  While in this
          mode, no packets can be sent.
 
@@ -51,17 +51,17 @@ config IPW2100_DEBUG
        bool "Enable full debugging output in IPW2100 module."
        depends on IPW2100
        ---help---
-         This option will enable debug tracing output for the IPW2100.  
+         This option will enable debug tracing output for the IPW2100.
 
-         This will result in the kernel module being ~60k larger.  You can 
-         control which debug output is sent to the kernel log by setting the 
-         value in 
+         This will result in the kernel module being ~60k larger.  You can
+         control which debug output is sent to the kernel log by setting the
+         value in
 
          /sys/bus/pci/drivers/ipw2100/debug_level
 
          This entry will only exist if this option is enabled.
 
-         If you are not trying to debug or develop the IPW2100 driver, you 
+         If you are not trying to debug or develop the IPW2100 driver, you
          most likely want to say N here.
 
 config IPW2200
@@ -75,37 +75,37 @@ config IPW2200
        select LIB80211
        select LIBIPW
        ---help---
-          A driver for the Intel PRO/Wireless 2200BG and 2915ABG Network
-         Connection adapters. 
+         A driver for the Intel PRO/Wireless 2200BG and 2915ABG Network
+         Connection adapters.
 
-          See <file:Documentation/networking/device_drivers/intel/ipw2200.txt>
+         See <file:Documentation/networking/device_drivers/intel/ipw2200.txt>
          for information on the capabilities currently enabled in this
          driver and for tips for debugging issues and problems.
 
          In order to use this driver, you will need a firmware image for it.
-          You can obtain the firmware from
-         <http://ipw2200.sf.net/>.  See the above referenced README.ipw2200 
+         You can obtain the firmware from
+         <http://ipw2200.sf.net/>.  See the above referenced README.ipw2200
          for information on where to install the firmware images.
 
-          You will also very likely need the Wireless Tools in order to
-          configure your card:
+         You will also very likely need the Wireless Tools in order to
+         configure your card:
 
-          <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+         <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
 
-          It is recommended that you compile this driver as a module (M)
-          rather than built-in (Y). This driver requires firmware at device
-          initialization time, and when built-in this typically happens
-          before the filesystem is accessible (hence firmware will be
-          unavailable and initialization will fail). If you do choose to build
-          this driver into your kernel image, you can avoid this problem by
-          including the firmware and a firmware loader in an initramfs.
+         It is recommended that you compile this driver as a module (M)
+         rather than built-in (Y). This driver requires firmware at device
+         initialization time, and when built-in this typically happens
+         before the filesystem is accessible (hence firmware will be
+         unavailable and initialization will fail). If you do choose to build
+         this driver into your kernel image, you can avoid this problem by
+         including the firmware and a firmware loader in an initramfs.
 
 config IPW2200_MONITOR
-        bool "Enable promiscuous mode"
-        depends on IPW2200
-        ---help---
+       bool "Enable promiscuous mode"
+       depends on IPW2200
+       ---help---
          Enables promiscuous/monitor mode support for the ipw2200 driver.
-         With this feature compiled into the driver, you can switch to 
+         With this feature compiled into the driver, you can switch to
          promiscuous mode via the Wireless Tool's Monitor mode.  While in this
          mode, no packets can be sent.
 
@@ -118,28 +118,28 @@ config IPW2200_PROMISCUOUS
        depends on IPW2200_MONITOR
        select IPW2200_RADIOTAP
        ---help---
-          Enables the creation of a second interface prefixed 'rtap'. 
-          This second interface will provide every received in radiotap
+         Enables the creation of a second interface prefixed 'rtap'.
+         This second interface will provide every received frame in radiotap
          format.
 
-          This is useful for performing wireless network analysis while
-          maintaining an active association.
+         This is useful for performing wireless network analysis while
+         maintaining an active association.
+
+         Example usage:
 
-          Example usage:
+           % modprobe ipw2200 rtap_iface=1
+           % ifconfig rtap0 up
+           % tethereal -i rtap0
 
-            % modprobe ipw2200 rtap_iface=1
-            % ifconfig rtap0 up
-            % tethereal -i rtap0
+         If you do not specify 'rtap_iface=1' as a module parameter then
+         the rtap interface will not be created and you will need to turn
+         it on via sysfs:
 
-          If you do not specify 'rtap_iface=1' as a module parameter then 
-          the rtap interface will not be created and you will need to turn 
-          it on via sysfs:
-       
-            % echo 1 > /sys/bus/pci/drivers/ipw2200/*/rtap_iface
+           % echo 1 > /sys/bus/pci/drivers/ipw2200/*/rtap_iface
 
 config IPW2200_QOS
-        bool "Enable QoS support"
-        depends on IPW2200
+       bool "Enable QoS support"
+       depends on IPW2200
 
 config IPW2200_DEBUG
        bool "Enable full debugging output in IPW2200 module."
index b82da75..4fbcc7f 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
index fa2c028..ffb705b 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
index e329fd7..100f558 100644 (file)
@@ -91,9 +91,9 @@ config IWLEGACY_DEBUG
          any problems you may encounter.
 
 config IWLEGACY_DEBUGFS
-        bool "iwlegacy (iwl 3945/4965) debugfs support"
-        depends on IWLEGACY && MAC80211_DEBUGFS
-        ---help---
+       bool "iwlegacy (iwl 3945/4965) debugfs support"
+       depends on IWLEGACY && MAC80211_DEBUGFS
+       ---help---
          Enable creation of debugfs files for the iwlegacy drivers. This
          is a low-impact option that allows getting insight into the
          driver's state at runtime.
index 7dbc0d3..091d621 100644 (file)
@@ -119,9 +119,9 @@ config IWLWIFI_DEBUG
          any problems you may encounter.
 
 config IWLWIFI_DEBUGFS
-        bool "iwlwifi debugfs support"
-        depends on MAC80211_DEBUGFS
-        ---help---
+       bool "iwlwifi debugfs support"
+       depends on MAC80211_DEBUGFS
+       ---help---
          Enable creation of debugfs files for the iwlwifi drivers. This
          is a low-impact option that allows getting insight into the
          driver's state at runtime.
index 7573af2..c2db758 100644 (file)
@@ -162,12 +162,13 @@ int iwl_acpi_get_mcc(struct device *dev, char *mcc)
 
        wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE,
                                         &tbl_rev);
-       if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+       if (IS_ERR(wifi_pkg)) {
                ret = PTR_ERR(wifi_pkg);
                goto out_free;
        }
 
-       if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+       if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+           tbl_rev != 0) {
                ret = -EINVAL;
                goto out_free;
        }
@@ -224,12 +225,13 @@ int iwl_acpi_get_eckv(struct device *dev, u32 *extl_clk)
 
        wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_ECKV_WIFI_DATA_SIZE,
                                         &tbl_rev);
-       if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+       if (IS_ERR(wifi_pkg)) {
                ret = PTR_ERR(wifi_pkg);
                goto out_free;
        }
 
-       if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+       if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+           tbl_rev != 0) {
                ret = -EINVAL;
                goto out_free;
        }
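This fixes a recurring bug pattern in the ACPI readers (the same change repeats in mvm/fw.c below): IS_ERR(wifi_pkg) and tbl_rev != 0 were or'ed into a single test, so a bad revision on a valid pointer ran PTR_ERR() on a non-error pointer and returned a bogus value instead of an error. Splitting the checks gives each failure its own code:

if (IS_ERR(wifi_pkg))
        return PTR_ERR(wifi_pkg);       /* a genuine ERR_PTR */
if (tbl_rev != 0)
        return -EINVAL;                 /* valid pointer, wrong revision */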
index 39c6485..c0750ce 100644 (file)
@@ -520,7 +520,7 @@ struct iwl_scan_dwell {
 } __packed;
 
 /**
- * struct iwl_scan_config
+ * struct iwl_scan_config_v1
  * @flags:                     enum scan_config_flags
  * @tx_chains:                 valid_tx antenna - ANT_* definitions
  * @rx_chains:                 valid_rx antenna - ANT_* definitions
@@ -552,7 +552,7 @@ struct iwl_scan_config_v1 {
 #define SCAN_LB_LMAC_IDX 0
 #define SCAN_HB_LMAC_IDX 1
 
-struct iwl_scan_config {
+struct iwl_scan_config_v2 {
        __le32 flags;
        __le32 tx_chains;
        __le32 rx_chains;
@@ -564,6 +564,24 @@ struct iwl_scan_config {
        u8 bcast_sta_id;
        u8 channel_flags;
        u8 channel_array[];
+} __packed; /* SCAN_CONFIG_DB_CMD_API_S_2 */
+
+/**
+ * struct iwl_scan_config
+ * @enable_cam_mode: whether to enable CAM mode.
+ * @enable_promiscouos_mode: whether to enable promiscuous mode
+ * @bcast_sta_id: the index of the station in the fw
+ * @reserved: reserved
+ * @tx_chains: valid_tx antenna - ANT_* definitions
+ * @rx_chains: valid_rx antenna - ANT_* definitions
+ */
+struct iwl_scan_config {
+       u8 enable_cam_mode;
+       u8 enable_promiscouos_mode;
+       u8 bcast_sta_id;
+       u8 reserved;
+       __le32 tx_chains;
+       __le32 rx_chains;
 } __packed; /* SCAN_CONFIG_DB_CMD_API_S_3 */
 
 /**
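The old struct iwl_scan_config is renamed _v2 and the new v3 layout is much smaller: newer firmware owns most scan configuration itself, so the driver only supplies CAM/promiscuous mode, the broadcast station index and the antenna masks. A sketch of how command sizing can key off the new TLV bit (the full selection logic is not shown in this excerpt; iwl_mvm_cdb_scan_api() appears in the scan.c hunk below):

if (iwl_mvm_is_reduced_config_scan_supported(mvm))
        cmd_size = sizeof(struct iwl_scan_config);      /* v3, reduced */
else if (iwl_mvm_cdb_scan_api(mvm))
        cmd_size = sizeof(struct iwl_scan_config_v2);
else
        cmd_size = sizeof(struct iwl_scan_config_v1);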
index 5c8602d..8742180 100644 (file)
@@ -646,6 +646,7 @@ static struct scatterlist *alloc_sgtable(int size)
                                if (new_page)
                                        __free_page(new_page);
                        }
+                       kfree(table);
                        return NULL;
                }
                alloc_size = min_t(int, size, PAGE_SIZE);
index 423cc0c..0d5bc4c 100644 (file)
@@ -288,6 +288,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
  *     STA_CONTEXT_DOT11AX_API_S
  * @IWL_UCODE_TLV_CAPA_SAR_TABLE_VER: This ucode supports different sar
  *     version tables.
+ * @IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG: This ucode supports v3 of
+ *  SCAN_CONFIG_DB_CMD_API_S.
  *
  * @NUM_IWL_UCODE_TLV_API: number of bits used
  */
@@ -321,6 +323,7 @@ enum iwl_ucode_tlv_api {
        IWL_UCODE_TLV_API_WOWLAN_TCP_SYN_WAKE   = (__force iwl_ucode_tlv_api_t)53,
        IWL_UCODE_TLV_API_FTM_RTT_ACCURACY      = (__force iwl_ucode_tlv_api_t)54,
        IWL_UCODE_TLV_API_SAR_TABLE_VER         = (__force iwl_ucode_tlv_api_t)55,
+       IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG   = (__force iwl_ucode_tlv_api_t)56,
        IWL_UCODE_TLV_API_ADWELL_HB_DEF_N_AP    = (__force iwl_ucode_tlv_api_t)57,
        IWL_UCODE_TLV_API_SCAN_EXT_CHAN_VER     = (__force iwl_ucode_tlv_api_t)58,
 
index cb4c551..695bbaa 100644 (file)
  *         Indicates MAC is entering a power-saving sleep power-down.
  *         Not a good time to access device-internal resources.
  */
+#define CSR_GP_CNTRL_REG_FLAG_INIT_DONE                     (0x00000004)
 #define CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP         (0x00000010)
 #define CSR_GP_CNTRL_REG_FLAG_XTAL_ON               (0x00000400)
 
index f8e4f0f..f09e368 100644 (file)
@@ -112,38 +112,38 @@ int iwl_dump_fh(struct iwl_trans *trans, char **buf);
  */
 static inline u32 iwl_umac_prph(struct iwl_trans *trans, u32 ofs)
 {
-       return ofs + trans->cfg->trans.umac_prph_offset;
+       return ofs + trans->trans_cfg->umac_prph_offset;
 }
 
 static inline u32 iwl_read_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs)
 {
        return iwl_read_prph_no_grab(trans, ofs +
-                                    trans->cfg->trans.umac_prph_offset);
+                                    trans->trans_cfg->umac_prph_offset);
 }
 
 static inline u32 iwl_read_umac_prph(struct iwl_trans *trans, u32 ofs)
 {
-       return iwl_read_prph(trans, ofs + trans->cfg->trans.umac_prph_offset);
+       return iwl_read_prph(trans, ofs + trans->trans_cfg->umac_prph_offset);
 }
 
 static inline void iwl_write_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs,
                                               u32 val)
 {
-       iwl_write_prph_no_grab(trans,  ofs + trans->cfg->trans.umac_prph_offset,
+       iwl_write_prph_no_grab(trans,  ofs + trans->trans_cfg->umac_prph_offset,
                               val);
 }
 
 static inline void iwl_write_umac_prph(struct iwl_trans *trans, u32 ofs,
                                       u32 val)
 {
-       iwl_write_prph(trans,  ofs + trans->cfg->trans.umac_prph_offset, val);
+       iwl_write_prph(trans,  ofs + trans->trans_cfg->umac_prph_offset, val);
 }
 
 static inline int iwl_poll_umac_prph_bit(struct iwl_trans *trans, u32 addr,
                                         u32 bits, u32 mask, int timeout)
 {
        return iwl_poll_prph_bit(trans, addr +
-                                trans->cfg->trans.umac_prph_offset,
+                                trans->trans_cfg->umac_prph_offset,
                                 bits, mask, timeout);
 }
 
index f47e0f9..23c25a7 100644 (file)
@@ -449,6 +449,11 @@ enum {
 #define PERSISTENCE_BIT                        BIT(12)
 #define PREG_WFPM_ACCESS               BIT(12)
 
+#define HPM_HIPM_GEN_CFG                       0xA03458
+#define HPM_HIPM_GEN_CFG_CR_PG_EN              BIT(0)
+#define HPM_HIPM_GEN_CFG_CR_SLP_EN             BIT(1)
+#define HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE       BIT(10)
+
 #define UREG_DOORBELL_TO_ISR6          0xA05C04
 #define UREG_DOORBELL_TO_ISR6_NMI_BIT  BIT(0)
 #define UREG_DOORBELL_TO_ISR6_SUSPEND  BIT(18)
index 014eca6..d9eb2b2 100644 (file)
@@ -420,6 +420,9 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
        };
        int ret;
 
+       if (mvm->trans->cfg->tx_with_siso_diversity)
+               init_cfg.init_flags |= cpu_to_le32(BIT(IWL_INIT_PHY));
+
        lockdep_assert_held(&mvm->mutex);
 
        mvm->rfkill_safe_init_done = false;
@@ -694,12 +697,13 @@ static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm)
 
        wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
                                         ACPI_WRDS_WIFI_DATA_SIZE, &tbl_rev);
-       if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+       if (IS_ERR(wifi_pkg)) {
                ret = PTR_ERR(wifi_pkg);
                goto out_free;
        }
 
-       if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+       if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+           tbl_rev != 0) {
                ret = -EINVAL;
                goto out_free;
        }
@@ -731,13 +735,14 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm)
 
        wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
                                         ACPI_EWRD_WIFI_DATA_SIZE, &tbl_rev);
-       if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+       if (IS_ERR(wifi_pkg)) {
                ret = PTR_ERR(wifi_pkg);
                goto out_free;
        }
 
        if ((wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) ||
-           (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER)) {
+           (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER) ||
+           tbl_rev != 0) {
                ret = -EINVAL;
                goto out_free;
        }
@@ -791,11 +796,16 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm)
 
        wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
                                         ACPI_WGDS_WIFI_DATA_SIZE, &tbl_rev);
-       if (IS_ERR(wifi_pkg) || tbl_rev > 1) {
+       if (IS_ERR(wifi_pkg)) {
                ret = PTR_ERR(wifi_pkg);
                goto out_free;
        }
 
+       if (tbl_rev != 0) {
+               ret = -EINVAL;
+               goto out_free;
+       }
+
        mvm->geo_rev = tbl_rev;
        for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
                for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) {
@@ -889,13 +899,17 @@ static bool iwl_mvm_sar_geo_support(struct iwl_mvm *mvm)
         * firmware versions.  Unfortunately, we don't have a TLV API
         * flag to rely on, so rely on the major version which is in
         * the first byte of ucode_ver.  This was implemented
-        * initially on version 38 and then backported to 36, 29 and
-        * 17.
+        * initially on version 38 and then backported to 17.  It was
+        * also backported to 29, but only for 7265D devices.  The
+        * intention was to have it in 36 as well, but not all 8000
+        * family got this feature enabled.  The 8000 family is the
+        * only one using version 36, so skip this version entirely.
         */
        return IWL_UCODE_SERIAL(mvm->fw->ucode_ver) >= 38 ||
-              IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 36 ||
-              IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 ||
-              IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17;
+              IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17 ||
+              (IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 &&
+               ((mvm->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
+                CSR_HW_REV_TYPE_7265D));
 }
 
 int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
@@ -1018,11 +1032,16 @@ static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm)
        wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
                                         ACPI_PPAG_WIFI_DATA_SIZE, &tbl_rev);
 
-       if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+       if (IS_ERR(wifi_pkg)) {
                ret = PTR_ERR(wifi_pkg);
                goto out_free;
        }
 
+       if (tbl_rev != 0) {
+               ret = -EINVAL;
+               goto out_free;
+       }
+
        enabled = &wifi_pkg->package.elements[1];
        if (enabled->type != ACPI_TYPE_INTEGER ||
            (enabled->integer.value != 0 && enabled->integer.value != 1)) {
index cd1b100..d31f96c 100644 (file)
@@ -4881,11 +4881,11 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
        if (!iwl_mvm_has_new_rx_api(mvm))
                return;
 
-       notif->cookie = mvm->queue_sync_cookie;
-
-       if (notif->sync)
+       if (notif->sync) {
+               notif->cookie = mvm->queue_sync_cookie;
                atomic_set(&mvm->queue_sync_counter,
                           mvm->trans->num_rx_queues);
+       }
 
        ret = iwl_mvm_notify_rx_queue(mvm, qmask, (u8 *)notif,
                                      size, !notif->sync);
@@ -4905,7 +4905,8 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
 
 out:
        atomic_set(&mvm->queue_sync_counter, 0);
-       mvm->queue_sync_cookie++;
+       if (notif->sync)
+               mvm->queue_sync_cookie++;
 }
 
 static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw)
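
These two hunks keep the RX-queue sync cookie coherent: only a synchronous
request carries the cookie, so only a synchronous request may stamp it into
the notification and advance it on completion. Incrementing the counter for
fire-and-forget (!sync) notifications could leave a later genuine completion
comparing against a cookie value that was never sent. A toy model of the
assumed protocol (names are ours, not the driver's):

        #include <stdbool.h>

        struct sync_state { unsigned int cookie; };

        static unsigned int issue(struct sync_state *s, bool sync)
        {
                return sync ? s->cookie : 0;    /* async: no cookie at all */
        }

        static void retire(struct sync_state *s, bool sync)
        {
                if (sync)
                        s->cookie++;    /* id for the next round-trip */
        }
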
index 843d00b..5ca50f3 100644 (file)
@@ -1405,6 +1405,12 @@ static inline bool iwl_mvm_is_scan_ext_chan_supported(struct iwl_mvm *mvm)
                          IWL_UCODE_TLV_API_SCAN_EXT_CHAN_VER);
 }
 
+static inline bool iwl_mvm_is_reduced_config_scan_supported(struct iwl_mvm *mvm)
+{
+       return fw_has_api(&mvm->fw->ucode_capa,
+                         IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG);
+}
+
 static inline bool iwl_mvm_has_new_rx_stats_api(struct iwl_mvm *mvm)
 {
        return fw_has_api(&mvm->fw->ucode_capa,
index f6b3045..fcafa22 100644 (file)
@@ -1137,11 +1137,11 @@ static void iwl_mvm_fill_scan_config_v1(struct iwl_mvm *mvm, void *config,
        iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels);
 }
 
-static void iwl_mvm_fill_scan_config(struct iwl_mvm *mvm, void *config,
-                                    u32 flags, u8 channel_flags,
-                                    u32 max_channels)
+static void iwl_mvm_fill_scan_config_v2(struct iwl_mvm *mvm, void *config,
+                                       u32 flags, u8 channel_flags,
+                                       u32 max_channels)
 {
-       struct iwl_scan_config *cfg = config;
+       struct iwl_scan_config_v2 *cfg = config;
 
        cfg->flags = cpu_to_le32(flags);
        cfg->tx_chains = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm));
@@ -1185,7 +1185,7 @@ static void iwl_mvm_fill_scan_config(struct iwl_mvm *mvm, void *config,
        iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels);
 }
 
-int iwl_mvm_config_scan(struct iwl_mvm *mvm)
+static int iwl_mvm_legacy_config_scan(struct iwl_mvm *mvm)
 {
        void *cfg;
        int ret, cmd_size;
@@ -1217,7 +1217,7 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm)
        }
 
        if (iwl_mvm_cdb_scan_api(mvm))
-               cmd_size = sizeof(struct iwl_scan_config);
+               cmd_size = sizeof(struct iwl_scan_config_v2);
        else
                cmd_size = sizeof(struct iwl_scan_config_v1);
        cmd_size += num_channels;
@@ -1254,8 +1254,8 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm)
                        flags |= (iwl_mvm_is_scan_fragmented(hb_type)) ?
                                 SCAN_CONFIG_FLAG_SET_LMAC2_FRAGMENTED :
                                 SCAN_CONFIG_FLAG_CLEAR_LMAC2_FRAGMENTED;
-               iwl_mvm_fill_scan_config(mvm, cfg, flags, channel_flags,
-                                        num_channels);
+               iwl_mvm_fill_scan_config_v2(mvm, cfg, flags, channel_flags,
+                                           num_channels);
        } else {
                iwl_mvm_fill_scan_config_v1(mvm, cfg, flags, channel_flags,
                                            num_channels);
@@ -1277,6 +1277,30 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm)
        return ret;
 }
 
+int iwl_mvm_config_scan(struct iwl_mvm *mvm)
+{
+       struct iwl_scan_config cfg;
+       struct iwl_host_cmd cmd = {
+               .id = iwl_cmd_id(SCAN_CFG_CMD, IWL_ALWAYS_LONG_GROUP, 0),
+               .len[0] = sizeof(cfg),
+               .data[0] = &cfg,
+               .dataflags[0] = IWL_HCMD_DFL_NOCOPY,
+       };
+
+       if (!iwl_mvm_is_reduced_config_scan_supported(mvm))
+               return iwl_mvm_legacy_config_scan(mvm);
+
+       memset(&cfg, 0, sizeof(cfg));
+
+       cfg.bcast_sta_id = mvm->aux_sta.sta_id;
+       cfg.tx_chains = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm));
+       cfg.rx_chains = cpu_to_le32(iwl_mvm_scan_rx_ant(mvm));
+
+       IWL_DEBUG_SCAN(mvm, "Sending UMAC scan config\n");
+
+       return iwl_mvm_send_cmd(mvm, &cmd);
+}
+
 static int iwl_mvm_scan_uid_by_status(struct iwl_mvm *mvm, int status)
 {
        int i;
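
With IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG the firmware derives most scan
parameters on its own, so the command shrinks to the broadcast station id
plus the TX/RX chain masks; firmware without the flag falls back to the
legacy builder above. One point worth spelling out, as our reading of the
surrounding code rather than something the patch states: IWL_HCMD_DFL_NOCOPY
hands the on-stack cfg buffer to the transport directly, which is only safe
because iwl_mvm_send_cmd() completes before the function returns. A toy
illustration of that constraint:

        #include <stddef.h>

        /* stand-in for a blocking send that consumes buf before returning */
        static int send_sync(const void *buf, size_t len)
        {
                (void)buf; (void)len;
                return 0;
        }

        static int config(void)
        {
                unsigned char cfg[16] = { 0 };

                /* cfg dies when this frame returns, so a zero-copy send
                 * must have finished consuming it by then. */
                return send_sync(cfg, sizeof(cfg));
        }
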
index 0bedba4..b3768d5 100644 (file)
@@ -1482,6 +1482,13 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm,
                                            mvm_sta->sta_id, i);
                        txq_id = iwl_mvm_tvqm_enable_txq(mvm, mvm_sta->sta_id,
                                                         i, wdg);
+                       /*
+                        * On failure, just set it to IWL_MVM_INVALID_QUEUE
+                        * so we can try again later; we have no other good
+                        * way of failing here.
+                        */
+                       if (txq_id < 0)
+                               txq_id = IWL_MVM_INVALID_QUEUE;
                        tid_data->txq_id = txq_id;
 
                        /*
@@ -1950,30 +1957,73 @@ void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta)
        sta->sta_id = IWL_MVM_INVALID_STA;
 }
 
-static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 *queue,
+static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 queue,
                                          u8 sta_id, u8 fifo)
 {
        unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
                mvm->trans->trans_cfg->base_params->wd_timeout :
                IWL_WATCHDOG_DISABLED;
+       struct iwl_trans_txq_scd_cfg cfg = {
+               .fifo = fifo,
+               .sta_id = sta_id,
+               .tid = IWL_MAX_TID_COUNT,
+               .aggregate = false,
+               .frame_limit = IWL_FRAME_LIMIT,
+       };
+
+       WARN_ON(iwl_mvm_has_new_tx_api(mvm));
+
+       iwl_mvm_enable_txq(mvm, NULL, queue, 0, &cfg, wdg_timeout);
+}
+
+static int iwl_mvm_enable_aux_snif_queue_tvqm(struct iwl_mvm *mvm, u8 sta_id)
+{
+       unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
+               mvm->trans->trans_cfg->base_params->wd_timeout :
+               IWL_WATCHDOG_DISABLED;
+
+       WARN_ON(!iwl_mvm_has_new_tx_api(mvm));
+
+       return iwl_mvm_tvqm_enable_txq(mvm, sta_id, IWL_MAX_TID_COUNT,
+                                      wdg_timeout);
+}
 
+static int iwl_mvm_add_int_sta_with_queue(struct iwl_mvm *mvm, int macidx,
+                                         int maccolor,
+                                         struct iwl_mvm_int_sta *sta,
+                                         u16 *queue, int fifo)
+{
+       int ret;
+
+       /* Map queue to fifo - needs to happen before adding station */
+       if (!iwl_mvm_has_new_tx_api(mvm))
+               iwl_mvm_enable_aux_snif_queue(mvm, *queue, sta->sta_id, fifo);
+
+       ret = iwl_mvm_add_int_sta_common(mvm, sta, NULL, macidx, maccolor);
+       if (ret) {
+               if (!iwl_mvm_has_new_tx_api(mvm))
+                       iwl_mvm_disable_txq(mvm, NULL, *queue,
+                                           IWL_MAX_TID_COUNT, 0);
+               return ret;
+       }
+
+       /*
+        * For 22000 firmware and onwards we cannot add a queue to a station
+        * unknown to the firmware, so enable the queue here, after ADD_STA
+        */
        if (iwl_mvm_has_new_tx_api(mvm)) {
-               int tvqm_queue =
-                       iwl_mvm_tvqm_enable_txq(mvm, sta_id,
-                                               IWL_MAX_TID_COUNT,
-                                               wdg_timeout);
-               *queue = tvqm_queue;
-       } else {
-               struct iwl_trans_txq_scd_cfg cfg = {
-                       .fifo = fifo,
-                       .sta_id = sta_id,
-                       .tid = IWL_MAX_TID_COUNT,
-                       .aggregate = false,
-                       .frame_limit = IWL_FRAME_LIMIT,
-               };
+               int txq;
 
-               iwl_mvm_enable_txq(mvm, NULL, *queue, 0, &cfg, wdg_timeout);
+               txq = iwl_mvm_enable_aux_snif_queue_tvqm(mvm, sta->sta_id);
+               if (txq < 0) {
+                       iwl_mvm_rm_sta_common(mvm, sta->sta_id);
+                       return txq;
+               }
+
+               *queue = txq;
        }
+
+       return 0;
 }
 
 int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm)
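
The new iwl_mvm_add_int_sta_with_queue() deduplicates the aux and sniffer
paths below and, more importantly, adds the missing rollbacks: pre-22000
hardware must map the queue before ADD_STA (and unmap it again if ADD_STA
fails), while 22000-and-later firmware rejects queues for stations it does
not know about, so there the TVQM queue is enabled only afterwards and the
station is removed again if that enable fails. A stubbed skeleton of the
ordering contract (stand-in names; the real calls are in the hunk above):

        #include <stdbool.h>

        static bool new_tx_api;
        static void enable_queue(void) { }
        static void disable_queue(void) { }
        static int  add_station(void) { return 0; }
        static void remove_station(void) { }
        static int  enable_tvqm_queue(void) { return 0; }

        static int add_int_sta_with_queue(void)
        {
                int ret;

                if (!new_tx_api)
                        enable_queue();         /* must precede ADD_STA */

                ret = add_station();
                if (ret) {
                        if (!new_tx_api)
                                disable_queue();        /* undo step 1 */
                        return ret;
                }

                if (new_tx_api) {
                        int txq = enable_tvqm_queue(); /* needs a known sta */

                        if (txq < 0) {
                                remove_station();       /* undo ADD_STA */
                                return txq;
                        }
                }
                return 0;
        }
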
@@ -1989,59 +2039,26 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm)
        if (ret)
                return ret;
 
-       /* Map Aux queue to fifo - needs to happen before adding Aux station */
-       if (!iwl_mvm_has_new_tx_api(mvm))
-               iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue,
-                                             mvm->aux_sta.sta_id,
-                                             IWL_MVM_TX_FIFO_MCAST);
-
-       ret = iwl_mvm_add_int_sta_common(mvm, &mvm->aux_sta, NULL,
-                                        MAC_INDEX_AUX, 0);
+       ret = iwl_mvm_add_int_sta_with_queue(mvm, MAC_INDEX_AUX, 0,
+                                            &mvm->aux_sta, &mvm->aux_queue,
+                                            IWL_MVM_TX_FIFO_MCAST);
        if (ret) {
                iwl_mvm_dealloc_int_sta(mvm, &mvm->aux_sta);
                return ret;
        }
 
-       /*
-        * For 22000 firmware and on we cannot add queue to a station unknown
-        * to firmware so enable queue here - after the station was added
-        */
-       if (iwl_mvm_has_new_tx_api(mvm))
-               iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue,
-                                             mvm->aux_sta.sta_id,
-                                             IWL_MVM_TX_FIFO_MCAST);
-
        return 0;
 }
 
 int iwl_mvm_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 {
        struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-       int ret;
 
        lockdep_assert_held(&mvm->mutex);
 
-       /* Map snif queue to fifo - must happen before adding snif station */
-       if (!iwl_mvm_has_new_tx_api(mvm))
-               iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue,
-                                             mvm->snif_sta.sta_id,
+       return iwl_mvm_add_int_sta_with_queue(mvm, mvmvif->id, mvmvif->color,
+                                             &mvm->snif_sta, &mvm->snif_queue,
                                              IWL_MVM_TX_FIFO_BE);
-
-       ret = iwl_mvm_add_int_sta_common(mvm, &mvm->snif_sta, vif->addr,
-                                        mvmvif->id, 0);
-       if (ret)
-               return ret;
-
-       /*
-        * For 22000 firmware and on we cannot add queue to a station unknown
-        * to firmware so enable queue here - after the station was added
-        */
-       if (iwl_mvm_has_new_tx_api(mvm))
-               iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue,
-                                             mvm->snif_sta.sta_id,
-                                             IWL_MVM_TX_FIFO_BE);
-
-       return 0;
 }
 
 int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
@@ -2133,6 +2150,10 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
                queue = iwl_mvm_tvqm_enable_txq(mvm, bsta->sta_id,
                                                IWL_MAX_TID_COUNT,
                                                wdg_timeout);
+               if (queue < 0) {
+                       iwl_mvm_rm_sta_common(mvm, bsta->sta_id);
+                       return queue;
+               }
 
                if (vif->type == NL80211_IFTYPE_AP ||
                    vif->type == NL80211_IFTYPE_ADHOC)
@@ -2307,10 +2328,8 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
        }
        ret = iwl_mvm_add_int_sta_common(mvm, msta, maddr,
                                         mvmvif->id, mvmvif->color);
-       if (ret) {
-               iwl_mvm_dealloc_int_sta(mvm, msta);
-               return ret;
-       }
+       if (ret)
+               goto err;
 
        /*
         * Enable cab queue after the ADD_STA command is sent.
@@ -2323,6 +2342,10 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
                int queue = iwl_mvm_tvqm_enable_txq(mvm, msta->sta_id,
                                                    0,
                                                    timeout);
+               if (queue < 0) {
+                       ret = queue;
+                       goto err;
+               }
                mvmvif->cab_queue = queue;
        } else if (!fw_has_api(&mvm->fw->ucode_capa,
                               IWL_UCODE_TLV_API_STA_TYPE))
@@ -2330,6 +2353,9 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
                                   timeout);
 
        return 0;
+err:
+       iwl_mvm_dealloc_int_sta(mvm, msta);
+       return ret;
 }
 
 static int __iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, u8 sta_id,
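
Funnelling every post-allocation failure through the single err: label is
the usual kernel unwind idiom; it guarantees the newly added "TVQM queue
enable failed" path frees the internal station exactly as the older ADD_STA
failure path did. The generic shape the hunk converges on (stubbed, with a
plain malloc standing in for the station allocation):

        #include <stdlib.h>

        static int do_add(void *sta)    { (void)sta; return 0; }
        static int do_enable(void *sta) { (void)sta; return 0; }

        static int setup(void)
        {
                int ret;
                void *sta = malloc(64);         /* stand-in allocation */

                if (!sta)
                        return -1;

                ret = do_add(sta);
                if (ret)
                        goto err;

                ret = do_enable(sta);
                if (ret)
                        goto err;

                return 0;
        err:
                free(sta);                      /* single cleanup point */
                return ret;
        }
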
index 32a7083..f0c539b 100644 (file)
@@ -555,16 +555,19 @@ static int compare_temps(const void *a, const void *b)
        return ((s16)le16_to_cpu(*(__le16 *)a) -
                (s16)le16_to_cpu(*(__le16 *)b));
 }
+#endif
 
 int iwl_mvm_send_temp_report_ths_cmd(struct iwl_mvm *mvm)
 {
        struct temp_report_ths_cmd cmd = {0};
-       int ret, i, j, idx = 0;
+       int ret;
+#ifdef CONFIG_THERMAL
+       int i, j, idx = 0;
 
        lockdep_assert_held(&mvm->mutex);
 
        if (!mvm->tz_device.tzone)
-               return -EINVAL;
+               goto send;
 
        /* The driver holds array of temperature trips that are unsorted
         * and uncompressed, the FW should get it compressed and sorted
@@ -597,6 +600,7 @@ int iwl_mvm_send_temp_report_ths_cmd(struct iwl_mvm *mvm)
        }
 
 send:
+#endif
        ret = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(PHY_OPS_GROUP,
                                                TEMP_REPORTING_THRESHOLDS_CMD),
                                   0, sizeof(cmd), &cmd);
@@ -607,6 +611,7 @@ send:
        return ret;
 }
 
+#ifdef CONFIG_THERMAL
 static int iwl_mvm_tzone_get_temp(struct thermal_zone_device *device,
                                  int *temperature)
 {
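
The reshuffled CONFIG_THERMAL fences fix two things at once: without a
registered thermal zone the function used to bail out with -EINVAL, so the
firmware never received a TEMP_REPORTING_THRESHOLDS_CMD at all, and with
CONFIG_THERMAL disabled the send path did not build. Only the trip-point
sorting and compression is conditional now; the (possibly zero-filled)
command is always sent. The pattern, with stand-in names:

        #include <string.h>

        struct ths_cmd { unsigned char num_temps; };

        static int send_cmd(const struct ths_cmd *c) { (void)c; return 0; }

        static int report_thresholds(void)
        {
                struct ths_cmd cmd;

                memset(&cmd, 0, sizeof(cmd));
        #ifdef CONFIG_THERMAL
                /* populate cmd from the sorted trip points here */
        #endif
                return send_cmd(&cmd);  /* always sent, possibly empty */
        }
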
index 75fa8a6..7498038 100644 (file)
@@ -107,13 +107,9 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 
        /* allocate ucode sections in dram and set addresses */
        ret = iwl_pcie_init_fw_sec(trans, fw, &prph_scratch->dram);
-       if (ret) {
-               dma_free_coherent(trans->dev,
-                                 sizeof(*prph_scratch),
-                                 prph_scratch,
-                                 trans_pcie->prph_scratch_dma_addr);
-               return ret;
-       }
+       if (ret)
+               goto err_free_prph_scratch;
+
 
        /* Allocate prph information
         * currently we don't assign to the prph info anything, but it would get
@@ -121,16 +117,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
        prph_info = dma_alloc_coherent(trans->dev, sizeof(*prph_info),
                                       &trans_pcie->prph_info_dma_addr,
                                       GFP_KERNEL);
-       if (!prph_info)
-               return -ENOMEM;
+       if (!prph_info) {
+               ret = -ENOMEM;
+               goto err_free_prph_scratch;
+       }
 
        /* Allocate context info */
        ctxt_info_gen3 = dma_alloc_coherent(trans->dev,
                                            sizeof(*ctxt_info_gen3),
                                            &trans_pcie->ctxt_info_dma_addr,
                                            GFP_KERNEL);
-       if (!ctxt_info_gen3)
-               return -ENOMEM;
+       if (!ctxt_info_gen3) {
+               ret = -ENOMEM;
+               goto err_free_prph_info;
+       }
 
        ctxt_info_gen3->prph_info_base_addr =
                cpu_to_le64(trans_pcie->prph_info_dma_addr);
@@ -186,6 +186,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
                iwl_set_bit(trans, CSR_GP_CNTRL, CSR_AUTO_FUNC_INIT);
 
        return 0;
+
+err_free_prph_info:
+       dma_free_coherent(trans->dev,
+                         sizeof(*prph_info),
+                         prph_info,
+                         trans_pcie->prph_info_dma_addr);
+
+err_free_prph_scratch:
+       dma_free_coherent(trans->dev,
+                         sizeof(*prph_scratch),
+                         prph_scratch,
+                         trans_pcie->prph_scratch_dma_addr);
+       return ret;
+
 }
 
 void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans)
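
Previously a failure in iwl_pcie_init_fw_sec() freed prph_scratch inline,
but failures of the two later dma_alloc_coherent() calls returned -ENOMEM
directly and leaked everything allocated up to that point; the new labels
free in reverse allocation order. A generic two-stage sketch of the same
pairing (not the driver's structures):

        #include <linux/dma-mapping.h>

        static int alloc_two(struct device *dev, void **pa, void **pb,
                             dma_addr_t *pda, dma_addr_t *pdb)
        {
                *pa = dma_alloc_coherent(dev, 512, pda, GFP_KERNEL);
                if (!*pa)
                        return -ENOMEM;

                *pb = dma_alloc_coherent(dev, 512, pdb, GFP_KERNEL);
                if (!*pb)
                        goto err_free_a;

                return 0;               /* caller now owns both buffers */

        err_free_a:
                dma_free_coherent(dev, 512, *pa, *pda);
                return -ENOMEM;
        }
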
index e29c477..040cec1 100644 (file)
@@ -513,31 +513,33 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)},
 
 /* 9000 Series */
-       {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x1551, iwl9560_killer_s_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x1552, iwl9560_killer_i_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
-       {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
        {IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
        {IWL_PCI_DEVICE(0x06F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
        {IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
@@ -571,20 +573,20 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_160_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x003C, iwl9560_2ac_160_cfg)},
-       {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9461_2ac_cfg_soc)},
+       {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9461_2ac_cfg_soc)},
+       {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9462_2ac_cfg_soc)},
+       {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9462_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x2526, 0x0210, iwl9260_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x0214, iwl9260_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x0230, iwl9560_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x0234, iwl9560_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x0238, iwl9560_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x023C, iwl9560_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9461_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x2526, 0x0264, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9462_2ac_cfg_soc)},
+       {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9462_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x2526, 0x1010, iwl9260_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x1210, iwl9260_2ac_cfg)},
@@ -601,7 +603,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x2526, 0x401C, iwl9260_2ac_160_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_160_cfg)},
        {IWL_PCI_DEVICE(0x2526, 0x4034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9462_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x2526, 0x6010, iwl9260_2ac_160_cfg)},
@@ -616,33 +618,33 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)},
        {IWL_PCI_DEVICE(0x271B, 0x0214, iwl9260_2ac_cfg)},
        {IWL_PCI_DEVICE(0x271C, 0x0214, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_160_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x00A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x0230, iwl9560_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x0234, iwl9560_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x0238, iwl9560_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x023C, iwl9560_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x0260, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x0264, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x02A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x02A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg)},
-       {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)},
+
+       {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
        {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_soc)},
        {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_soc)},
@@ -671,6 +673,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)},
+
        {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_160_cfg_shared_clk)},
        {IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)},
        {IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_160_cfg_shared_clk)},
@@ -726,62 +729,60 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x34F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
        {IWL_PCI_DEVICE(0x34F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
 
-       {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_soc)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
+       {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
        {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_160_cfg_soc)},
@@ -821,34 +822,34 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x9DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x9DF0, 0x4234, iwl9560_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0x9DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_soc)},
-       {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_soc)},
+
+       {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+       {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
        {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_160_cfg_soc)},
        {IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg_soc)},
        {IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_160_cfg_soc)},
@@ -1067,11 +1068,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                }
        } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
                   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
-                  ((cfg != &iwl_ax200_cfg_cc &&
-                    cfg != &killer1650x_2ax_cfg &&
-                    cfg != &killer1650w_2ax_cfg &&
-                    cfg != &iwl_ax201_cfg_quz_hr) ||
-                   iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) {
+                  iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) {
                u32 hw_status;
 
                hw_status = iwl_read_prph(iwl_trans, UMAG_GEN_HW_STATUS);
index df8455f..ca3bb4d 100644 (file)
 #include "internal.h"
 #include "fw/dbg.h"
 
+static int iwl_pcie_gen2_force_power_gating(struct iwl_trans *trans)
+{
+       iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
+                         HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
+       udelay(20);
+       iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
+                         HPM_HIPM_GEN_CFG_CR_PG_EN |
+                         HPM_HIPM_GEN_CFG_CR_SLP_EN);
+       udelay(20);
+       iwl_clear_bits_prph(trans, HPM_HIPM_GEN_CFG,
+                           HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
+
+       iwl_trans_sw_reset(trans);
+       iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+
+       return 0;
+}
+
 /*
  * Start up NIC's basic functionality after it has been reset
  * (e.g. after platform boot, or shutdown via iwl_pcie_apm_stop())
@@ -92,6 +110,13 @@ int iwl_pcie_gen2_apm_init(struct iwl_trans *trans)
 
        iwl_pcie_apm_config(trans);
 
+       if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000 &&
+           trans->cfg->integrated) {
+               ret = iwl_pcie_gen2_force_power_gating(trans);
+               if (ret)
+                       return ret;
+       }
+
        ret = iwl_finish_nic_init(trans, trans->trans_cfg);
        if (ret)
                return ret;
index 5ab87a8..6961f00 100644 (file)
@@ -62,7 +62,6 @@
  *
  *****************************************************************************/
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/interrupt.h>
 #include <linux/debugfs.h>
 #include <linux/sched.h>
@@ -3273,11 +3272,17 @@ static struct iwl_trans_dump_data
                ptr = cmdq->write_ptr;
                for (i = 0; i < cmdq->n_window; i++) {
                        u8 idx = iwl_pcie_get_cmd_index(cmdq, ptr);
+                       u8 tfdidx;
                        u32 caplen, cmdlen;
 
+                       if (trans->trans_cfg->use_tfh)
+                               tfdidx = idx;
+                       else
+                               tfdidx = ptr;
+
                        cmdlen = iwl_trans_pcie_get_cmdlen(trans,
-                                                          cmdq->tfds +
-                                                          tfd_size * ptr);
+                                                          (u8 *)cmdq->tfds +
+                                                          tfd_size * tfdidx);
                        caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen);
 
                        if (cmdlen) {
@@ -3451,6 +3456,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
        spin_lock_init(&trans_pcie->reg_lock);
        mutex_init(&trans_pcie->mutex);
        init_waitqueue_head(&trans_pcie->ucode_write_waitq);
+
+       trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator",
+                                                  WQ_HIGHPRI | WQ_UNBOUND, 1);
+       if (!trans_pcie->rba.alloc_wq) {
+               ret = -ENOMEM;
+               goto out_free_trans;
+       }
+       INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work);
+
        trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page);
        if (!trans_pcie->tso_hdr_page) {
                ret = -ENOMEM;
@@ -3585,10 +3599,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
                trans_pcie->inta_mask = CSR_INI_SET_MASK;
         }
 
-       trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator",
-                                                  WQ_HIGHPRI | WQ_UNBOUND, 1);
-       INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work);
-
 #ifdef CONFIG_IWLWIFI_DEBUGFS
        trans_pcie->fw_mon_data.state = IWL_FW_MON_DBGFS_STATE_CLOSED;
        mutex_init(&trans_pcie->fw_mon_data.mutex);
@@ -3600,6 +3610,8 @@ out_free_ict:
        iwl_pcie_free_ict(trans);
 out_no_pci:
        free_percpu(trans_pcie->tso_hdr_page);
+       destroy_workqueue(trans_pcie->rba.alloc_wq);
+out_free_trans:
        iwl_trans_free(trans);
        return ERR_PTR(ret);
 }
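
Two fixes are folded together here: alloc_workqueue() can fail and its
result was never checked, and the allocation now happens early enough that
nothing can queue work on it before it exists; the error ladder gains the
matching destroy_workqueue() via the new out_free_trans label. The minimal
check/unwind pairing, with the same kernel API:

        #include <linux/workqueue.h>

        static int init_wq(struct workqueue_struct **wq)
        {
                *wq = alloc_workqueue("rb_allocator",
                                      WQ_HIGHPRI | WQ_UNBOUND, 1);
                if (!*wq)
                        return -ENOMEM; /* queue_work() on NULL would oops */
                return 0;
        }

        static void fini_wq(struct workqueue_struct *wq)
        {
                destroy_workqueue(wq);  /* mirrors the new error label */
        }
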
index 158a3d7..e323e9a 100644 (file)
@@ -3041,30 +3041,6 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
        }
 }
 
-
-/*
- * HostAP uses two layers of net devices, where the inner
- * layer gets called all the time from the outer layer.
- * This is a natural nesting, which needs a split lock type.
- */
-static struct lock_class_key hostap_netdev_xmit_lock_key;
-static struct lock_class_key hostap_netdev_addr_lock_key;
-
-static void prism2_set_lockdep_class_one(struct net_device *dev,
-                                        struct netdev_queue *txq,
-                                        void *_unused)
-{
-       lockdep_set_class(&txq->_xmit_lock,
-                         &hostap_netdev_xmit_lock_key);
-}
-
-static void prism2_set_lockdep_class(struct net_device *dev)
-{
-       lockdep_set_class(&dev->addr_list_lock,
-                         &hostap_netdev_addr_lock_key);
-       netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL);
-}
-
 static struct net_device *
 prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
                       struct device *sdev)
@@ -3223,7 +3199,6 @@ while (0)
        if (ret >= 0)
                ret = register_netdevice(dev);
 
-       prism2_set_lockdep_class(dev);
        rtnl_unlock();
        if (ret < 0) {
                printk(KERN_WARNING "%s: register netdevice failed!\n",
index 6359560..14f562c 100644 (file)
@@ -1261,8 +1261,8 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw,
        skb_orphan(skb);
        skb_dst_drop(skb);
        skb->mark = 0;
-       secpath_reset(skb);
-       nf_reset(skb);
+       skb_ext_reset(skb);
+       nf_reset_ct(skb);
 
        /*
         * Get absolute mactime here so all HWs RX at the "same time", and
@@ -4026,7 +4026,7 @@ static int __init init_mac80211_hwsim(void)
        err = dev_alloc_name(hwsim_mon, hwsim_mon->name);
        if (err < 0) {
                rtnl_unlock();
-               goto out_free_radios;
+               goto out_free_mon;
        }
 
        err = register_netdevice(hwsim_mon);
index 4d03596..d7a1ddc 100644 (file)
@@ -8,6 +8,8 @@ mt76-y := \
        mmio.o util.o trace.o dma.o mac80211.o debugfs.o eeprom.o \
        tx.o agg-rx.o mcu.o
 
+mt76-$(CONFIG_PCI) += pci.o
+
 mt76-usb-y := usb.o usb_trace.o
 
 CFLAGS_trace.o := -I$(src)
index c747eb2..8f69d00 100644 (file)
@@ -53,8 +53,10 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
        u32 ctrl;
        int i, idx = -1;
 
-       if (txwi)
+       if (txwi) {
                q->entry[q->head].txwi = DMA_DUMMY_DATA;
+               q->entry[q->head].skip_buf0 = true;
+       }
 
        for (i = 0; i < nbufs; i += 2, buf += 2) {
                u32 buf0 = buf[0].addr, buf1 = 0;
@@ -97,7 +99,7 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
        __le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
        u32 ctrl = le32_to_cpu(__ctrl);
 
-       if (!e->txwi || !e->skb) {
+       if (!e->skip_buf0) {
                __le32 addr = READ_ONCE(q->desc[idx].buf0);
                u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl);
 
index 570c159..8aec7cc 100644 (file)
@@ -93,8 +93,9 @@ struct mt76_queue_entry {
                struct urb *urb;
        };
        enum mt76_txq_id qid;
-       bool schedule;
-       bool done;
+       bool skip_buf0:1;
+       bool schedule:1;
+       bool done:1;
 };
 
 struct mt76_queue_regs {
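
mt76_queue_entry gains a skip_buf0 flag that is set at enqueue time,
replacing the fragile "!e->txwi || !e->skb" inference in
mt76_dma_tx_cleanup_idx(); turning all three bools into one-bit bitfields
keeps the struct from growing. An illustration of the packing (the exact
layout is compiler- and ABI-specific; the saving, not the layout, is the
point):

        #include <stdbool.h>

        struct flags_plain { bool a, b, c; };           /* >= 3 bytes */
        struct flags_bits  { bool a:1, b:1, c:1; };     /* typically 1 byte */
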
@@ -578,6 +579,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
 #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
 
 void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
+void mt76_pci_disable_aspm(struct pci_dev *pdev);
 
 static inline u16 mt76_chip(struct mt76_dev *dev)
 {
index 275d5ea..842cd81 100644 (file)
@@ -333,7 +333,6 @@ static int mt7615_driver_own(struct mt7615_dev *dev)
 
 static int mt7615_load_patch(struct mt7615_dev *dev)
 {
-       const char *firmware = MT7615_ROM_PATCH;
        const struct mt7615_patch_hdr *hdr;
        const struct firmware *fw = NULL;
        int len, ret, sem;
@@ -349,7 +348,7 @@ static int mt7615_load_patch(struct mt7615_dev *dev)
                return -EAGAIN;
        }
 
-       ret = request_firmware(&fw, firmware, dev->mt76.dev);
+       ret = request_firmware(&fw, MT7615_ROM_PATCH, dev->mt76.dev);
        if (ret)
                goto out;
 
@@ -447,13 +446,11 @@ mt7615_mcu_send_ram_firmware(struct mt7615_dev *dev,
 
 static int mt7615_load_ram(struct mt7615_dev *dev)
 {
-       const struct firmware *fw;
        const struct mt7615_fw_trailer *hdr;
-       const char *n9_firmware = MT7615_FIRMWARE_N9;
-       const char *cr4_firmware = MT7615_FIRMWARE_CR4;
+       const struct firmware *fw;
        int ret;
 
-       ret = request_firmware(&fw, n9_firmware, dev->mt76.dev);
+       ret = request_firmware(&fw, MT7615_FIRMWARE_N9, dev->mt76.dev);
        if (ret)
                return ret;
 
@@ -482,7 +479,7 @@ static int mt7615_load_ram(struct mt7615_dev *dev)
 
        release_firmware(fw);
 
-       ret = request_firmware(&fw, cr4_firmware, dev->mt76.dev);
+       ret = request_firmware(&fw, MT7615_FIRMWARE_CR4, dev->mt76.dev);
        if (ret)
                return ret;
 
index cef3fd4..7963e30 100644 (file)
@@ -26,9 +26,9 @@
 #define MT7615_RX_RING_SIZE            1024
 #define MT7615_RX_MCU_RING_SIZE                512
 
-#define MT7615_FIRMWARE_CR4            "mt7615_cr4.bin"
-#define MT7615_FIRMWARE_N9             "mt7615_n9.bin"
-#define MT7615_ROM_PATCH               "mt7615_rom_patch.bin"
+#define MT7615_FIRMWARE_CR4            "mediatek/mt7615_cr4.bin"
+#define MT7615_FIRMWARE_N9             "mediatek/mt7615_n9.bin"
+#define MT7615_ROM_PATCH               "mediatek/mt7615_rom_patch.bin"
 
 #define MT7615_EEPROM_SIZE             1024
 #define MT7615_TOKEN_SIZE              4096
index 73c3104..cf611d1 100644 (file)
@@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
        mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
 
+       mt76_pci_disable_aspm(pdev);
+
        return 0;
 
 error:
diff --git a/drivers/net/wireless/mediatek/mt76/pci.c b/drivers/net/wireless/mediatek/mt76/pci.c
new file mode 100644 (file)
index 0000000..04c5a69
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (C) 2019 Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#include <linux/pci.h>
+
+void mt76_pci_disable_aspm(struct pci_dev *pdev)
+{
+       struct pci_dev *parent = pdev->bus->self;
+       u16 aspm_conf, parent_aspm_conf = 0;
+
+       pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
+       aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+       if (parent) {
+               pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
+                                         &parent_aspm_conf);
+               parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+       }
+
+       if (!aspm_conf && (!parent || !parent_aspm_conf)) {
+               /* aspm already disabled */
+               return;
+       }
+
+       dev_info(&pdev->dev, "disabling ASPM %s %s\n",
+                (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
+                (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
+
+       if (IS_ENABLED(CONFIG_PCIEASPM)) {
+               int err;
+
+               err = pci_disable_link_state(pdev, aspm_conf);
+               if (!err)
+                       return;
+       }
+
+       /* Both the device and its parent should have the same ASPM setting.
+        * Disable ASPM in the downstream component first, then upstream.
+        */
+       pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
+       if (parent)
+               pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
+                                          aspm_conf);
+}
+EXPORT_SYMBOL_GPL(mt76_pci_disable_aspm);
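
The helper prefers the ASPM core, pci_disable_link_state(), and only pokes
PCI_EXP_LNKCTL by hand when CONFIG_PCIEASPM is off or the core call fails.
Clearing the downstream endpoint before its upstream parent avoids a window
in which the parent still advertises a link state the endpoint can no longer
enter; that rationale is our reading of the comment, not spelled out further
in the patch. Usage mirrors the mt76x0 probe hunk above (example_probe is a
hypothetical caller):

        #include <linux/pci.h>

        void mt76_pci_disable_aspm(struct pci_dev *pdev);

        static int example_probe(struct pci_dev *pdev,
                                 const struct pci_device_id *id)
        {
                (void)id;
                mt76_pci_disable_aspm(pdev);    /* before DMA-heavy setup */
                return 0;
        }
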
index 858f8aa..f8a9244 100644 (file)
@@ -98,17 +98,17 @@ config RT2800PCI_RT53XX
        bool "rt2800pci - Include support for rt53xx devices (EXPERIMENTAL)"
        default y
        ---help---
-         This adds support for rt53xx wireless chipset family to the
-         rt2800pci driver.
-         Supported chips: RT5390
+        This adds support for rt53xx wireless chipset family to the
+        rt2800pci driver.
+        Supported chips: RT5390
 
 config RT2800PCI_RT3290
        bool "rt2800pci - Include support for rt3290 devices (EXPERIMENTAL)"
        default y
        ---help---
-         This adds support for rt3290 wireless chipset family to the
-         rt2800pci driver.
-         Supported chips: RT3290
+        This adds support for rt3290 wireless chipset family to the
+        rt2800pci driver.
+        Supported chips: RT3290
 endif
 
 config RT2500USB
@@ -176,16 +176,16 @@ config RT2800USB_RT3573
 config RT2800USB_RT53XX
        bool "rt2800usb - Include support for rt53xx devices (EXPERIMENTAL)"
        ---help---
-         This adds support for rt53xx wireless chipset family to the
-         rt2800usb driver.
-         Supported chips: RT5370
+        This adds support for rt53xx wireless chipset family to the
+        rt2800usb driver.
+        Supported chips: RT5370
 
 config RT2800USB_RT55XX
        bool "rt2800usb - Include support for rt55xx devices (EXPERIMENTAL)"
        ---help---
-         This adds support for rt55xx wireless chipset family to the
-         rt2800usb driver.
-         Supported chips: RT5572
+        This adds support for rt55xx wireless chipset family to the
+        rt2800usb driver.
+        Supported chips: RT5572
 
 config RT2800USB_UNKNOWN
        bool "rt2800usb - Include support for unknown (USB) devices"
index 2b216ed..a90a518 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/leds.h>
 #include <linux/mutex.h>
 #include <linux/etherdevice.h>
-#include <linux/input-polldev.h>
 #include <linux/kfifo.h>
 #include <linux/hrtimer.h>
 #include <linux/average.h>
index 4d4e388..f239530 100644 (file)
@@ -555,7 +555,7 @@ static ssize_t rt2x00debug_write_restart_hw(struct file *file,
 {
        struct rt2x00debug_intf *intf = file->private_data;
        struct rt2x00_dev *rt2x00dev = intf->rt2x00dev;
-       static unsigned long last_reset;
+       static unsigned long last_reset = INITIAL_JIFFIES;
 
        if (!rt2x00_has_cap_restart_hw(rt2x00dev))
                return -EOPNOTSUPP;
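
jiffies does not start at 0: it starts at INITIAL_JIFFIES, about five
minutes before the wrap point, precisely to surface wrap bugs like this one.
With last_reset implicitly zero, the "enough time has passed since the last
reset" test failed for roughly the first five minutes of uptime, so restart
requests were refused on a freshly booted system. The rate-limit idiom being
fixed, with an assumed two-second window:

        #include <linux/jiffies.h>
        #include <linux/types.h>

        static bool allow_restart(void)
        {
                static unsigned long last = INITIAL_JIFFIES;

                if (time_before(jiffies, last + 2 * HZ))
                        return false;   /* still inside the window */
                last = jiffies;
                return true;
        }
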
index 6087ec7..f88d265 100644 (file)
@@ -822,7 +822,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
                hdr = rtl_get_hdr(skb);
                fc = rtl_get_fc(skb);
 
-               if (!stats.crc && !stats.hwerror) {
+               if (!stats.crc && !stats.hwerror && (skb->len > FCS_LEN)) {
                        memcpy(IEEE80211_SKB_RXCB(skb), &rx_status,
                               sizeof(rx_status));
 
@@ -859,6 +859,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
                                _rtl_pci_rx_to_mac80211(hw, skb, rx_status);
                        }
                } else {
+                       /* drop packets with errors or those too short */
                        dev_kfree_skb_any(skb);
                }
 new_trx_end:
index 70f04c2..fff8dda 100644 (file)
@@ -754,6 +754,9 @@ static void rtl_p2p_noa_ie(struct ieee80211_hw *hw, void *data,
                                return;
                        } else {
                                noa_num = (noa_len - 2) / 13;
+                               if (noa_num > P2P_MAX_NOA_NUM)
+                                       noa_num = P2P_MAX_NOA_NUM;
+
                        }
                        noa_index = ie[3];
                        if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode ==
@@ -848,6 +851,9 @@ static void rtl_p2p_action_ie(struct ieee80211_hw *hw, void *data,
                                return;
                        } else {
                                noa_num = (noa_len - 2) / 13;
+                               if (noa_num > P2P_MAX_NOA_NUM)
+                                       noa_num = P2P_MAX_NOA_NUM;
+
                        }
                        noa_index = ie[3];
                        if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode ==
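
Both hunks apply the same hardening: noa_num is parsed from an
attacker-controllable information element, while the driver's Notice of
Absence bookkeeping only has room for P2P_MAX_NOA_NUM descriptors, so the
count must be clamped before it indexes any fixed-size array. The pattern,
condensed:

    /* Clamp a count derived from over-the-air data (13 bytes per NoA
     * descriptor) before using it as an array bound. */
    noa_num = (noa_len - 2) / 13;
    if (noa_num > P2P_MAX_NOA_NUM)
            noa_num = P2P_MAX_NOA_NUM;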
index fc14b37..b61b073 100644 (file)
@@ -707,9 +707,6 @@ int rtw_download_firmware(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
        rtwdev->h2c.last_box_num = 0;
        rtwdev->h2c.seq = 0;
 
-       rtw_fw_send_general_info(rtwdev);
-       rtw_fw_send_phydm_info(rtwdev);
-
        rtw_flag_set(rtwdev, RTW_FLAG_FW_RUNNING);
 
        return 0;
index fc8f621..6dd4577 100644 (file)
@@ -704,6 +704,10 @@ static int rtw_power_on(struct rtw_dev *rtwdev)
                goto err_off;
        }
 
+       /* send H2C after HCI has started */
+       rtw_fw_send_general_info(rtwdev);
+       rtw_fw_send_phydm_info(rtwdev);
+
        wifi_only = !rtwdev->efuse.btcoex;
        rtw_coex_power_on_setting(rtwdev);
        rtw_coex_init_hw_config(rtwdev, wifi_only);
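
Paired with the previous hunk, this moves the two H2C firmware messages out
of rtw_download_firmware() and into the power-on path, after the host
interface is running. A sketch of the intended ordering, assuming the
rtw_hci_start() call sits immediately above the inserted lines:

    ret = rtw_hci_start(rtwdev);        /* assumed context */
    if (ret)
            goto err_off;

    /* H2C commands travel over the host interface, so they can only
     * be issued once that interface has started. */
    rtw_fw_send_general_info(rtwdev);
    rtw_fw_send_phydm_info(rtwdev);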
index 3fdb52a..d90928b 100644 (file)
@@ -90,16 +90,13 @@ static inline void *rtw_pci_get_tx_desc(struct rtw_pci_tx_ring *tx_ring, u8 idx)
        return tx_ring->r.head + offset;
 }
 
-static void rtw_pci_free_tx_ring(struct rtw_dev *rtwdev,
-                                struct rtw_pci_tx_ring *tx_ring)
+static void rtw_pci_free_tx_ring_skbs(struct rtw_dev *rtwdev,
+                                     struct rtw_pci_tx_ring *tx_ring)
 {
        struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
        struct rtw_pci_tx_data *tx_data;
        struct sk_buff *skb, *tmp;
        dma_addr_t dma;
-       u8 *head = tx_ring->r.head;
-       u32 len = tx_ring->r.len;
-       int ring_sz = len * tx_ring->r.desc_size;
 
        /* free every skb remaining in the tx list */
        skb_queue_walk_safe(&tx_ring->queue, skb, tmp) {
@@ -110,21 +107,30 @@ static void rtw_pci_free_tx_ring(struct rtw_dev *rtwdev,
                pci_unmap_single(pdev, dma, skb->len, PCI_DMA_TODEVICE);
                dev_kfree_skb_any(skb);
        }
+}
+
+static void rtw_pci_free_tx_ring(struct rtw_dev *rtwdev,
+                                struct rtw_pci_tx_ring *tx_ring)
+{
+       struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
+       u8 *head = tx_ring->r.head;
+       u32 len = tx_ring->r.len;
+       int ring_sz = len * tx_ring->r.desc_size;
+
+       rtw_pci_free_tx_ring_skbs(rtwdev, tx_ring);
 
        /* free the ring itself */
        pci_free_consistent(pdev, ring_sz, head, tx_ring->r.dma);
        tx_ring->r.head = NULL;
 }
 
-static void rtw_pci_free_rx_ring(struct rtw_dev *rtwdev,
-                                struct rtw_pci_rx_ring *rx_ring)
+static void rtw_pci_free_rx_ring_skbs(struct rtw_dev *rtwdev,
+                                     struct rtw_pci_rx_ring *rx_ring)
 {
        struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
        struct sk_buff *skb;
-       dma_addr_t dma;
-       u8 *head = rx_ring->r.head;
        int buf_sz = RTK_PCI_RX_BUF_SIZE;
-       int ring_sz = rx_ring->r.desc_size * rx_ring->r.len;
+       dma_addr_t dma;
        int i;
 
        for (i = 0; i < rx_ring->r.len; i++) {
@@ -137,6 +143,16 @@ static void rtw_pci_free_rx_ring(struct rtw_dev *rtwdev,
                dev_kfree_skb(skb);
                rx_ring->buf[i] = NULL;
        }
+}
+
+static void rtw_pci_free_rx_ring(struct rtw_dev *rtwdev,
+                                struct rtw_pci_rx_ring *rx_ring)
+{
+       struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
+       u8 *head = rx_ring->r.head;
+       int ring_sz = rx_ring->r.desc_size * rx_ring->r.len;
+
+       rtw_pci_free_rx_ring_skbs(rtwdev, rx_ring);
 
        pci_free_consistent(pdev, ring_sz, head, rx_ring->r.dma);
 }
@@ -484,6 +500,17 @@ static void rtw_pci_dma_reset(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci)
        rtwpci->rx_tag = 0;
 }
 
+static void rtw_pci_dma_release(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci)
+{
+       struct rtw_pci_tx_ring *tx_ring;
+       u8 queue;
+
+       for (queue = 0; queue < RTK_MAX_TX_QUEUE_NUM; queue++) {
+               tx_ring = &rtwpci->tx_rings[queue];
+               rtw_pci_free_tx_ring_skbs(rtwdev, tx_ring);
+       }
+}
+
 static int rtw_pci_start(struct rtw_dev *rtwdev)
 {
        struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
@@ -505,6 +532,7 @@ static void rtw_pci_stop(struct rtw_dev *rtwdev)
 
        spin_lock_irqsave(&rtwpci->irq_lock, flags);
        rtw_pci_disable_interrupt(rtwdev, rtwpci);
+       rtw_pci_dma_release(rtwdev, rtwpci);
        spin_unlock_irqrestore(&rtwpci->irq_lock, flags);
 }
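
The net effect of the refactor: the skb-reclaim half of each free routine is
now callable on its own, so rtw_pci_stop() can drop in-flight buffers under
irq_lock without destroying the DMA rings that the next start still needs.
Condensed:

    /* Reclaim queued skbs on stop; keep the descriptor rings alive. */
    spin_lock_irqsave(&rtwpci->irq_lock, flags);
    rtw_pci_disable_interrupt(rtwdev, rtwpci);
    rtw_pci_dma_release(rtwdev, rtwpci);        /* frees skbs only */
    spin_unlock_irqrestore(&rtwpci->irq_lock, flags);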
 
index be92e12..7997cc6 100644 (file)
@@ -548,6 +548,7 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev,
        priv->is_connected = false;
        priv->is_up = false;
        INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete);
+       __module_get(THIS_MODULE);
 
        return 0;
 unregister_netdev:
@@ -578,6 +579,7 @@ static void virt_wifi_dellink(struct net_device *dev,
        netdev_upper_dev_unlink(priv->lowerdev, dev);
 
        unregister_netdevice_queue(dev, head);
+       module_put(THIS_MODULE);
 
        /* Deleting the wiphy is handled in the module destructor. */
 }
@@ -590,6 +592,42 @@ static struct rtnl_link_ops virt_wifi_link_ops = {
        .priv_size      = sizeof(struct virt_wifi_netdev_priv),
 };
 
+static bool netif_is_virt_wifi_dev(const struct net_device *dev)
+{
+       return rcu_access_pointer(dev->rx_handler) == virt_wifi_rx_handler;
+}
+
+static int virt_wifi_event(struct notifier_block *this, unsigned long event,
+                          void *ptr)
+{
+       struct net_device *lower_dev = netdev_notifier_info_to_dev(ptr);
+       struct virt_wifi_netdev_priv *priv;
+       struct net_device *upper_dev;
+       LIST_HEAD(list_kill);
+
+       if (!netif_is_virt_wifi_dev(lower_dev))
+               return NOTIFY_DONE;
+
+       switch (event) {
+       case NETDEV_UNREGISTER:
+               priv = rtnl_dereference(lower_dev->rx_handler_data);
+               if (!priv)
+                       return NOTIFY_DONE;
+
+               upper_dev = priv->upperdev;
+
+               upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill);
+               unregister_netdevice_many(&list_kill);
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block virt_wifi_notifier = {
+       .notifier_call = virt_wifi_event,
+};
+
 /* Acquires and releases the rtnl lock. */
 static int __init virt_wifi_init_module(void)
 {
@@ -598,14 +636,25 @@ static int __init virt_wifi_init_module(void)
        /* Guaranteed to be locally-administered and not multicast. */
        eth_random_addr(fake_router_bssid);
 
+       err = register_netdevice_notifier(&virt_wifi_notifier);
+       if (err)
+               return err;
+
+       err = -ENOMEM;
        common_wiphy = virt_wifi_make_wiphy();
        if (!common_wiphy)
-               return -ENOMEM;
+               goto notifier;
 
        err = rtnl_link_register(&virt_wifi_link_ops);
        if (err)
-               virt_wifi_destroy_wiphy(common_wiphy);
+               goto destroy_wiphy;
 
+       return 0;
+
+destroy_wiphy:
+       virt_wifi_destroy_wiphy(common_wiphy);
+notifier:
+       unregister_netdevice_notifier(&virt_wifi_notifier);
        return err;
 }
 
@@ -615,6 +664,7 @@ static void __exit virt_wifi_cleanup_module(void)
        /* Will delete any devices that depend on the wiphy. */
        rtnl_link_unregister(&virt_wifi_link_ops);
        virt_wifi_destroy_wiphy(common_wiphy);
+       unregister_netdevice_notifier(&virt_wifi_notifier);
 }
 
 module_init(virt_wifi_init_module);
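
The virt_wifi changes combine two lifetime fixes: every link takes a module
reference so the rx handler cannot run after unload, and a netdevice notifier
deletes the upper device when its lower device unregisters. A minimal sketch
of the notifier half, with example_ names standing in for the driver's own:

    /* React to a lower device disappearing. */
    static int example_event(struct notifier_block *nb,
                             unsigned long event, void *ptr)
    {
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);

            if (event == NETDEV_UNREGISTER)
                    pr_info("%s: lower device going away\n", dev->name);
            return NOTIFY_DONE;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_event,
    };

Register with register_netdevice_notifier(&example_nb) in module init and
unregister in module exit, unwinding in reverse order on failure, as the
reworked init error path above does.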
index 4e44ea8..7b5c2fe 100644 (file)
@@ -1633,7 +1633,7 @@ static bool check_read_regs(struct zd_usb *usb, struct usb_req_read_regs *req,
         */
        if (rr->length < struct_size(regs, regs, count)) {
                dev_dbg_f(zd_usb_dev(usb),
-                        "error: actual length %d less than expected %ld\n",
+                        "error: actual length %d less than expected %zu\n",
                         rr->length, struct_size(regs, regs, count));
                return false;
        }
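
The format fix is worth spelling out: struct_size() returns a size_t, whose
printk specifier is %zu. %ld happens to work on 64-bit builds but breaks
where size_t and long differ. A sketch:

    /* size_t must be printed with %zu to stay portable. */
    size_t need = struct_size(regs, regs, count);

    if (rr->length < need)
            dev_dbg_f(zd_usb_dev(usb),
                      "error: actual length %d less than expected %zu\n",
                      rr->length, need);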
index 240f762..103ed00 100644 (file)
@@ -719,7 +719,6 @@ err_unmap:
        xenvif_unmap_frontend_data_rings(queue);
        netif_napi_del(&queue->napi);
 err:
-       module_put(THIS_MODULE);
        return err;
 }
 
index e14ec75..482c6c8 100644 (file)
@@ -887,9 +887,9 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
        return 0;
 }
 
-static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
-                                 struct sk_buff *skb,
-                                 struct sk_buff_head *list)
+static int xennet_fill_frags(struct netfront_queue *queue,
+                            struct sk_buff *skb,
+                            struct sk_buff_head *list)
 {
        RING_IDX cons = queue->rx.rsp_cons;
        struct sk_buff *nskb;
@@ -908,7 +908,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
                if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
                        queue->rx.rsp_cons = ++cons + skb_queue_len(list);
                        kfree_skb(nskb);
-                       return ~0U;
+                       return -ENOENT;
                }
 
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
@@ -919,7 +919,9 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
                kfree_skb(nskb);
        }
 
-       return cons;
+       queue->rx.rsp_cons = cons;
+
+       return 0;
 }
 
 static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
@@ -1045,8 +1047,7 @@ err:
                skb->data_len = rx->status;
                skb->len += rx->status;
 
-               i = xennet_fill_frags(queue, skb, &tmpq);
-               if (unlikely(i == ~0U))
+               if (unlikely(xennet_fill_frags(queue, skb, &tmpq)))
                        goto err;
 
                if (rx->flags & XEN_NETRXF_csum_blank)
@@ -1056,7 +1057,7 @@ err:
 
                __skb_queue_tail(&rxq, skb);
 
-               queue->rx.rsp_cons = ++i;
+               i = ++queue->rx.rsp_cons;
                work_done++;
        }
 
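
The xennet_fill_frags() rework replaces the ~0U ring-index sentinel with a
plain errno and lets the helper publish the consumed index itself, which is
harder to misuse at the call site:

    /* An errno return reads unambiguously; no sentinel comparison. */
    if (unlikely(xennet_fill_frags(queue, skb, &tmpq)))
            goto err;                   /* helper already set rsp_cons */

    i = ++queue->rx.rsp_cons;           /* account for the head slot */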
index c5289ea..e897e4d 100644 (file)
@@ -547,18 +547,25 @@ static int pn533_usb_probe(struct usb_interface *interface,
 
        rc = pn533_finalize_setup(priv);
        if (rc)
-               goto error;
+               goto err_deregister;
 
        usb_set_intfdata(interface, phy);
 
        return 0;
 
+err_deregister:
+       pn533_unregister_device(phy->priv);
 error:
+       usb_kill_urb(phy->in_urb);
+       usb_kill_urb(phy->out_urb);
+       usb_kill_urb(phy->ack_urb);
+
        usb_free_urb(phy->in_urb);
        usb_free_urb(phy->out_urb);
        usb_free_urb(phy->ack_urb);
        usb_put_dev(phy->udev);
        kfree(in_buf);
+       kfree(phy->ack_buffer);
 
        return rc;
 }
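
The probe error path follows the usual layered-goto unwind, with two details
the fix adds: in-flight URBs are killed before being freed, and the newly
registered device is unregistered first. Schematically, for one of the three
URBs:

    err_deregister:
            pn533_unregister_device(phy->priv); /* undo the last step */
    error:
            usb_kill_urb(phy->in_urb);          /* stop pending I/O ... */
            usb_free_urb(phy->in_urb);          /* ... then release it */
            kfree(phy->ack_buffer);
            return rc;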
index 7eda62a..9642971 100644 (file)
@@ -661,7 +661,7 @@ static int st95hf_error_handling(struct st95hf_context *stcontext,
                        result = -ETIMEDOUT;
                else
                        result = -EIO;
-       return  result;
+               return result;
        }
 
        /* Check for CRC err only if CRC is present in the tag response */
index 2859cc9..156c2a1 100644 (file)
@@ -78,7 +78,7 @@ static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
        if (idx < 0 || idx > ndev->mw_count)
                return -EINVAL;
 
-       return 1 << idx;
+       return ndev->dev_data->mw_idx << idx;
 }
 
 static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx)
@@ -909,7 +909,7 @@ static int amd_init_ntb(struct amd_ntb_dev *ndev)
 {
        void __iomem *mmio = ndev->self_mmio;
 
-       ndev->mw_count = AMD_MW_CNT;
+       ndev->mw_count = ndev->dev_data->mw_count;
        ndev->spad_count = AMD_SPADS_CNT;
        ndev->db_count = AMD_DB_CNT;
 
@@ -1069,6 +1069,8 @@ static int amd_ntb_pci_probe(struct pci_dev *pdev,
                goto err_ndev;
        }
 
+       ndev->dev_data = (struct ntb_dev_data *)id->driver_data;
+
        ndev_init_struct(ndev, pdev);
 
        rc = amd_ntb_init_pci(ndev, pdev);
@@ -1123,9 +1125,21 @@ static const struct file_operations amd_ntb_debugfs_info = {
        .read = ndev_debugfs_read,
 };
 
+static const struct ntb_dev_data dev_data[] = {
+       { /* for device 145b */
+               .mw_count = 3,
+               .mw_idx = 1,
+       },
+       { /* for device 148b */
+               .mw_count = 2,
+               .mw_idx = 2,
+       },
+};
+
 static const struct pci_device_id amd_ntb_pci_tbl[] = {
-       {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
-       {0}
+       { PCI_VDEVICE(AMD, 0x145b), (kernel_ulong_t)&dev_data[0] },
+       { PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] },
+       { 0, }
 };
 MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
 
index 8f3617a..139a307 100644 (file)
@@ -52,7 +52,6 @@
 #include <linux/ntb.h>
 #include <linux/pci.h>
 
-#define PCI_DEVICE_ID_AMD_NTB  0x145B
 #define AMD_LINK_HB_TIMEOUT    msecs_to_jiffies(1000)
 #define AMD_LINK_STATUS_OFFSET 0x68
 #define NTB_LIN_STA_ACTIVE_BIT 0x00000002
@@ -93,7 +92,6 @@ static inline void _write64(u64 val, void __iomem *mmio)
 
 enum {
        /* AMD NTB Capability */
-       AMD_MW_CNT              = 3,
        AMD_DB_CNT              = 16,
        AMD_MSIX_VECTOR_CNT     = 24,
        AMD_SPADS_CNT           = 16,
@@ -170,6 +168,11 @@ enum {
        AMD_PEER_OFFSET         = 0x400,
 };
 
+struct ntb_dev_data {
+       const unsigned char mw_count;
+       const unsigned int mw_idx;
+};
+
 struct amd_ntb_dev;
 
 struct amd_ntb_vec {
@@ -185,6 +188,7 @@ struct amd_ntb_dev {
        u32 cntl_sta;
        u32 peer_sta;
 
+       struct ntb_dev_data *dev_data;
        unsigned char mw_count;
        unsigned char spad_count;
        unsigned char db_count;
index bfc7cac..c79b54c 100644 (file)
@@ -4,11 +4,11 @@ config NTB_IDT
        depends on PCI
        select HWMON
        help
-        This driver supports NTB of cappable IDT PCIe-switches.
+        This driver supports the NTB function of capable IDT PCIe-switches.
 
         Some of the pre-initialization must be done before the IDT PCIe-switch
-        exposes it NT-functions correctly. It should be done by either proper
-        initialisation of EEPROM connected to master smbus of the switch or
+        exposes its NT-functions correctly. This can be done either by proper
+        initialization of an EEPROM connected to the master SMBus of the switch,
         or by the BIOS changing the corresponding register values over the
         slave-SMBus interface. Either way it must be done before PCI bus
         enumeration finishes in the Linux kernel.
index f495945..86ffa71 100644 (file)
@@ -306,7 +306,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
        if (rc)
                return rc;
 
-       if (addr == 0 || size == 0) {
+       if (size == 0) {
                if (widx < nr_direct_mw)
                        switchtec_ntb_mw_clr_direct(sndev, widx);
                else
index 40c90ca..00a5d57 100644 (file)
@@ -292,7 +292,7 @@ static int ntb_transport_bus_match(struct device *dev,
 static int ntb_transport_bus_probe(struct device *dev)
 {
        const struct ntb_transport_client *client;
-       int rc = -EINVAL;
+       int rc;
 
        get_device(dev);
 
index d028331..e9b7c2d 100644 (file)
@@ -1378,7 +1378,7 @@ static int perf_setup_peer_mw(struct perf_peer *peer)
        int ret;
 
        /* Get outbound MW parameters and map it */
-       ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr,
+       ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr,
                                   &peer->outbuf_size);
        if (ret)
                return ret;
index a8d5688..3e9f45a 100644 (file)
@@ -392,9 +392,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
        arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
        if (++(arena->freelist[lane].seq) == 4)
                arena->freelist[lane].seq = 1;
-       if (ent_e_flag(ent->old_map))
+       if (ent_e_flag(le32_to_cpu(ent->old_map)))
                arena->freelist[lane].has_err = 1;
-       arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
+       arena->freelist[lane].block = ent_lba(le32_to_cpu(ent->old_map));
 
        return ret;
 }
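
Both BTT hunks fix the same order-of-operations bug: ent_e_flag() and
ent_lba() expect a CPU-order value, so le32_to_cpu() must be applied to the
raw on-media word first, not to the helpers' output. The corrected shape:

    /* Convert endianness once, then extract the flag and the LBA. */
    u32 old_map = le32_to_cpu(ent->old_map);

    if (ent_e_flag(old_map))
            arena->freelist[lane].has_err = 1;
    arena->freelist[lane].block = ent_lba(old_map);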
@@ -560,8 +560,8 @@ static int btt_freelist_init(struct arena_info *arena)
                 * FIXME: if error clearing fails during init, we want to make
                 * the BTT read-only
                 */
-               if (ent_e_flag(log_new.old_map) &&
-                               !ent_normal(log_new.old_map)) {
+               if (ent_e_flag(le32_to_cpu(log_new.old_map)) &&
+                   !ent_normal(le32_to_cpu(log_new.old_map))) {
                        arena->freelist[i].has_err = 1;
                        ret = arena_clear_freelist_error(arena, i);
                        if (ret)
index 75a58a6..d47412d 100644 (file)
@@ -180,7 +180,7 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
        sector_t sector;
 
        /* make sure device is a region */
-       if (!is_nd_pmem(dev))
+       if (!is_memory(dev))
                return 0;
 
        nd_region = to_nd_region(dev);
index 4340132..cca0a3b 100644 (file)
@@ -1987,7 +1987,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
                nd_mapping = &nd_region->mapping[i];
                label_ent = list_first_entry_or_null(&nd_mapping->labels,
                                typeof(*label_ent), list);
-               label0 = label_ent ? label_ent->label : 0;
+               label0 = label_ent ? label_ent->label : NULL;
 
                if (!label0) {
                        WARN_ON(1);
@@ -2322,8 +2322,9 @@ static struct device **scan_labels(struct nd_region *nd_region)
                        continue;
 
                /* skip labels that describe extents outside of the region */
-               if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end)
-                       continue;
+               if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start ||
+                   __le64_to_cpu(nd_label->dpa) > map_end)
+                               continue;
 
                i = add_namespace_resource(nd_region, nd_label, devs, count);
                if (i < 0)
index e89af4b..ee5c040 100644 (file)
@@ -289,11 +289,7 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
 struct nd_pfn *to_nd_pfn(struct device *dev);
 #if IS_ENABLED(CONFIG_NVDIMM_PFN)
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
-#else
-#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
-#endif
+#define MAX_NVDIMM_ALIGN       4
 
 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
 bool is_nd_pfn(struct device *dev);
index bb9cc5c..60d81fa 100644 (file)
@@ -103,39 +103,42 @@ static ssize_t align_show(struct device *dev,
        return sprintf(buf, "%ld\n", nd_pfn->align);
 }
 
-static const unsigned long *nd_pfn_supported_alignments(void)
+static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments)
 {
-       /*
-        * This needs to be a non-static variable because the *_SIZE
-        * macros aren't always constants.
-        */
-       const unsigned long supported_alignments[] = {
-               PAGE_SIZE,
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               HPAGE_PMD_SIZE,
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-               HPAGE_PUD_SIZE,
-#endif
-#endif
-               0,
-       };
-       static unsigned long data[ARRAY_SIZE(supported_alignments)];
 
-       memcpy(data, supported_alignments, sizeof(data));
+       alignments[0] = PAGE_SIZE;
+
+       if (has_transparent_hugepage()) {
+               alignments[1] = HPAGE_PMD_SIZE;
+               if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+                       alignments[2] = HPAGE_PUD_SIZE;
+       }
+
+       return alignments;
+}
+
+/*
+ * Use PMD mapping as the default alignment if supported
+ */
+static unsigned long nd_pfn_default_alignment(void)
+{
 
-       return data;
+       if (has_transparent_hugepage())
+               return HPAGE_PMD_SIZE;
+       return PAGE_SIZE;
 }
 
 static ssize_t align_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t len)
 {
        struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
+       unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
        ssize_t rc;
 
        nd_device_lock(dev);
        nvdimm_bus_lock(dev);
        rc = nd_size_select_store(dev, buf, &nd_pfn->align,
-                       nd_pfn_supported_alignments());
+                       nd_pfn_supported_alignments(aligns));
        dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
                        buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
@@ -259,7 +262,10 @@ static DEVICE_ATTR_RO(size);
 static ssize_t supported_alignments_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
-       return nd_size_select_show(0, nd_pfn_supported_alignments(), buf);
+       unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
+
+       return nd_size_select_show(0,
+                       nd_pfn_supported_alignments(aligns), buf);
 }
 static DEVICE_ATTR_RO(supported_alignments);
 
@@ -302,7 +308,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
                return NULL;
 
        nd_pfn->mode = PFN_MODE_NONE;
-       nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
+       nd_pfn->align = nd_pfn_default_alignment();
        dev = &nd_pfn->dev;
        device_initialize(&nd_pfn->dev);
        if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
@@ -412,6 +418,21 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
        return 0;
 }
 
+static bool nd_supported_alignment(unsigned long align)
+{
+       int i;
+       unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, };
+
+       if (align == 0)
+               return false;
+
+       nd_pfn_supported_alignments(supported);
+       for (i = 0; supported[i]; i++)
+               if (align == supported[i])
+                       return true;
+       return false;
+}
+
 /**
  * nd_pfn_validate - read and validate info-block
  * @nd_pfn: fsdax namespace runtime state / properties
@@ -496,6 +517,18 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
                return -EOPNOTSUPP;
        }
 
+       /*
+        * Check whether we support the alignment. For DAX, if the
+        * superblock alignment does not match, we won't initialize
+        * the device.
+        */
+       if (!nd_supported_alignment(align) &&
+                       !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) {
+               dev_err(&nd_pfn->dev, "init failed, alignment mismatch: "
+                               "%ld:%ld\n", nd_pfn->align, align);
+               return -EOPNOTSUPP;
+       }
+
        if (!nd_pfn->uuid) {
                /*
                 * When probing a namespace via nd_pfn_probe() the uuid
@@ -639,9 +672,11 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
        struct nd_namespace_common *ndns = nd_pfn->ndns;
        struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
        resource_size_t base = nsio->res.start + start_pad;
+       resource_size_t end = nsio->res.end - end_trunc;
        struct vmem_altmap __altmap = {
                .base_pfn = init_altmap_base(base),
                .reserve = init_altmap_reserve(base),
+               .end_pfn = PHYS_PFN(end),
        };
 
        memcpy(res, &nsio->res, sizeof(*res));
index 37bf871..0f6978e 100644 (file)
@@ -34,7 +34,7 @@ static int nd_region_probe(struct device *dev)
        if (rc)
                return rc;
 
-       if (is_nd_pmem(&nd_region->dev)) {
+       if (is_memory(&nd_region->dev)) {
                struct resource ndr_res;
 
                if (devm_init_badblocks(dev, &nd_region->bb))
@@ -123,7 +123,7 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event)
                struct nd_region *nd_region = to_nd_region(dev);
                struct resource res;
 
-               if (is_nd_pmem(&nd_region->dev)) {
+               if (is_memory(&nd_region->dev)) {
                        res.start = nd_region->ndr_start;
                        res.end = nd_region->ndr_start +
                                nd_region->ndr_size - 1;
index 3fd6b59..ef423ba 100644 (file)
@@ -632,11 +632,11 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
        if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
                return 0;
 
-       if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
+       if (!is_memory(dev) && a == &dev_attr_badblocks.attr)
                return 0;
 
        if (a == &dev_attr_resource.attr) {
-               if (is_nd_pmem(dev))
+               if (is_memory(dev))
                        return 0400;
                else
                        return 0;
@@ -1168,6 +1168,9 @@ EXPORT_SYMBOL_GPL(nvdimm_has_cache);
 
 bool is_nvdimm_sync(struct nd_region *nd_region)
 {
+       if (is_nd_volatile(&nd_region->dev))
+               return true;
+
        return is_nd_pmem(&nd_region->dev) &&
                !test_bit(ND_REGION_ASYNC, &nd_region->flags);
 }
index 9e45b20..89b8597 100644 (file)
@@ -177,6 +177,10 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
                        || !nvdimm->sec.flags)
                return -EIO;
 
+       /* No need to go further if security is disabled */
+       if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
+               return 0;
+
        if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
                dev_dbg(dev, "Security operation in progress.\n");
                return -EBUSY;
index 1ede176..fa7ba09 100644 (file)
@@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
         */
        if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
                return;
-       revalidate_disk(ns->disk);
        blk_set_queue_dying(ns->queue);
        /* Forcibly unquiesce queues to avoid blocking dispatch */
        blk_mq_unquiesce_queue(ns->queue);
+       /*
+        * Revalidate after unblocking dispatchers that may be holding bd_mutex
+        */
+       revalidate_disk(ns->disk);
 }
 
 static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -113,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
        /*
         * Only new queue scan work when admin and IO queues are both alive
         */
-       if (ctrl->state == NVME_CTRL_LIVE)
+       if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
                queue_work(nvme_wq, &ctrl->scan_work);
 }
 
+/*
+ * Use this function to proceed with scheduling reset_work for a controller
+ * that had previously been set to the resetting state. This is intended for
+ * code paths that can't be interrupted by other reset attempts. A hot removal
+ * may prevent this from succeeding.
+ */
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+{
+       if (ctrl->state != NVME_CTRL_RESETTING)
+               return -EBUSY;
+       if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
+               return -EBUSY;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
+
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -134,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
        ret = nvme_reset_ctrl(ctrl);
        if (!ret) {
                flush_work(&ctrl->reset_work);
-               if (ctrl->state != NVME_CTRL_LIVE &&
-                   ctrl->state != NVME_CTRL_ADMIN_ONLY)
+               if (ctrl->state != NVME_CTRL_LIVE)
                        ret = -ENETRESET;
        }
 
@@ -312,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 
        old_state = ctrl->state;
        switch (new_state) {
-       case NVME_CTRL_ADMIN_ONLY:
-               switch (old_state) {
-               case NVME_CTRL_CONNECTING:
-                       changed = true;
-                       /* FALLTHRU */
-               default:
-                       break;
-               }
-               break;
        case NVME_CTRL_LIVE:
                switch (old_state) {
                case NVME_CTRL_NEW:
@@ -336,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                switch (old_state) {
                case NVME_CTRL_NEW:
                case NVME_CTRL_LIVE:
-               case NVME_CTRL_ADMIN_ONLY:
                        changed = true;
                        /* FALLTHRU */
                default:
@@ -356,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
        case NVME_CTRL_DELETING:
                switch (old_state) {
                case NVME_CTRL_LIVE:
-               case NVME_CTRL_ADMIN_ONLY:
                case NVME_CTRL_RESETTING:
                case NVME_CTRL_CONNECTING:
                        changed = true;
@@ -378,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                break;
        }
 
-       if (changed)
+       if (changed) {
                ctrl->state = new_state;
+               wake_up_all(&ctrl->state_wq);
+       }
 
        spin_unlock_irqrestore(&ctrl->lock, flags);
        if (changed && ctrl->state == NVME_CTRL_LIVE)
@@ -388,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
 
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+       switch (ctrl->state) {
+       case NVME_CTRL_NEW:
+       case NVME_CTRL_LIVE:
+       case NVME_CTRL_RESETTING:
+       case NVME_CTRL_CONNECTING:
+               return false;
+       case NVME_CTRL_DELETING:
+       case NVME_CTRL_DEAD:
+               return true;
+       default:
+               WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+               return true;
+       }
+}
+
+/*
+ * Waits for the controller state to be resetting, or returns false if it is
+ * not possible to ever transition to that state.
+ */
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
+{
+       wait_event(ctrl->state_wq,
+                  nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
+                  nvme_state_terminal(ctrl));
+       return ctrl->state == NVME_CTRL_RESETTING;
+}
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
+
 static void nvme_free_ns_head(struct kref *ref)
 {
        struct nvme_ns_head *head =
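
nvme_wait_reset() pairs with the wake_up_all() added to
nvme_change_ctrl_state(): a caller blocks until the controller either wins
the transition to RESETTING or reaches a terminal state it can never leave.
A sketch of a disable path gated this way, mirroring the PCI driver's
nvme_disable_prepare_reset() further below:

    static int example_prepare_reset(struct nvme_ctrl *ctrl)
    {
            if (!nvme_wait_reset(ctrl))
                    return -EBUSY;      /* controller is being removed */
            /* safe to disable; nvme_try_sched_reset() can later queue
             * reset_work from the RESETTING state */
            return 0;
    }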
@@ -666,8 +708,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
                        if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
                                return BLK_STS_NOTSUPP;
                        control |= NVME_RW_PRINFO_PRACT;
-               } else if (req_op(req) == REQ_OP_WRITE) {
-                       t10_pi_prepare(req, ns->pi_type);
                }
 
                switch (ns->pi_type) {
@@ -690,13 +730,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 
 void nvme_cleanup_cmd(struct request *req)
 {
-       if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
-           nvme_req(req)->status == 0) {
-               struct nvme_ns *ns = req->rq_disk->private_data;
-
-               t10_pi_complete(req, ns->pi_type,
-                               blk_rq_bytes(req) >> ns->lba_shift);
-       }
        if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
                struct nvme_ns *ns = req->rq_disk->private_data;
                struct page *page = req->special_vec.bv_page;
@@ -856,7 +889,7 @@ out:
 static int nvme_submit_user_cmd(struct request_queue *q,
                struct nvme_command *cmd, void __user *ubuffer,
                unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-               u32 meta_seed, u32 *result, unsigned timeout)
+               u32 meta_seed, u64 *result, unsigned timeout)
 {
        bool write = nvme_is_write(cmd);
        struct nvme_ns *ns = q->queuedata;
@@ -897,7 +930,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
        else
                ret = nvme_req(req)->status;
        if (result)
-               *result = le32_to_cpu(nvme_req(req)->result.u32);
+               *result = le64_to_cpu(nvme_req(req)->result.u64);
        if (meta && !ret && !write) {
                if (copy_to_user(meta_buffer, meta, meta_len))
                        ret = -EFAULT;
@@ -1312,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
                if (ns->disk && nvme_revalidate_disk(ns->disk))
                        nvme_set_queue_dying(ns);
        up_read(&ctrl->namespaces_rwsem);
-
-       nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
 }
 
 static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -1329,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
                nvme_unfreeze(ctrl);
                nvme_mpath_unfreeze(ctrl->subsys);
                mutex_unlock(&ctrl->subsys->lock);
+               nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
                mutex_unlock(&ctrl->scan_lock);
        }
        if (effects & NVME_CMD_EFFECTS_CCC)
@@ -1344,6 +1376,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        struct nvme_command c;
        unsigned timeout = 0;
        u32 effects;
+       u64 result;
+       int status;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+       if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+               return -EFAULT;
+       if (cmd.flags)
+               return -EINVAL;
+
+       memset(&c, 0, sizeof(c));
+       c.common.opcode = cmd.opcode;
+       c.common.flags = cmd.flags;
+       c.common.nsid = cpu_to_le32(cmd.nsid);
+       c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+       c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+       c.common.cdw10 = cpu_to_le32(cmd.cdw10);
+       c.common.cdw11 = cpu_to_le32(cmd.cdw11);
+       c.common.cdw12 = cpu_to_le32(cmd.cdw12);
+       c.common.cdw13 = cpu_to_le32(cmd.cdw13);
+       c.common.cdw14 = cpu_to_le32(cmd.cdw14);
+       c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+
+       if (cmd.timeout_ms)
+               timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+       effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
+       status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+                       (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+                       (void __user *)(uintptr_t)cmd.metadata,
+                       cmd.metadata_len, 0, &result, timeout);
+       nvme_passthru_end(ctrl, effects);
+
+       if (status >= 0) {
+               if (put_user(result, &ucmd->result))
+                       return -EFAULT;
+       }
+
+       return status;
+}
+
+static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+                       struct nvme_passthru_cmd64 __user *ucmd)
+{
+       struct nvme_passthru_cmd64 cmd;
+       struct nvme_command c;
+       unsigned timeout = 0;
+       u32 effects;
        int status;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -1414,6 +1494,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
                srcu_read_unlock(&head->srcu, idx);
 }
 
+static bool is_ctrl_ioctl(unsigned int cmd)
+{
+       if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
+               return true;
+       if (is_sed_ioctl(cmd))
+               return true;
+       return false;
+}
+
+static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
+                                 void __user *argp,
+                                 struct nvme_ns_head *head,
+                                 int srcu_idx)
+{
+       struct nvme_ctrl *ctrl = ns->ctrl;
+       int ret;
+
+       nvme_get_ctrl(ns->ctrl);
+       nvme_put_ns_from_disk(head, srcu_idx);
+
+       switch (cmd) {
+       case NVME_IOCTL_ADMIN_CMD:
+               ret = nvme_user_cmd(ctrl, NULL, argp);
+               break;
+       case NVME_IOCTL_ADMIN64_CMD:
+               ret = nvme_user_cmd64(ctrl, NULL, argp);
+               break;
+       default:
+               ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+               break;
+       }
+       nvme_put_ctrl(ctrl);
+       return ret;
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                unsigned int cmd, unsigned long arg)
 {
@@ -1431,20 +1546,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
         * separately and drop the ns SRCU reference early.  This avoids a
         * deadlock when deleting namespaces using the passthrough interface.
         */
-       if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
-               struct nvme_ctrl *ctrl = ns->ctrl;
-
-               nvme_get_ctrl(ns->ctrl);
-               nvme_put_ns_from_disk(head, srcu_idx);
-
-               if (cmd == NVME_IOCTL_ADMIN_CMD)
-                       ret = nvme_user_cmd(ctrl, NULL, argp);
-               else
-                       ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
-
-               nvme_put_ctrl(ctrl);
-               return ret;
-       }
+       if (is_ctrl_ioctl(cmd))
+               return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
 
        switch (cmd) {
        case NVME_IOCTL_ID:
@@ -1457,6 +1560,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
        case NVME_IOCTL_SUBMIT_IO:
                ret = nvme_submit_io(ns, argp);
                break;
+       case NVME_IOCTL_IO64_CMD:
+               ret = nvme_user_cmd64(ns->ctrl, ns, argp);
+               break;
        default:
                if (ns->ndev)
                        ret = nvme_nvm_ioctl(ns, cmd, arg);
@@ -2298,6 +2404,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
                .vid = 0x14a4,
                .fr = "22301111",
                .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
+       },
+       {
+               /*
+                * This Kingston E8FK11.T firmware version has no interrupt
+                * after resume with actions related to suspend to idle
+                * https://bugzilla.kernel.org/show_bug.cgi?id=204887
+                */
+               .vid = 0x2646,
+               .fr = "E8FK11.T",
+               .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
        }
 };
 
@@ -2549,8 +2665,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
                list_add_tail(&subsys->entry, &nvme_subsystems);
        }
 
-       if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
-                       dev_name(ctrl->device))) {
+       ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
+                               dev_name(ctrl->device));
+       if (ret) {
                dev_err(ctrl->device,
                        "failed to create sysfs link from subsystem.\n");
                goto out_put_subsystem;
@@ -2795,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
 
        switch (ctrl->state) {
        case NVME_CTRL_LIVE:
-       case NVME_CTRL_ADMIN_ONLY:
                break;
        default:
                return -EWOULDBLOCK;
@@ -2847,6 +2963,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ctrl, NULL, argp);
+       case NVME_IOCTL_ADMIN64_CMD:
+               return nvme_user_cmd64(ctrl, NULL, argp);
        case NVME_IOCTL_IO_CMD:
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
@@ -3054,6 +3172,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
 
 nvme_show_int_function(cntlid);
 nvme_show_int_function(numa_node);
+nvme_show_int_function(queue_count);
+nvme_show_int_function(sqsize);
 
 static ssize_t nvme_sysfs_delete(struct device *dev,
                                struct device_attribute *attr, const char *buf,
@@ -3085,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
        static const char *const state_name[] = {
                [NVME_CTRL_NEW]         = "new",
                [NVME_CTRL_LIVE]        = "live",
-               [NVME_CTRL_ADMIN_ONLY]  = "only-admin",
                [NVME_CTRL_RESETTING]   = "resetting",
                [NVME_CTRL_CONNECTING]  = "connecting",
                [NVME_CTRL_DELETING]    = "deleting",
@@ -3134,6 +3253,8 @@ static struct attribute *nvme_dev_attrs[] = {
        &dev_attr_address.attr,
        &dev_attr_state.attr,
        &dev_attr_numa_node.attr,
+       &dev_attr_queue_count.attr,
+       &dev_attr_sqsize.attr,
        NULL
 };
 
@@ -3594,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work)
        struct nvme_id_ctrl *id;
        unsigned nn;
 
-       if (ctrl->state != NVME_CTRL_LIVE)
+       /* No tagset on a live ctrl means IO queues could not be created */
+       if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
                return;
 
-       WARN_ON_ONCE(!ctrl->tagset);
-
        if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
                dev_info(ctrl->device, "rescanning namespaces.\n");
                nvme_clear_changed_ns_log(ctrl);
@@ -3759,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work)
                if (time_after(jiffies, fw_act_timeout)) {
                        dev_warn(ctrl->device,
                                "Fw activation timeout, reset controller\n");
-                       nvme_reset_ctrl(ctrl);
-                       break;
+                       nvme_try_sched_reset(ctrl);
+                       return;
                }
                msleep(100);
        }
 
-       if (ctrl->state != NVME_CTRL_LIVE)
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
                return;
 
        nvme_start_queues(ctrl);
@@ -3785,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
                nvme_queue_scan(ctrl);
                break;
        case NVME_AER_NOTICE_FW_ACT_STARTING:
-               queue_work(nvme_wq, &ctrl->fw_act_work);
+               /*
+                * We are (ab)using the RESETTING state to prevent subsequent
+                * recovery actions from interfering with the controller's
+                * firmware activation.
+                */
+               if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+                       queue_work(nvme_wq, &ctrl->fw_act_work);
                break;
 #ifdef CONFIG_NVME_MULTIPATH
        case NVME_AER_NOTICE_ANA:
@@ -3908,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
        INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
        INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+       init_waitqueue_head(&ctrl->state_wq);
 
        INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
        memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
index 93f08d7..a0ec40a 100644 (file)
@@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
                bool queue_live)
 {
-       if (likely(ctrl->state == NVME_CTRL_LIVE ||
-                  ctrl->state == NVME_CTRL_ADMIN_ONLY))
+       if (likely(ctrl->state == NVME_CTRL_LIVE))
                return true;
        return __nvmf_check_ready(ctrl, rq, queue_live);
 }
index 30de7ef..fc99a40 100644 (file)
@@ -522,14 +522,13 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
        return 0;
 }
 
-static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
+static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
 {
        u32 nr_change_groups = 0;
        int error;
 
        mutex_lock(&ctrl->ana_lock);
-       error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
-                       groups_only ? NVME_ANA_LOG_RGO : 0,
+       error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0,
                        ctrl->ana_log_buf, ctrl->ana_log_size, 0);
        if (error) {
                dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
@@ -565,7 +564,7 @@ static void nvme_ana_work(struct work_struct *work)
 {
        struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
 
-       nvme_read_ana_log(ctrl, false);
+       nvme_read_ana_log(ctrl);
 }
 
 static void nvme_anatt_timeout(struct timer_list *t)
@@ -715,7 +714,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
                goto out;
        }
 
-       error = nvme_read_ana_log(ctrl, true);
+       error = nvme_read_ana_log(ctrl);
        if (error)
                goto out_free_ana_log_buf;
        return 0;
index b5013c1..22e8401 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/sed-opal.h>
 #include <linux/fault-inject.h>
 #include <linux/rcupdate.h>
+#include <linux/wait.h>
 
 #include <trace/events/block.h>
 
@@ -161,7 +162,6 @@ static inline u16 nvme_req_qid(struct request *req)
 enum nvme_ctrl_state {
        NVME_CTRL_NEW,
        NVME_CTRL_LIVE,
-       NVME_CTRL_ADMIN_ONLY,    /* Only admin queue live */
        NVME_CTRL_RESETTING,
        NVME_CTRL_CONNECTING,
        NVME_CTRL_DELETING,
@@ -199,6 +199,7 @@ struct nvme_ctrl {
        struct cdev cdev;
        struct work_struct reset_work;
        struct work_struct delete_work;
+       wait_queue_head_t state_wq;
 
        struct nvme_subsystem *subsys;
        struct list_head subsys_entry;
@@ -221,6 +222,7 @@ struct nvme_ctrl {
        u16 oacs;
        u16 nssa;
        u16 nr_streams;
+       u16 sqsize;
        u32 max_namespaces;
        atomic_t abort_limit;
        u8 vwc;
@@ -269,7 +271,6 @@ struct nvme_ctrl {
        u16 hmmaxd;
 
        /* Fabrics only */
-       u16 sqsize;
        u32 ioccsz;
        u32 iorcsz;
        u16 icdoff;
@@ -449,6 +450,7 @@ void nvme_complete_rq(struct request *req);
 bool nvme_cancel_request(struct request *req, void *data, bool reserved);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                enum nvme_ctrl_state new_state);
+bool nvme_wait_reset(struct nvme_ctrl *ctrl);
 int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
@@ -499,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 
 int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
index 6b4d7b0..869f462 100644 (file)
@@ -549,8 +549,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 
        WARN_ON_ONCE(!iod->nents);
 
-       /* P2PDMA requests do not need to be unmapped */
-       if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+       if (is_pci_p2pdma_page(sg_page(iod->sg)))
+               pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
+                                   rq_dma_dir(req));
+       else
                dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
 
 
@@ -771,7 +773,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
                struct bio_vec *bv)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset;
+       unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1);
+       unsigned int first_prp_len = dev->ctrl.page_size - offset;
 
        iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
        if (dma_mapping_error(dev->dev, iod->first_dma))
@@ -834,8 +837,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                goto out;
 
        if (is_pci_p2pdma_page(sg_page(iod->sg)))
-               nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
-                                             rq_dma_dir(req));
+               nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
+                               iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
        else
                nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
                                             rq_dma_dir(req), DMA_ATTR_NO_WARN);
@@ -2261,10 +2264,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
        return true;
 }
 
-/*
- * return error value only when tagset allocation failed
- */
-static int nvme_dev_add(struct nvme_dev *dev)
+static void nvme_dev_add(struct nvme_dev *dev)
 {
        int ret;
 
@@ -2294,7 +2294,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
                if (ret) {
                        dev_warn(dev->ctrl.device,
                                "IO queues tagset allocation failed %d\n", ret);
-                       return ret;
+                       return;
                }
                dev->ctrl.tagset = &dev->tagset;
        } else {
@@ -2305,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
        }
 
        nvme_dbbuf_set(dev);
-       return 0;
 }
 
 static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2465,6 +2464,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        mutex_unlock(&dev->shutdown_lock);
 }
 
+static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
+{
+       if (!nvme_wait_reset(&dev->ctrl))
+               return -EBUSY;
+       nvme_dev_disable(dev, shutdown);
+       return 0;
+}
+
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
 {
        dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
@@ -2488,14 +2495,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
        dma_pool_destroy(dev->prp_small_pool);
 }
 
+static void nvme_free_tagset(struct nvme_dev *dev)
+{
+       if (dev->tagset.tags)
+               blk_mq_free_tag_set(&dev->tagset);
+       dev->ctrl.tagset = NULL;
+}
+
 static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 {
        struct nvme_dev *dev = to_nvme_dev(ctrl);
 
        nvme_dbbuf_dma_free(dev);
        put_device(dev->dev);
-       if (dev->tagset.tags)
-               blk_mq_free_tag_set(&dev->tagset);
+       nvme_free_tagset(dev);
        if (dev->ctrl.admin_q)
                blk_put_queue(dev->ctrl.admin_q);
        kfree(dev->queues);
@@ -2506,6 +2519,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
 {
+       /*
+        * Set state to deleting now to avoid blocking nvme_wait_reset(), which
+        * may be holding this pci_dev's device lock.
+        */
+       nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
        nvme_get_ctrl(&dev->ctrl);
        nvme_dev_disable(dev, false);
        nvme_kill_queues(&dev->ctrl);
@@ -2519,7 +2537,6 @@ static void nvme_reset_work(struct work_struct *work)
                container_of(work, struct nvme_dev, ctrl.reset_work);
        bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
        int result;
-       enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
 
        if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
                result = -ENODEV;
@@ -2613,13 +2630,11 @@ static void nvme_reset_work(struct work_struct *work)
                dev_warn(dev->ctrl.device, "IO queues not created\n");
                nvme_kill_queues(&dev->ctrl);
                nvme_remove_namespaces(&dev->ctrl);
-               new_state = NVME_CTRL_ADMIN_ONLY;
+               nvme_free_tagset(dev);
        } else {
                nvme_start_queues(&dev->ctrl);
                nvme_wait_freeze(&dev->ctrl);
-               /* hit this only when allocate tagset fails */
-               if (nvme_dev_add(dev))
-                       new_state = NVME_CTRL_ADMIN_ONLY;
+               nvme_dev_add(dev);
                nvme_unfreeze(&dev->ctrl);
        }
 
@@ -2627,9 +2642,9 @@ static void nvme_reset_work(struct work_struct *work)
         * If only the admin queue is live, keep it to do further investigation or
         * recovery.
         */
-       if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
                dev_warn(dev->ctrl.device,
-                       "failed to mark controller state %d\n", new_state);
+                       "failed to mark controller live state\n");
                result = -ENODEV;
                goto out;
        }
@@ -2670,7 +2685,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 
 static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 {
-       *val = readq(to_nvme_dev(ctrl)->bar + off);
+       *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
        return 0;
 }
 
@@ -2834,19 +2849,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 static void nvme_reset_prepare(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
-       nvme_dev_disable(dev, false);
+
+       /*
+        * We don't need to check the return value from waiting for the reset
+        * state as pci_dev device lock is held, making it impossible to race
+        * with ->remove().
+        */
+       nvme_disable_prepare_reset(dev, false);
+       nvme_sync_queues(&dev->ctrl);
 }
 
 static void nvme_reset_done(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
-       nvme_reset_ctrl_sync(&dev->ctrl);
+
+       if (!nvme_try_sched_reset(&dev->ctrl))
+               flush_work(&dev->ctrl.reset_work);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
-       nvme_dev_disable(dev, true);
+       nvme_disable_prepare_reset(dev, true);
 }
 
 /*
@@ -2899,7 +2923,7 @@ static int nvme_resume(struct device *dev)
 
        if (ndev->last_ps == U32_MAX ||
            nvme_set_power_state(ctrl, ndev->last_ps) != 0)
-               nvme_reset_ctrl(ctrl);
+               return nvme_try_sched_reset(&ndev->ctrl);
        return 0;
 }
 
@@ -2927,43 +2951,42 @@ static int nvme_suspend(struct device *dev)
         */
        if (pm_suspend_via_firmware() || !ctrl->npss ||
            !pcie_aspm_enabled(pdev) ||
-           (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) {
-               nvme_dev_disable(ndev, true);
-               return 0;
-       }
+           (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
+               return nvme_disable_prepare_reset(ndev, true);
 
        nvme_start_freeze(ctrl);
        nvme_wait_freeze(ctrl);
        nvme_sync_queues(ctrl);
 
-       if (ctrl->state != NVME_CTRL_LIVE &&
-           ctrl->state != NVME_CTRL_ADMIN_ONLY)
+       if (ctrl->state != NVME_CTRL_LIVE)
                goto unfreeze;
 
        ret = nvme_get_power_state(ctrl, &ndev->last_ps);
        if (ret < 0)
                goto unfreeze;
 
+       /*
+        * A saved state prevents pci pm from generically controlling the
+        * device's power. If we're using protocol specific settings, we don't
+        * want pci interfering.
+        */
+       pci_save_state(pdev);
+
        ret = nvme_set_power_state(ctrl, ctrl->npss);
        if (ret < 0)
                goto unfreeze;
 
        if (ret) {
+               /* discard the saved state */
+               pci_load_saved_state(pdev, NULL);
+
                /*
                 * Clearing npss forces a controller reset on resume. The
                 * correct value will be rediscovered then.
                 */
-               nvme_dev_disable(ndev, true);
+               ret = nvme_disable_prepare_reset(ndev, true);
                ctrl->npss = 0;
-               ret = 0;
-               goto unfreeze;
        }
-       /*
-        * A saved state prevents pci pm from generically controlling the
-        * device's power. If we're using protocol specific settings, we don't
-        * want pci interfering.
-        */
-       pci_save_state(pdev);
 unfreeze:
        nvme_unfreeze(ctrl);
        return ret;
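
The reshuffle changes when the PCI state is saved: saving it before
nvme_set_power_state() keeps generic PCI power management from touching a
device that is using NVMe-native power states, and the saved state is
discarded again if the drive rejects the request. Condensed:

    pci_save_state(pdev);               /* opt out of generic PCI PM */
    ret = nvme_set_power_state(ctrl, ctrl->npss);
    if (ret > 0) {                      /* drive refused the state */
            pci_load_saved_state(pdev, NULL);
            ret = nvme_disable_prepare_reset(ndev, true);
            ctrl->npss = 0;             /* force reset on resume */
    }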
@@ -2972,9 +2995,7 @@ unfreeze:
 static int nvme_simple_suspend(struct device *dev)
 {
        struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
-
-       nvme_dev_disable(ndev, true);
-       return 0;
+       return nvme_disable_prepare_reset(ndev, true);
 }
 
 static int nvme_simple_resume(struct device *dev)
@@ -2982,8 +3003,7 @@ static int nvme_simple_resume(struct device *dev)
        struct pci_dev *pdev = to_pci_dev(dev);
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-       nvme_reset_ctrl(&ndev->ctrl);
-       return 0;
+       return nvme_try_sched_reset(&ndev->ctrl);
 }
 
 static const struct dev_pm_ops nvme_dev_pm_ops = {
@@ -3088,6 +3108,9 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_LIGHTNVM, },
        { PCI_DEVICE(0x10ec, 0x5762),   /* ADATA SX6000LNP */
                .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+       { PCI_DEVICE(0x1cc1, 0x8201),   /* ADATA SX8200PNP 512GB */
+               .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+                               NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
index dfa07bb..f19a28b 100644 (file)
@@ -427,7 +427,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
 {
        return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-                    ibdev->attrs.max_fast_reg_page_list_len);
+                    ibdev->attrs.max_fast_reg_page_list_len - 1);
 }
 
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@@ -437,7 +437,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
        const int cq_factor = send_wr_factor + 1;       /* + RECV */
        int comp_vector, idx = nvme_rdma_queue_idx(queue);
        enum ib_poll_context poll_ctx;
-       int ret;
+       int ret, pages_per_mr;
 
        queue->device = nvme_rdma_find_get_device(queue->cm_id);
        if (!queue->device) {
@@ -479,10 +479,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
                goto out_destroy_qp;
        }
 
+       /*
+        * Currently we don't use SG_GAPS MRs, so if the first entry is
+        * misaligned we'll end up using two entries for a single data page,
+        * and thus one additional entry is required.
+        */
+       pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
        ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
                              queue->queue_size,
                              IB_MR_TYPE_MEM_REG,
-                             nvme_rdma_get_max_fr_pages(ibdev), 0);
+                             pages_per_mr, 0);
        if (ret) {
                dev_err(queue->ctrl->ctrl.device,
                        "failed to initialize MR pool sized %d for QID %d\n",
@@ -614,7 +620,8 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
        if (!ret) {
                set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
        } else {
-               __nvme_rdma_stop_queue(queue);
+               if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
+                       __nvme_rdma_stop_queue(queue);
                dev_info(ctrl->ctrl.device,
                        "failed to connect queue: %d ret=%d\n", idx, ret);
        }
@@ -820,8 +827,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
        if (error)
                goto out_stop_queue;
 
-       ctrl->ctrl.max_hw_sectors =
-               (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
+       ctrl->ctrl.max_segments = ctrl->max_fr_pages;
+       ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
 
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
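
The shift here converts MR pages to 512-byte sectors: ilog2(SZ_4K) - 9 = 12 - 9 = 3,
i.e. eight sectors per 4 KiB page. With an illustrative max_fr_pages of 256:

  /* hypothetical HCA limit, numbers for arithmetic only */
  max_hw_sectors = 256 << 3;      /* 2048 sectors = 1 MiB per request */
  max_segments   = 256;           /* SG list capped at the same size  */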
 
@@ -1694,6 +1701,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
        dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
                 rq->tag, nvme_rdma_queue_idx(queue));
 
+       /*
+        * Restart the timer if a controller reset is already scheduled. Any
+        * timed out commands would be handled before entering the connecting
+        * state.
+        */
+       if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+               return BLK_EH_RESET_TIMER;
+
        if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
                /*
                 * Teardown immediately if controller times out while starting
index 4ffd595..7544be8 100644 (file)
@@ -1042,7 +1042,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 {
        struct nvme_tcp_queue *queue =
                container_of(w, struct nvme_tcp_queue, io_work);
-       unsigned long start = jiffies + msecs_to_jiffies(1);
+       unsigned long deadline = jiffies + msecs_to_jiffies(1);
 
        do {
                bool pending = false;
@@ -1067,7 +1067,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
                if (!pending)
                        return;
 
-       } while (time_after(jiffies, start)); /* quota is exhausted */
+       } while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
        queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
 }
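
The rename is the fix: with the old condition, time_after(jiffies, start) stays
false for the whole first millisecond, so the loop body ran exactly once and the
1ms budget was never used. The corrected loop is the standard bounded-budget
pattern:

  unsigned long deadline = jiffies + msecs_to_jiffies(1);

  do {
          /* handle one bounded batch of send/recv work */
  } while (!time_after(jiffies, deadline));  /* stop once the budget is spent */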
@@ -1386,7 +1386,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
        queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
        queue->sock->sk->sk_state_change = nvme_tcp_state_change;
        queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
        queue->sock->sk->sk_ll_usec = 1;
+#endif
        write_unlock_bh(&queue->sock->sk->sk_callback_lock);
 
        return 0;
@@ -2044,6 +2046,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
        struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
        struct nvme_tcp_cmd_pdu *pdu = req->pdu;
 
+       /*
+        * Restart the timer if a controller reset is already scheduled. Any
+        * timed out commands would be handled before entering the connecting
+        * state.
+        */
+       if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+               return BLK_EH_RESET_TIMER;
+
        dev_warn(ctrl->ctrl.device,
                "queue %d: timeout request %#x type %d\n",
                nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
@@ -2126,6 +2136,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 
        ret = nvme_tcp_map_data(queue, rq);
        if (unlikely(ret)) {
+               nvme_cleanup_cmd(rq);
                dev_err(queue->ctrl->ctrl.device,
                        "Failed to map data (%d)\n", ret);
                return ret;
@@ -2208,7 +2219,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
        struct nvme_tcp_queue *queue = hctx->driver_data;
        struct sock *sk = queue->sock->sk;
 
-       if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue))
+       if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
                sk_busy_loop(sk, true);
        nvme_tcp_try_recv(queue);
        return queue->nr_cqe;
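
skb_queue_empty_lockless() differs from skb_queue_empty() only in annotating the
read, which matters here because the poll path peeks at the receive queue without
holding its lock. Its shape in include/linux/skbuff.h is essentially:

  static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
  {
          /* READ_ONCE() makes the unlocked peek data-race free */
          return READ_ONCE(list->next) == (const struct sk_buff *)list;
  }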
index de0bff7..32008d8 100644 (file)
 void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
 {
        const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
-       /* Number of physical blocks per logical block. */
-       const u32 ppl = ql->physical_block_size / ql->logical_block_size;
-       /* Physical blocks per logical block, 0's based. */
-       const __le16 ppl0b = to0based(ppl);
+       /* Number of logical blocks per physical block. */
+       const u32 lpp = ql->physical_block_size / ql->logical_block_size;
+       /* Logical blocks per physical block, 0's based. */
+       const __le16 lpp0b = to0based(lpp);
 
        /*
         * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
@@ -25,9 +25,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
         * field from the identify controller data structure should be used.
         */
        id->nsfeat |= 1 << 1;
-       id->nawun = ppl0b;
-       id->nawupf = ppl0b;
-       id->nacwu = ppl0b;
+       id->nawun = lpp0b;
+       id->nawupf = lpp0b;
+       id->nacwu = lpp0b;
 
        /*
         * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
@@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
         */
        id->nsfeat |= 1 << 4;
        /* NPWG = Namespace Preferred Write Granularity. 0's based */
-       id->npwg = ppl0b;
+       id->npwg = lpp0b;
        /* NPWA = Namespace Preferred Write Alignment. 0's based */
        id->npwa = id->npwg;
        /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
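
All of these identify fields are "0's based": the stored value encodes one less
than the real count, so 0 means one logical block. The to0based() helper lives in
nvmet.h; a sketch consistent with its use here:

  static inline __le16 to0based_sketch(u32 a)
  {
          /* clamp to [1, 65536], then store value - 1 */
          return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
  }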
index 748a39f..11f5aea 100644 (file)
@@ -157,8 +157,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                iod->sg_table.sgl = iod->first_sgl;
                if (sg_alloc_table_chained(&iod->sg_table,
                                blk_rq_nr_phys_segments(req),
-                               iod->sg_table.sgl, SG_CHUNK_SIZE))
+                               iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+                       nvme_cleanup_cmd(req);
                        return BLK_STS_RESOURCE;
+               }
 
                iod->req.sg = iod->sg_table.sgl;
                iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
index bf4f034..d535080 100644 (file)
@@ -348,8 +348,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
 
        return 0;
 err:
-       if (cmd->req.sg_cnt)
-               sgl_free(cmd->req.sg);
+       sgl_free(cmd->req.sg);
        return NVME_SC_INTERNAL;
 }
 
@@ -554,8 +553,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
 
        if (queue->nvme_sq.sqhd_disabled) {
                kfree(cmd->iov);
-               if (cmd->req.sg_cnt)
-                       sgl_free(cmd->req.sg);
+               sgl_free(cmd->req.sg);
        }
 
        return 1;
@@ -586,8 +584,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
                return -EAGAIN;
 
        kfree(cmd->iov);
-       if (cmd->req.sg_cnt)
-               sgl_free(cmd->req.sg);
+       sgl_free(cmd->req.sg);
        cmd->queue->snd_cmd = NULL;
        nvmet_tcp_put_cmd(cmd);
        return 1;
@@ -1310,8 +1307,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
        nvmet_req_uninit(&cmd->req);
        nvmet_tcp_unmap_pdu_iovec(cmd);
        kfree(cmd->iov);
-       if (cmd->req.sg_cnt)
-               sgl_free(cmd->req.sg);
+       sgl_free(cmd->req.sg);
 }
 
 static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
index 000b957..bd6129d 100644 (file)
@@ -362,7 +362,7 @@ struct phy_device *of_phy_get_and_connect(struct net_device *dev,
        int ret;
 
        iface = of_get_phy_mode(np);
-       if (iface < 0)
+       if ((int)iface < 0)
                return NULL;
        if (of_phy_is_fixed_link(np)) {
                ret = of_phy_register_fixed_link(np);
index 7989703..6bd610e 100644 (file)
@@ -324,8 +324,10 @@ int of_reserved_mem_device_init_by_idx(struct device *dev,
        if (!target)
                return -ENODEV;
 
-       if (!of_device_is_available(target))
+       if (!of_device_is_available(target)) {
+               of_node_put(target);
                return 0;
+       }
 
        rmem = __find_rmem(target);
        of_node_put(target);
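
This is the general OF refcount rule: every lookup that returns a node with an
elevated refcount must be balanced by of_node_put() on every exit path, including
early returns. In miniature (np and idx are illustrative):

  struct device_node *np = of_parse_phandle(dev->of_node, "memory-region", idx);

  if (!np)
          return -ENODEV;
  if (!of_device_is_available(np)) {
          of_node_put(np);        /* don't leak the reference on the early exit */
          return 0;
  }
  /* ... use np ... */
  of_node_put(np);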
index 480a21e..92e895d 100644 (file)
@@ -1207,6 +1207,7 @@ static int __init unittest_data_add(void)
        of_fdt_unflatten_tree(unittest_data, NULL, &unittest_data_node);
        if (!unittest_data_node) {
                pr_warn("%s: No tree to attach; not running tests\n", __func__);
+               kfree(unittest_data);
                return -ENODATA;
        }
 
index 3b7ffd0..9ff0538 100644 (file)
@@ -1626,12 +1626,6 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev,
                        goto free_regulators;
                }
 
-               ret = regulator_enable(reg);
-               if (ret < 0) {
-                       regulator_put(reg);
-                       goto free_regulators;
-               }
-
                opp_table->regulators[i] = reg;
        }
 
@@ -1645,10 +1639,8 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev,
        return opp_table;
 
 free_regulators:
-       while (i--) {
-               regulator_disable(opp_table->regulators[i]);
-               regulator_put(opp_table->regulators[i]);
-       }
+       while (i != 0)
+               regulator_put(opp_table->regulators[--i]);
 
        kfree(opp_table->regulators);
        opp_table->regulators = NULL;
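
With the enable/disable calls gone, the unwind path mirrors exactly what the
acquire loop managed to do, which is the standard partial-failure idiom. Sketched
generically (acquire(), release() and res[] are placeholders):

  for (i = 0; i < count; i++) {
          res[i] = acquire(i);
          if (IS_ERR(res[i])) {
                  ret = PTR_ERR(res[i]);
                  goto unwind;
          }
  }
  return 0;

  unwind:
          while (i != 0)
                  release(res[--i]);   /* undo only what succeeded, in reverse */
          return ret;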
@@ -1674,10 +1666,8 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table)
        /* Make sure there are no concurrent readers while updating opp_table */
        WARN_ON(!list_empty(&opp_table->opp_list));
 
-       for (i = opp_table->regulator_count - 1; i >= 0; i--) {
-               regulator_disable(opp_table->regulators[i]);
+       for (i = opp_table->regulator_count - 1; i >= 0; i--)
                regulator_put(opp_table->regulators[i]);
-       }
 
        _free_set_opp_data(opp_table);
 
index 1813f5a..1cbb582 100644 (file)
@@ -77,8 +77,6 @@ static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table,
 {
        struct dev_pm_opp *opp;
 
-       lockdep_assert_held(&opp_table_lock);
-
        mutex_lock(&opp_table->lock);
 
        list_for_each_entry(opp, &opp_table->opp_list, node) {
@@ -665,6 +663,13 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table)
                return 0;
        }
 
+       /*
+        * Re-initialize list_kref every time we add static OPPs to the OPP
+        * table as the reference count may be 0 after the last time static OPPs
+        * were removed.
+        */
+       kref_init(&opp_table->list_kref);
+
        /* We have opp-table node now, iterate over it and add OPPs */
        for_each_available_child_of_node(opp_table->np, np) {
                opp = _opp_add_static_v2(opp_table, dev, np);
index ed50502..de8e4e3 100644 (file)
@@ -678,14 +678,6 @@ static int sba_dma_supported( struct device *dev, u64 mask)
                return(0);
        }
 
-       /* Documentation/DMA-API-HOWTO.txt tells drivers to try 64-bit
-        * first, then fall back to 32-bit if that fails.
-        * We are just "encouraging" 32-bit DMA masks here since we can
-        * never allow IOMMU bypass unless we add special support for ZX1.
-        */
-       if (mask > ~0U)
-               return 0;
-
        ioc = GET_IOC(dev);
        if (!ioc)
                return 0;
index c313de9..a304f5e 100644 (file)
@@ -52,7 +52,7 @@ config PCI_MSI
           If you don't know what to do here, say Y.
 
 config PCI_MSI_IRQ_DOMAIN
-       def_bool ARC || ARM || ARM64 || X86
+       def_bool ARC || ARM || ARM64 || X86 || RISCV
        depends on PCI_MSI
        select GENERIC_MSI_IRQ_DOMAIN
 
@@ -170,7 +170,7 @@ config PCI_P2PDMA
 
          Many PCIe root complexes do not support P2P transactions and
          it's hard to tell which support it at all, so at this time,
-         P2P DMA transations must be between devices behind the same root
+         P2P DMA transactions must be between devices behind the same root
          port.
 
          If unsure, say N.
@@ -181,7 +181,7 @@ config PCI_LABEL
 
 config PCI_HYPERV
         tristate "Hyper-V PCI Frontend"
-        depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+        depends on X86_64 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && SYSFS
        select PCI_HYPERV_INTERFACE
         help
           The PCI device frontend driver allows the kernel to import arbitrary
index 544922f..2fccb57 100644 (file)
@@ -336,15 +336,6 @@ static inline int pcie_cap_version(const struct pci_dev *dev)
        return pcie_caps_reg(dev) & PCI_EXP_FLAGS_VERS;
 }
 
-static bool pcie_downstream_port(const struct pci_dev *dev)
-{
-       int type = pci_pcie_type(dev);
-
-       return type == PCI_EXP_TYPE_ROOT_PORT ||
-              type == PCI_EXP_TYPE_DOWNSTREAM ||
-              type == PCI_EXP_TYPE_PCIE_BRIDGE;
-}
-
 bool pcie_cap_has_lnkctl(const struct pci_dev *dev)
 {
        int type = pci_pcie_type(dev);
index 495059d..8e40b3e 100644 (file)
@@ -417,11 +417,9 @@ struct pci_bus *pci_bus_get(struct pci_bus *bus)
                get_device(&bus->dev);
        return bus;
 }
-EXPORT_SYMBOL(pci_bus_get);
 
 void pci_bus_put(struct pci_bus *bus)
 {
        if (bus)
                put_device(&bus->dev);
 }
-EXPORT_SYMBOL(pci_bus_put);
index 6ea778a..0ba988b 100644 (file)
@@ -131,13 +131,29 @@ config PCI_KEYSTONE_EP
          DesignWare core functions to implement the driver.
 
 config PCI_LAYERSCAPE
-       bool "Freescale Layerscape PCIe controller"
+       bool "Freescale Layerscape PCIe controller - Host mode"
        depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST)
        depends on PCI_MSI_IRQ_DOMAIN
        select MFD_SYSCON
        select PCIE_DW_HOST
        help
-         Say Y here if you want PCIe controller support on Layerscape SoCs.
+         Say Y here if you want to enable PCIe controller support on Layerscape
+         SoCs operating in Host mode.
+         This controller can work either as EP or RC. The RCW[HOST_AGT_PEX]
+         determines which PCIe controller works in EP mode and which PCIe
+         controller works in RC mode.
+
+config PCI_LAYERSCAPE_EP
+       bool "Freescale Layerscape PCIe controller - Endpoint mode"
+       depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST)
+       depends on PCI_ENDPOINT
+       select PCIE_DW_EP
+       help
+         Say Y here if you want to enable PCIe controller support on Layerscape
+         SoCs operating in Endpoint mode.
+         This controller can work either as EP or RC. The RCW[HOST_AGT_PEX]
+         determines which PCIe controller works in EP mode and which PCIe
+         controller works in RC mode.
 
 config PCI_HISI
        depends on OF && (ARM64 || COMPILE_TEST)
@@ -220,6 +236,16 @@ config PCI_MESON
          and therefore the driver re-uses the DesignWare core functions to
          implement the driver.
 
+config PCIE_TEGRA194
+       tristate "NVIDIA Tegra194 (and later) PCIe controller"
+       depends on ARCH_TEGRA_194_SOC || COMPILE_TEST
+       depends on PCI_MSI_IRQ_DOMAIN
+       select PCIE_DW_HOST
+       select PHY_TEGRA194_P2U
+       help
+         Say Y here if you want support for DesignWare core based PCIe host
+         controller found in NVIDIA Tegra194 SoC.
+
 config PCIE_UNIPHIER
        bool "Socionext UniPhier PCIe controllers"
        depends on ARCH_UNIPHIER || COMPILE_TEST
@@ -230,4 +256,16 @@ config PCIE_UNIPHIER
          Say Y here if you want PCIe controller support on UniPhier SoCs.
          This driver supports LD20 and PXs3 SoCs.
 
+config PCIE_AL
+       bool "Amazon Annapurna Labs PCIe controller"
+       depends on OF && (ARM64 || COMPILE_TEST)
+       depends on PCI_MSI_IRQ_DOMAIN
+       select PCIE_DW_HOST
+       help
+         Say Y here to enable support for Amazon's Annapurna Labs PCIe
+         controller IP on Amazon SoCs. The PCIe controller uses the DesignWare
+         core plus Annapurna Labs proprietary hardware wrappers. This is
+         required only for DT-based platforms. ACPI platforms with the
+         Annapurna Labs PCIe controller don't need to enable this.
+
 endmenu
index b085dfd..69faff3 100644 (file)
@@ -8,13 +8,15 @@ obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
 obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
 obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
 obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone.o
-obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o pci-layerscape-ep.o
+obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
+obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
 obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
 obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
+obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 
 # The following drivers are for devices that use the generic ACPI
index cee5f2f..14a6ba4 100644 (file)
@@ -465,7 +465,7 @@ static int __init exynos_pcie_probe(struct platform_device *pdev)
 
        ep->phy = devm_of_phy_get(dev, np, NULL);
        if (IS_ERR(ep->phy)) {
-               if (PTR_ERR(ep->phy) == -EPROBE_DEFER)
+               if (PTR_ERR(ep->phy) != -ENODEV)
                        return PTR_ERR(ep->phy);
 
                ep->phy = NULL;
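
This hunk, like the imx6, armada8k and histb ones below, switches to the
optional-resource idiom: -ENODEV means "not wired up, continue without it",
while every other error, including -EPROBE_DEFER, is propagated to the core:

  phy = devm_of_phy_get(dev, np, NULL);
  if (IS_ERR(phy)) {
          if (PTR_ERR(phy) != -ENODEV)
                  return PTR_ERR(phy);    /* real error or probe deferral */
          phy = NULL;                     /* optional PHY simply absent */
  }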
index 9b5cb5b..acfbd34 100644 (file)
@@ -57,6 +57,7 @@ enum imx6_pcie_variants {
 struct imx6_pcie_drvdata {
        enum imx6_pcie_variants variant;
        u32 flags;
+       int dbi_length;
 };
 
 struct imx6_pcie {
@@ -1173,8 +1174,8 @@ static int imx6_pcie_probe(struct platform_device *pdev)
 
        imx6_pcie->vpcie = devm_regulator_get_optional(&pdev->dev, "vpcie");
        if (IS_ERR(imx6_pcie->vpcie)) {
-               if (PTR_ERR(imx6_pcie->vpcie) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(imx6_pcie->vpcie) != -ENODEV)
+                       return PTR_ERR(imx6_pcie->vpcie);
                imx6_pcie->vpcie = NULL;
        }
 
@@ -1212,6 +1213,7 @@ static const struct imx6_pcie_drvdata drvdata[] = {
                .variant = IMX6Q,
                .flags = IMX6_PCIE_FLAG_IMX6_PHY |
                         IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE,
+               .dbi_length = 0x200,
        },
        [IMX6SX] = {
                .variant = IMX6SX,
@@ -1254,6 +1256,37 @@ static struct platform_driver imx6_pcie_driver = {
        .shutdown = imx6_pcie_shutdown,
 };
 
+static void imx6_pcie_quirk(struct pci_dev *dev)
+{
+       struct pci_bus *bus = dev->bus;
+       struct pcie_port *pp = bus->sysdata;
+
+       /* Bus parent is the PCI bridge, its parent is this platform driver */
+       if (!bus->dev.parent || !bus->dev.parent->parent)
+               return;
+
+       /* Make sure we only quirk devices associated with this driver */
+       if (bus->dev.parent->parent->driver != &imx6_pcie_driver.driver)
+               return;
+
+       if (bus->number == pp->root_bus_nr) {
+               struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+               struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci);
+
+               /*
+                * Limit config length to avoid the kernel reading beyond
+                * the register set and causing an abort on i.MX 6Quad
+                */
+               if (imx6_pcie->drvdata->dbi_length) {
+                       dev->cfg_size = imx6_pcie->drvdata->dbi_length;
+                       dev_info(&dev->dev, "Limiting cfg_size to %d\n",
+                                       dev->cfg_size);
+               }
+       }
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
+                       PCI_CLASS_BRIDGE_PCI, 8, imx6_pcie_quirk);
+
 static int __init imx6_pcie_init(void)
 {
 #ifdef CONFIG_ARM
index be61d96..ca9aa45 100644 (file)
@@ -44,6 +44,7 @@ static const struct pci_epc_features ls_pcie_epc_features = {
        .linkup_notifier = false,
        .msi_capable = true,
        .msix_capable = false,
+       .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
 };
 
 static const struct pci_epc_features*
index 3ab58f0..1eeda2f 100644 (file)
@@ -91,3 +91,368 @@ struct pci_ecam_ops al_pcie_ops = {
 };
 
 #endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */
+
+#ifdef CONFIG_PCIE_AL
+
+#include <linux/of_pci.h>
+#include "pcie-designware.h"
+
+#define AL_PCIE_REV_ID_2       2
+#define AL_PCIE_REV_ID_3       3
+#define AL_PCIE_REV_ID_4       4
+
+#define AXI_BASE_OFFSET                0x0
+
+#define DEVICE_ID_OFFSET       0x16c
+
+#define DEVICE_REV_ID                  0x0
+#define DEVICE_REV_ID_DEV_ID_MASK      GENMASK(31, 16)
+
+#define DEVICE_REV_ID_DEV_ID_X4                0
+#define DEVICE_REV_ID_DEV_ID_X8                2
+#define DEVICE_REV_ID_DEV_ID_X16       4
+
+#define OB_CTRL_REV1_2_OFFSET  0x0040
+#define OB_CTRL_REV3_5_OFFSET  0x0030
+
+#define CFG_TARGET_BUS                 0x0
+#define CFG_TARGET_BUS_MASK_MASK       GENMASK(7, 0)
+#define CFG_TARGET_BUS_BUSNUM_MASK     GENMASK(15, 8)
+
+#define CFG_CONTROL                    0x4
+#define CFG_CONTROL_SUBBUS_MASK                GENMASK(15, 8)
+#define CFG_CONTROL_SEC_BUS_MASK       GENMASK(23, 16)
+
+struct al_pcie_reg_offsets {
+       unsigned int ob_ctrl;
+};
+
+struct al_pcie_target_bus_cfg {
+       u8 reg_val;
+       u8 reg_mask;
+       u8 ecam_mask;
+};
+
+struct al_pcie {
+       struct dw_pcie *pci;
+       void __iomem *controller_base; /* base of PCIe unit (not DW core) */
+       struct device *dev;
+       resource_size_t ecam_size;
+       unsigned int controller_rev_id;
+       struct al_pcie_reg_offsets reg_offsets;
+       struct al_pcie_target_bus_cfg target_bus_cfg;
+};
+
+#define PCIE_ECAM_DEVFN(x)             (((x) & 0xff) << 12)
+
+#define to_al_pcie(x)          dev_get_drvdata((x)->dev)
+
+static inline u32 al_pcie_controller_readl(struct al_pcie *pcie, u32 offset)
+{
+       return readl_relaxed(pcie->controller_base + offset);
+}
+
+static inline void al_pcie_controller_writel(struct al_pcie *pcie, u32 offset,
+                                            u32 val)
+{
+       writel_relaxed(val, pcie->controller_base + offset);
+}
+
+static int al_pcie_rev_id_get(struct al_pcie *pcie, unsigned int *rev_id)
+{
+       u32 dev_rev_id_val;
+       u32 dev_id_val;
+
+       dev_rev_id_val = al_pcie_controller_readl(pcie, AXI_BASE_OFFSET +
+                                                 DEVICE_ID_OFFSET +
+                                                 DEVICE_REV_ID);
+       dev_id_val = FIELD_GET(DEVICE_REV_ID_DEV_ID_MASK, dev_rev_id_val);
+
+       switch (dev_id_val) {
+       case DEVICE_REV_ID_DEV_ID_X4:
+               *rev_id = AL_PCIE_REV_ID_2;
+               break;
+       case DEVICE_REV_ID_DEV_ID_X8:
+               *rev_id = AL_PCIE_REV_ID_3;
+               break;
+       case DEVICE_REV_ID_DEV_ID_X16:
+               *rev_id = AL_PCIE_REV_ID_4;
+               break;
+       default:
+               dev_err(pcie->dev, "Unsupported dev_id_val (0x%x)\n",
+                       dev_id_val);
+               return -EINVAL;
+       }
+
+       dev_dbg(pcie->dev, "dev_id_val: 0x%x\n", dev_id_val);
+
+       return 0;
+}
+
+static int al_pcie_reg_offsets_set(struct al_pcie *pcie)
+{
+       switch (pcie->controller_rev_id) {
+       case AL_PCIE_REV_ID_2:
+               pcie->reg_offsets.ob_ctrl = OB_CTRL_REV1_2_OFFSET;
+               break;
+       case AL_PCIE_REV_ID_3:
+       case AL_PCIE_REV_ID_4:
+               pcie->reg_offsets.ob_ctrl = OB_CTRL_REV3_5_OFFSET;
+               break;
+       default:
+               dev_err(pcie->dev, "Unsupported controller rev_id: 0x%x\n",
+                       pcie->controller_rev_id);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static inline void al_pcie_target_bus_set(struct al_pcie *pcie,
+                                         u8 target_bus,
+                                         u8 mask_target_bus)
+{
+       u32 reg;
+
+       reg = FIELD_PREP(CFG_TARGET_BUS_MASK_MASK, mask_target_bus) |
+             FIELD_PREP(CFG_TARGET_BUS_BUSNUM_MASK, target_bus);
+
+       al_pcie_controller_writel(pcie, AXI_BASE_OFFSET +
+                                 pcie->reg_offsets.ob_ctrl + CFG_TARGET_BUS,
+                                 reg);
+}
+
+static void __iomem *al_pcie_conf_addr_map(struct al_pcie *pcie,
+                                          unsigned int busnr,
+                                          unsigned int devfn)
+{
+       struct al_pcie_target_bus_cfg *target_bus_cfg = &pcie->target_bus_cfg;
+       unsigned int busnr_ecam = busnr & target_bus_cfg->ecam_mask;
+       unsigned int busnr_reg = busnr & target_bus_cfg->reg_mask;
+       struct pcie_port *pp = &pcie->pci->pp;
+       void __iomem *pci_base_addr;
+
+       pci_base_addr = (void __iomem *)((uintptr_t)pp->va_cfg0_base +
+                                        (busnr_ecam << 20) +
+                                        PCIE_ECAM_DEVFN(devfn));
+
+       if (busnr_reg != target_bus_cfg->reg_val) {
+               dev_dbg(pcie->pci->dev, "Changing target bus busnum val from 0x%x to 0x%x\n",
+                       target_bus_cfg->reg_val, busnr_reg);
+               target_bus_cfg->reg_val = busnr_reg;
+               al_pcie_target_bus_set(pcie,
+                                      target_bus_cfg->reg_val,
+                                      target_bus_cfg->reg_mask);
+       }
+
+       return pci_base_addr;
+}
+
+static int al_pcie_rd_other_conf(struct pcie_port *pp, struct pci_bus *bus,
+                                unsigned int devfn, int where, int size,
+                                u32 *val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct al_pcie *pcie = to_al_pcie(pci);
+       unsigned int busnr = bus->number;
+       void __iomem *pci_addr;
+       int rc;
+
+       pci_addr = al_pcie_conf_addr_map(pcie, busnr, devfn);
+
+       rc = dw_pcie_read(pci_addr + where, size, val);
+
+       dev_dbg(pci->dev, "%d-byte config read from %04x:%02x:%02x.%d offset 0x%x (pci_addr: 0x%px) - val:0x%x\n",
+               size, pci_domain_nr(bus), bus->number,
+               PCI_SLOT(devfn), PCI_FUNC(devfn), where,
+               (pci_addr + where), *val);
+
+       return rc;
+}
+
+static int al_pcie_wr_other_conf(struct pcie_port *pp, struct pci_bus *bus,
+                                unsigned int devfn, int where, int size,
+                                u32 val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct al_pcie *pcie = to_al_pcie(pci);
+       unsigned int busnr = bus->number;
+       void __iomem *pci_addr;
+       int rc;
+
+       pci_addr = al_pcie_conf_addr_map(pcie, busnr, devfn);
+
+       rc = dw_pcie_write(pci_addr + where, size, val);
+
+       dev_dbg(pci->dev, "%d-byte config write to %04x:%02x:%02x.%d offset 0x%x (pci_addr: 0x%px) - val:0x%x\n",
+               size, pci_domain_nr(bus), bus->number,
+               PCI_SLOT(devfn), PCI_FUNC(devfn), where,
+               (pci_addr + where), val);
+
+       return rc;
+}
+
+static void al_pcie_config_prepare(struct al_pcie *pcie)
+{
+       struct al_pcie_target_bus_cfg *target_bus_cfg;
+       struct pcie_port *pp = &pcie->pci->pp;
+       unsigned int ecam_bus_mask;
+       u32 cfg_control_offset;
+       u8 subordinate_bus;
+       u8 secondary_bus;
+       u32 cfg_control;
+       u32 reg;
+
+       target_bus_cfg = &pcie->target_bus_cfg;
+
+       ecam_bus_mask = (pcie->ecam_size >> 20) - 1;
+       if (ecam_bus_mask > 255) {
+               dev_warn(pcie->dev, "ECAM window size is larger than 256MB. Cutting off at 256\n");
+               ecam_bus_mask = 255;
+       }
+
+       /* This portion is taken from the transaction address */
+       target_bus_cfg->ecam_mask = ecam_bus_mask;
+       /* This portion is taken from the cfg_target_bus reg */
+       target_bus_cfg->reg_mask = ~target_bus_cfg->ecam_mask;
+       target_bus_cfg->reg_val = pp->busn->start & target_bus_cfg->reg_mask;
+
+       al_pcie_target_bus_set(pcie, target_bus_cfg->reg_val,
+                              target_bus_cfg->reg_mask);
+
+       secondary_bus = pp->busn->start + 1;
+       subordinate_bus = pp->busn->end;
+
+       /* Set the valid values of secondary and subordinate buses */
+       cfg_control_offset = AXI_BASE_OFFSET + pcie->reg_offsets.ob_ctrl +
+                            CFG_CONTROL;
+
+       cfg_control = al_pcie_controller_readl(pcie, cfg_control_offset);
+
+       reg = cfg_control &
+             ~(CFG_CONTROL_SEC_BUS_MASK | CFG_CONTROL_SUBBUS_MASK);
+
+       reg |= FIELD_PREP(CFG_CONTROL_SUBBUS_MASK, subordinate_bus) |
+              FIELD_PREP(CFG_CONTROL_SEC_BUS_MASK, secondary_bus);
+
+       al_pcie_controller_writel(pcie, cfg_control_offset, reg);
+}
+
+static int al_pcie_host_init(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct al_pcie *pcie = to_al_pcie(pci);
+       int rc;
+
+       rc = al_pcie_rev_id_get(pcie, &pcie->controller_rev_id);
+       if (rc)
+               return rc;
+
+       rc = al_pcie_reg_offsets_set(pcie);
+       if (rc)
+               return rc;
+
+       al_pcie_config_prepare(pcie);
+
+       return 0;
+}
+
+static const struct dw_pcie_host_ops al_pcie_host_ops = {
+       .rd_other_conf = al_pcie_rd_other_conf,
+       .wr_other_conf = al_pcie_wr_other_conf,
+       .host_init = al_pcie_host_init,
+};
+
+static int al_add_pcie_port(struct pcie_port *pp,
+                           struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       int ret;
+
+       pp->ops = &al_pcie_host_ops;
+
+       ret = dw_pcie_host_init(pp);
+       if (ret) {
+               dev_err(dev, "failed to initialize host\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+};
+
+static int al_pcie_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *controller_res;
+       struct resource *ecam_res;
+       struct resource *dbi_res;
+       struct al_pcie *al_pcie;
+       struct dw_pcie *pci;
+
+       al_pcie = devm_kzalloc(dev, sizeof(*al_pcie), GFP_KERNEL);
+       if (!al_pcie)
+               return -ENOMEM;
+
+       pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+       if (!pci)
+               return -ENOMEM;
+
+       pci->dev = dev;
+       pci->ops = &dw_pcie_ops;
+
+       al_pcie->pci = pci;
+       al_pcie->dev = dev;
+
+       dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+       pci->dbi_base = devm_pci_remap_cfg_resource(dev, dbi_res);
+       if (IS_ERR(pci->dbi_base)) {
+               dev_err(dev, "couldn't remap dbi base %pR\n", dbi_res);
+               return PTR_ERR(pci->dbi_base);
+       }
+
+       ecam_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
+       if (!ecam_res) {
+               dev_err(dev, "couldn't find 'config' reg in DT\n");
+               return -ENOENT;
+       }
+       al_pcie->ecam_size = resource_size(ecam_res);
+
+       controller_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                                     "controller");
+       al_pcie->controller_base = devm_ioremap_resource(dev, controller_res);
+       if (IS_ERR(al_pcie->controller_base)) {
+               dev_err(dev, "couldn't remap controller base %pR\n",
+                       controller_res);
+               return PTR_ERR(al_pcie->controller_base);
+       }
+
+       dev_dbg(dev, "From DT: dbi_base: %pR, controller_base: %pR\n",
+               dbi_res, controller_res);
+
+       platform_set_drvdata(pdev, al_pcie);
+
+       return al_add_pcie_port(&pci->pp, pdev);
+}
+
+static const struct of_device_id al_pcie_of_match[] = {
+       { .compatible = "amazon,al-alpine-v2-pcie",
+       },
+       { .compatible = "amazon,al-alpine-v3-pcie",
+       },
+       {},
+};
+
+static struct platform_driver al_pcie_driver = {
+       .driver = {
+               .name   = "al-pcie",
+               .of_match_table = al_pcie_of_match,
+               .suppress_bind_attrs = true,
+       },
+       .probe = al_pcie_probe,
+};
+builtin_platform_driver(al_pcie_driver);
+
+#endif /* CONFIG_PCIE_AL */
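
The ecam_bus_mask computation in al_pcie_config_prepare() above follows the ECAM
layout, where each bus gets 1 MiB of config space (bus << 20 | devfn << 12 | reg),
so a window covers (size >> 20) buses. With illustrative numbers, not taken from
a real device tree:

  resource_size_t ecam_size = SZ_16M;                  /* "config" window from DT */
  unsigned int ecam_bus_mask = (ecam_size >> 20) - 1;  /* 16 buses, mask 0xf */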
index 3d55dc7..4959654 100644 (file)
@@ -118,11 +118,10 @@ static int armada8k_pcie_setup_phys(struct armada8k_pcie *pcie)
 
        for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
                pcie->phy[i] = devm_of_phy_get_by_index(dev, node, i);
-               if (IS_ERR(pcie->phy[i]) &&
-                   (PTR_ERR(pcie->phy[i]) == -EPROBE_DEFER))
-                       return PTR_ERR(pcie->phy[i]);
-
                if (IS_ERR(pcie->phy[i])) {
+                       if (PTR_ERR(pcie->phy[i]) != -ENODEV)
+                               return PTR_ERR(pcie->phy[i]);
+
                        pcie->phy[i] = NULL;
                        continue;
                }
index 2bf5a35..3dd2e26 100644 (file)
@@ -40,39 +40,6 @@ void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar)
        __dw_pcie_ep_reset_bar(pci, bar, 0);
 }
 
-static u8 __dw_pcie_ep_find_next_cap(struct dw_pcie *pci, u8 cap_ptr,
-                             u8 cap)
-{
-       u8 cap_id, next_cap_ptr;
-       u16 reg;
-
-       if (!cap_ptr)
-               return 0;
-
-       reg = dw_pcie_readw_dbi(pci, cap_ptr);
-       cap_id = (reg & 0x00ff);
-
-       if (cap_id > PCI_CAP_ID_MAX)
-               return 0;
-
-       if (cap_id == cap)
-               return cap_ptr;
-
-       next_cap_ptr = (reg & 0xff00) >> 8;
-       return __dw_pcie_ep_find_next_cap(pci, next_cap_ptr, cap);
-}
-
-static u8 dw_pcie_ep_find_capability(struct dw_pcie *pci, u8 cap)
-{
-       u8 next_cap_ptr;
-       u16 reg;
-
-       reg = dw_pcie_readw_dbi(pci, PCI_CAPABILITY_LIST);
-       next_cap_ptr = (reg & 0x00ff);
-
-       return __dw_pcie_ep_find_next_cap(pci, next_cap_ptr, cap);
-}
-
 static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no,
                                   struct pci_epf_header *hdr)
 {
@@ -531,6 +498,7 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
        int ret;
        u32 reg;
        void *addr;
+       u8 hdr_type;
        unsigned int nbars;
        unsigned int offset;
        struct pci_epc *epc;
@@ -595,6 +563,13 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
        if (ep->ops->ep_init)
                ep->ops->ep_init(ep);
 
+       hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE);
+       if (hdr_type != PCI_HEADER_TYPE_NORMAL) {
+               dev_err(pci->dev, "PCIe controller is not set to EP mode (hdr_type:0x%x)!\n",
+                       hdr_type);
+               return -EIO;
+       }
+
        ret = of_property_read_u8(np, "max-functions", &epc->max_functions);
        if (ret < 0)
                epc->max_functions = 1;
@@ -612,9 +587,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
                dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n");
                return -ENOMEM;
        }
-       ep->msi_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSI);
+       ep->msi_cap = dw_pcie_find_capability(pci, PCI_CAP_ID_MSI);
 
-       ep->msix_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSIX);
+       ep->msix_cap = dw_pcie_find_capability(pci, PCI_CAP_ID_MSIX);
 
        offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
        if (offset) {
index f93252d..0f36a92 100644 (file)
@@ -323,6 +323,7 @@ int dw_pcie_host_init(struct pcie_port *pp)
        struct pci_bus *child;
        struct pci_host_bridge *bridge;
        struct resource *cfg_res;
+       u32 hdr_type;
        int ret;
 
        raw_spin_lock_init(&pci->pp.lock);
@@ -464,6 +465,21 @@ int dw_pcie_host_init(struct pcie_port *pp)
                        goto err_free_msi;
        }
 
+       ret = dw_pcie_rd_own_conf(pp, PCI_HEADER_TYPE, 1, &hdr_type);
+       if (ret != PCIBIOS_SUCCESSFUL) {
+               dev_err(pci->dev, "Failed reading PCI_HEADER_TYPE cfg space reg (ret: 0x%x)\n",
+                       ret);
+               ret = pcibios_err_to_errno(ret);
+               goto err_free_msi;
+       }
+       if (hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+               dev_err(pci->dev,
+                       "PCIe controller is not set to bridge type (hdr_type: 0x%x)!\n",
+                       hdr_type);
+               ret = -EIO;
+               goto err_free_msi;
+       }
+
        pp->root_bus_nr = pp->busn->start;
 
        bridge->dev.parent = dev;
@@ -628,6 +644,12 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
        u32 val, ctrl, num_ctrls;
        struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 
+       /*
+        * Enable DBI read-only registers for writing/updating configuration.
+        * Write permission gets disabled towards the end of this function.
+        */
+       dw_pcie_dbi_ro_wr_en(pci);
+
        dw_pcie_setup(pci);
 
        if (!pp->ops->msi_host_init) {
@@ -650,12 +672,10 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
        dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000);
 
        /* Setup interrupt pins */
-       dw_pcie_dbi_ro_wr_en(pci);
        val = dw_pcie_readl_dbi(pci, PCI_INTERRUPT_LINE);
        val &= 0xffff00ff;
        val |= 0x00000100;
        dw_pcie_writel_dbi(pci, PCI_INTERRUPT_LINE, val);
-       dw_pcie_dbi_ro_wr_dis(pci);
 
        /* Setup bus numbers */
        val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
@@ -687,15 +707,13 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
 
        dw_pcie_wr_own_conf(pp, PCI_BASE_ADDRESS_0, 4, 0);
 
-       /* Enable write permission for the DBI read-only register */
-       dw_pcie_dbi_ro_wr_en(pci);
        /* Program correct class for RC */
        dw_pcie_wr_own_conf(pp, PCI_CLASS_DEVICE, 2, PCI_CLASS_BRIDGE_PCI);
-       /* Better disable write permission right after the update */
-       dw_pcie_dbi_ro_wr_dis(pci);
 
        dw_pcie_rd_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, &val);
        val |= PORT_LOGIC_SPEED_CHANGE;
        dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val);
+
+       dw_pcie_dbi_ro_wr_dis(pci);
 }
 EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
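
The net effect of this hunk is a single enable/disable bracket around all of the
read-only DBI updates instead of several short-lived ones, i.e. the usual pattern:

  dw_pcie_dbi_ro_wr_en(pci);      /* unlock RO registers (PCIE_DBI_RO_WR_EN) */
  /* ... write PCI_INTERRUPT_LINE, bus numbers, class code, speed control ... */
  dw_pcie_dbi_ro_wr_dis(pci);     /* relock before returning */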
index 7d25102..820488d 100644 (file)
 
 #include "pcie-designware.h"
 
+/*
+ * These interfaces resemble the pci_find_*capability() interfaces, but these
+ * are for configuring host controllers, which are bridges *to* PCI devices but
+ * are not PCI devices themselves.
+ */
+static u8 __dw_pcie_find_next_cap(struct dw_pcie *pci, u8 cap_ptr,
+                                 u8 cap)
+{
+       u8 cap_id, next_cap_ptr;
+       u16 reg;
+
+       if (!cap_ptr)
+               return 0;
+
+       reg = dw_pcie_readw_dbi(pci, cap_ptr);
+       cap_id = (reg & 0x00ff);
+
+       if (cap_id > PCI_CAP_ID_MAX)
+               return 0;
+
+       if (cap_id == cap)
+               return cap_ptr;
+
+       next_cap_ptr = (reg & 0xff00) >> 8;
+       return __dw_pcie_find_next_cap(pci, next_cap_ptr, cap);
+}
+
+u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap)
+{
+       u8 next_cap_ptr;
+       u16 reg;
+
+       reg = dw_pcie_readw_dbi(pci, PCI_CAPABILITY_LIST);
+       next_cap_ptr = (reg & 0x00ff);
+
+       return __dw_pcie_find_next_cap(pci, next_cap_ptr, cap);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_find_capability);
+
+static u16 dw_pcie_find_next_ext_capability(struct dw_pcie *pci, u16 start,
+                                           u8 cap)
+{
+       u32 header;
+       int ttl;
+       int pos = PCI_CFG_SPACE_SIZE;
+
+       /* minimum 8 bytes per capability */
+       ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+       if (start)
+               pos = start;
+
+       header = dw_pcie_readl_dbi(pci, pos);
+       /*
+        * If we have no capabilities, this is indicated by cap ID,
+        * cap version and next pointer all being 0.
+        */
+       if (header == 0)
+               return 0;
+
+       while (ttl-- > 0) {
+               if (PCI_EXT_CAP_ID(header) == cap && pos != start)
+                       return pos;
+
+               pos = PCI_EXT_CAP_NEXT(header);
+               if (pos < PCI_CFG_SPACE_SIZE)
+                       break;
+
+               header = dw_pcie_readl_dbi(pci, pos);
+       }
+
+       return 0;
+}
+
+u16 dw_pcie_find_ext_capability(struct dw_pcie *pci, u8 cap)
+{
+       return dw_pcie_find_next_ext_capability(pci, 0, cap);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_find_ext_capability);
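
A typical caller caches the returned offset once and derives register addresses
from it, as the Tegra194 driver below does with pcie_cap_base. For example:

  u8 cap = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
  u16 lnksta = dw_pcie_readw_dbi(pci, cap + PCI_EXP_LNKSTA);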
+
 int dw_pcie_read(void __iomem *addr, int size, u32 *val)
 {
        if (!IS_ALIGNED((uintptr_t)addr, size)) {
@@ -376,10 +456,11 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci)
                usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
        }
 
-       dev_err(pci->dev, "Phy link never came up\n");
+       dev_info(pci->dev, "Phy link never came up\n");
 
        return -ETIMEDOUT;
 }
+EXPORT_SYMBOL_GPL(dw_pcie_wait_for_link);
 
 int dw_pcie_link_up(struct dw_pcie *pci)
 {
@@ -423,8 +504,10 @@ void dw_pcie_setup(struct dw_pcie *pci)
 
 
        ret = of_property_read_u32(np, "num-lanes", &lanes);
-       if (ret)
-               lanes = 0;
+       if (ret) {
+               dev_dbg(pci->dev, "property num-lanes isn't found\n");
+               return;
+       }
 
        /* Set the number of lanes */
        val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
@@ -466,4 +549,11 @@ void dw_pcie_setup(struct dw_pcie *pci)
                break;
        }
        dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+       if (of_property_read_bool(np, "snps,enable-cdm-check")) {
+               val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+               val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
+                      PCIE_PL_CHK_REG_CHK_REG_START;
+               dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
+       }
 }
index ffed084..5a18e94 100644 (file)
 #define PCIE_MISC_CONTROL_1_OFF                0x8BC
 #define PCIE_DBI_RO_WR_EN              BIT(0)
 
+#define PCIE_PL_CHK_REG_CONTROL_STATUS                 0xB20
+#define PCIE_PL_CHK_REG_CHK_REG_START                  BIT(0)
+#define PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS             BIT(1)
+#define PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR       BIT(16)
+#define PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR            BIT(17)
+#define PCIE_PL_CHK_REG_CHK_REG_COMPLETE               BIT(18)
+
+#define PCIE_PL_CHK_REG_ERR_ADDR                       0xB28
+
 /*
  * iATU Unroll-specific register definitions
  * From 4.80 core version the address translation will be made by unroll
@@ -251,6 +260,9 @@ struct dw_pcie {
 #define to_dw_pcie_from_ep(endpoint)   \
                container_of((endpoint), struct dw_pcie, ep)
 
+u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap);
+u16 dw_pcie_find_ext_capability(struct dw_pcie *pci, u8 cap);
+
 int dw_pcie_read(void __iomem *addr, int size, u32 *val);
 int dw_pcie_write(void __iomem *addr, int size, u32 val);
 
index 954bc2b..811b5c6 100644 (file)
@@ -340,8 +340,8 @@ static int histb_pcie_probe(struct platform_device *pdev)
 
        hipcie->vpcie = devm_regulator_get_optional(dev, "vpcie");
        if (IS_ERR(hipcie->vpcie)) {
-               if (PTR_ERR(hipcie->vpcie) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(hipcie->vpcie) != -ENODEV)
+                       return PTR_ERR(hipcie->vpcie);
                hipcie->vpcie = NULL;
        }
 
index 8df1914..c19617a 100644 (file)
@@ -436,7 +436,7 @@ static int kirin_pcie_host_init(struct pcie_port *pp)
        return 0;
 }
 
-static struct dw_pcie_ops kirin_dw_pcie_ops = {
+static const struct dw_pcie_ops kirin_dw_pcie_ops = {
        .read_dbi = kirin_pcie_read_dbi,
        .write_dbi = kirin_pcie_write_dbi,
        .link_up = kirin_pcie_link_up,
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
new file mode 100644 (file)
index 0000000..f89f5ac
--- /dev/null
@@ -0,0 +1,1732 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for Tegra194 SoC
+ *
+ * Copyright (C) 2019 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/random.h>
+#include <linux/reset.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include "pcie-designware.h"
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+#include "../../pci.h"
+
+#define APPL_PINMUX                            0x0
+#define APPL_PINMUX_PEX_RST                    BIT(0)
+#define APPL_PINMUX_CLKREQ_OVERRIDE_EN         BIT(2)
+#define APPL_PINMUX_CLKREQ_OVERRIDE            BIT(3)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN  BIT(4)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE     BIT(5)
+#define APPL_PINMUX_CLKREQ_OUT_OVRD_EN         BIT(9)
+#define APPL_PINMUX_CLKREQ_OUT_OVRD            BIT(10)
+
+#define APPL_CTRL                              0x4
+#define APPL_CTRL_SYS_PRE_DET_STATE            BIT(6)
+#define APPL_CTRL_LTSSM_EN                     BIT(7)
+#define APPL_CTRL_HW_HOT_RST_EN                        BIT(20)
+#define APPL_CTRL_HW_HOT_RST_MODE_MASK         GENMASK(1, 0)
+#define APPL_CTRL_HW_HOT_RST_MODE_SHIFT                22
+#define APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST     0x1
+
+#define APPL_INTR_EN_L0_0                      0x8
+#define APPL_INTR_EN_L0_0_LINK_STATE_INT_EN    BIT(0)
+#define APPL_INTR_EN_L0_0_MSI_RCV_INT_EN       BIT(4)
+#define APPL_INTR_EN_L0_0_INT_INT_EN           BIT(8)
+#define APPL_INTR_EN_L0_0_CDM_REG_CHK_INT_EN   BIT(19)
+#define APPL_INTR_EN_L0_0_SYS_INTR_EN          BIT(30)
+#define APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN      BIT(31)
+
+#define APPL_INTR_STATUS_L0                    0xC
+#define APPL_INTR_STATUS_L0_LINK_STATE_INT     BIT(0)
+#define APPL_INTR_STATUS_L0_INT_INT            BIT(8)
+#define APPL_INTR_STATUS_L0_CDM_REG_CHK_INT    BIT(18)
+
+#define APPL_INTR_EN_L1_0_0                            0x1C
+#define APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN    BIT(1)
+
+#define APPL_INTR_STATUS_L1_0_0                                0x20
+#define APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED BIT(1)
+
+#define APPL_INTR_STATUS_L1_1                  0x2C
+#define APPL_INTR_STATUS_L1_2                  0x30
+#define APPL_INTR_STATUS_L1_3                  0x34
+#define APPL_INTR_STATUS_L1_6                  0x3C
+#define APPL_INTR_STATUS_L1_7                  0x40
+
+#define APPL_INTR_EN_L1_8_0                    0x44
+#define APPL_INTR_EN_L1_8_BW_MGT_INT_EN                BIT(2)
+#define APPL_INTR_EN_L1_8_AUTO_BW_INT_EN       BIT(3)
+#define APPL_INTR_EN_L1_8_INTX_EN              BIT(11)
+#define APPL_INTR_EN_L1_8_AER_INT_EN           BIT(15)
+
+#define APPL_INTR_STATUS_L1_8_0                        0x4C
+#define APPL_INTR_STATUS_L1_8_0_EDMA_INT_MASK  GENMASK(11, 6)
+#define APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS BIT(2)
+#define APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS        BIT(3)
+
+#define APPL_INTR_STATUS_L1_9                  0x54
+#define APPL_INTR_STATUS_L1_10                 0x58
+#define APPL_INTR_STATUS_L1_11                 0x64
+#define APPL_INTR_STATUS_L1_13                 0x74
+#define APPL_INTR_STATUS_L1_14                 0x78
+#define APPL_INTR_STATUS_L1_15                 0x7C
+#define APPL_INTR_STATUS_L1_17                 0x88
+
+#define APPL_INTR_EN_L1_18                             0x90
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMPLT           BIT(2)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR         BIT(1)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR       BIT(0)
+
+#define APPL_INTR_STATUS_L1_18                         0x94
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT       BIT(2)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR     BIT(1)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR   BIT(0)
+
+#define APPL_MSI_CTRL_2                                0xB0
+
+#define APPL_LTR_MSG_1                         0xC4
+#define LTR_MSG_REQ                            BIT(15)
+#define LTR_MST_NO_SNOOP_SHIFT                 16
+
+#define APPL_LTR_MSG_2                         0xC8
+#define APPL_LTR_MSG_2_LTR_MSG_REQ_STATE       BIT(3)
+
+#define APPL_LINK_STATUS                       0xCC
+#define APPL_LINK_STATUS_RDLH_LINK_UP          BIT(0)
+
+#define APPL_DEBUG                             0xD0
+#define APPL_DEBUG_PM_LINKST_IN_L2_LAT         BIT(21)
+#define APPL_DEBUG_PM_LINKST_IN_L0             0x11
+#define APPL_DEBUG_LTSSM_STATE_MASK            GENMASK(8, 3)
+#define APPL_DEBUG_LTSSM_STATE_SHIFT           3
+#define LTSSM_STATE_PRE_DETECT                 5
+
+#define APPL_RADM_STATUS                       0xE4
+#define APPL_PM_XMT_TURNOFF_STATE              BIT(0)
+
+#define APPL_DM_TYPE                           0x100
+#define APPL_DM_TYPE_MASK                      GENMASK(3, 0)
+#define APPL_DM_TYPE_RP                                0x4
+#define APPL_DM_TYPE_EP                                0x0
+
+#define APPL_CFG_BASE_ADDR                     0x104
+#define APPL_CFG_BASE_ADDR_MASK                        GENMASK(31, 12)
+
+#define APPL_CFG_IATU_DMA_BASE_ADDR            0x108
+#define APPL_CFG_IATU_DMA_BASE_ADDR_MASK       GENMASK(31, 18)
+
+#define APPL_CFG_MISC                          0x110
+#define APPL_CFG_MISC_SLV_EP_MODE              BIT(14)
+#define APPL_CFG_MISC_ARCACHE_MASK             GENMASK(13, 10)
+#define APPL_CFG_MISC_ARCACHE_SHIFT            10
+#define APPL_CFG_MISC_ARCACHE_VAL              3
+
+#define APPL_CFG_SLCG_OVERRIDE                 0x114
+#define APPL_CFG_SLCG_OVERRIDE_SLCG_EN_MASTER  BIT(0)
+
+#define APPL_CAR_RESET_OVRD                            0x12C
+#define APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N    BIT(0)
+
+#define IO_BASE_IO_DECODE                              BIT(0)
+#define IO_BASE_IO_DECODE_BIT8                         BIT(8)
+
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE             BIT(0)
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE       BIT(16)
+
+#define CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF        0x718
+#define CFG_TIMER_CTRL_ACK_NAK_SHIFT   (19)
+
+#define EVENT_COUNTER_ALL_CLEAR                0x3
+#define EVENT_COUNTER_ENABLE_ALL       0x7
+#define EVENT_COUNTER_ENABLE_SHIFT     2
+#define EVENT_COUNTER_EVENT_SEL_MASK   GENMASK(7, 0)
+#define EVENT_COUNTER_EVENT_SEL_SHIFT  16
+#define EVENT_COUNTER_EVENT_Tx_L0S     0x2
+#define EVENT_COUNTER_EVENT_Rx_L0S     0x3
+#define EVENT_COUNTER_EVENT_L1         0x5
+#define EVENT_COUNTER_EVENT_L1_1       0x7
+#define EVENT_COUNTER_EVENT_L1_2       0x8
+#define EVENT_COUNTER_GROUP_SEL_SHIFT  24
+#define EVENT_COUNTER_GROUP_5          0x5
+
+#define PORT_LOGIC_ACK_F_ASPM_CTRL                     0x70C
+#define ENTER_ASPM                                     BIT(30)
+#define L0S_ENTRANCE_LAT_SHIFT                         24
+#define L0S_ENTRANCE_LAT_MASK                          GENMASK(26, 24)
+#define L1_ENTRANCE_LAT_SHIFT                          27
+#define L1_ENTRANCE_LAT_MASK                           GENMASK(29, 27)
+#define N_FTS_SHIFT                                    8
+#define N_FTS_MASK                                     GENMASK(7, 0)
+#define N_FTS_VAL                                      52
+
+#define PORT_LOGIC_GEN2_CTRL                           0x80C
+#define PORT_LOGIC_GEN2_CTRL_DIRECT_SPEED_CHANGE       BIT(17)
+#define FTS_MASK                                       GENMASK(7, 0)
+#define FTS_VAL                                                52
+
+#define PORT_LOGIC_MSI_CTRL_INT_0_EN           0x828
+
+#define GEN3_EQ_CONTROL_OFF                    0x8a8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT 8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK  GENMASK(23, 8)
+#define GEN3_EQ_CONTROL_OFF_FB_MODE_MASK       GENMASK(3, 0)
+
+#define GEN3_RELATED_OFF                       0x890
+#define GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL   BIT(0)
+#define GEN3_RELATED_OFF_GEN3_EQ_DISABLE       BIT(16)
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT 24
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK  GENMASK(25, 24)
+
+#define PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT 0x8D0
+#define AMBA_ERROR_RESPONSE_CRS_SHIFT          3
+#define AMBA_ERROR_RESPONSE_CRS_MASK           GENMASK(1, 0)
+#define AMBA_ERROR_RESPONSE_CRS_OKAY           0
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFFFFFF  1
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001  2
+
+#define PORT_LOGIC_MSIX_DOORBELL                       0x948
+
+#define CAP_SPCIE_CAP_OFF                      0x154
+#define CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK  GENMASK(3, 0)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK  GENMASK(11, 8)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT 8
+
+#define PME_ACK_TIMEOUT 10000
+
+#define LTSSM_TIMEOUT 50000    /* 50ms */
+
+#define GEN3_GEN4_EQ_PRESET_INIT       5
+
+#define GEN1_CORE_CLK_FREQ     62500000
+#define GEN2_CORE_CLK_FREQ     125000000
+#define GEN3_CORE_CLK_FREQ     250000000
+#define GEN4_CORE_CLK_FREQ     500000000
+
+static const unsigned int pcie_gen_freq[] = {
+       GEN1_CORE_CLK_FREQ,
+       GEN2_CORE_CLK_FREQ,
+       GEN3_CORE_CLK_FREQ,
+       GEN4_CORE_CLK_FREQ
+};
+
+static const u32 event_cntr_ctrl_offset[] = {
+       0x1d8,
+       0x1a8,
+       0x1a8,
+       0x1a8,
+       0x1c4,
+       0x1d8
+};
+
+static const u32 event_cntr_data_offset[] = {
+       0x1dc,
+       0x1ac,
+       0x1ac,
+       0x1ac,
+       0x1c8,
+       0x1dc
+};
+
+struct tegra_pcie_dw {
+       struct device *dev;
+       struct resource *appl_res;
+       struct resource *dbi_res;
+       struct resource *atu_dma_res;
+       void __iomem *appl_base;
+       struct clk *core_clk;
+       struct reset_control *core_apb_rst;
+       struct reset_control *core_rst;
+       struct dw_pcie pci;
+       struct tegra_bpmp *bpmp;
+
+       bool supports_clkreq;
+       bool enable_cdm_check;
+       bool link_state;
+       bool update_fc_fixup;
+       u8 init_link_width;
+       u32 msi_ctrl_int;
+       u32 num_lanes;
+       u32 max_speed;
+       u32 cid;
+       u32 cfg_link_cap_l1sub;
+       u32 pcie_cap_base;
+       u32 aspm_cmrt;
+       u32 aspm_pwr_on_t;
+       u32 aspm_l0s_enter_lat;
+
+       struct regulator *pex_ctl_supply;
+       struct regulator *slot_ctl_3v3;
+       struct regulator *slot_ctl_12v;
+
+       unsigned int phy_count;
+       struct phy **phys;
+
+       struct dentry *debugfs;
+};
+
+static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
+{
+       return container_of(pci, struct tegra_pcie_dw, pci);
+}
+
+static inline void appl_writel(struct tegra_pcie_dw *pcie, const u32 value,
+                              const u32 reg)
+{
+       writel_relaxed(value, pcie->appl_base + reg);
+}
+
+static inline u32 appl_readl(struct tegra_pcie_dw *pcie, const u32 reg)
+{
+       return readl_relaxed(pcie->appl_base + reg);
+}
+
+struct tegra_pcie_soc {
+       enum dw_pcie_device_mode mode;
+};
+
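+/*
+ * If the link trained to a narrower width than it initially came up with,
+ * the link is evidently marginal: force the target link speed down to
+ * 2.5GT/s and retrain.
+ */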
+static void apply_bad_link_workaround(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 current_link_width;
+       u16 val;
+
+       /*
+        * NOTE: since this scenario is uncommon and the link is not stable
+        * anyway, don't wait to confirm that the link really transitions to
+        * Gen-2 speed.
+        */
+       val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA);
+       if (val & PCI_EXP_LNKSTA_LBMS) {
+               current_link_width = (val & PCI_EXP_LNKSTA_NLW) >>
+                                    PCI_EXP_LNKSTA_NLW_SHIFT;
+               if (pcie->init_link_width > current_link_width) {
+                       dev_warn(pci->dev, "PCIe link is bad, width reduced\n");
+                       val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+                                               PCI_EXP_LNKCTL2);
+                       val &= ~PCI_EXP_LNKCTL2_TLS;
+                       val |= PCI_EXP_LNKCTL2_TLS_2_5GT;
+                       dw_pcie_writew_dbi(pci, pcie->pcie_cap_base +
+                                          PCI_EXP_LNKCTL2, val);
+
+                       val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+                                               PCI_EXP_LNKCTL);
+                       val |= PCI_EXP_LNKCTL_RL;
+                       dw_pcie_writew_dbi(pci, pcie->pcie_cap_base +
+                                          PCI_EXP_LNKCTL, val);
+               }
+       }
+}
+
+static irqreturn_t tegra_pcie_rp_irq_handler(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       struct pcie_port *pp = &pci->pp;
+       u32 val, tmp;
+       u16 val_w;
+
+       val = appl_readl(pcie, APPL_INTR_STATUS_L0);
+       if (val & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
+               val = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
+               if (val & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) {
+                       appl_writel(pcie, val, APPL_INTR_STATUS_L1_0_0);
+
+                       /* SBR & Surprise Link Down WAR */
+                       val = appl_readl(pcie, APPL_CAR_RESET_OVRD);
+                       val &= ~APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+                       appl_writel(pcie, val, APPL_CAR_RESET_OVRD);
+                       udelay(1);
+                       val = appl_readl(pcie, APPL_CAR_RESET_OVRD);
+                       val |= APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+                       appl_writel(pcie, val, APPL_CAR_RESET_OVRD);
+
+                       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
+                       val |= PORT_LOGIC_GEN2_CTRL_DIRECT_SPEED_CHANGE;
+                       dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
+               }
+       }
+
+       if (val & APPL_INTR_STATUS_L0_INT_INT) {
+               val = appl_readl(pcie, APPL_INTR_STATUS_L1_8_0);
+               if (val & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) {
+                       appl_writel(pcie,
+                                   APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS,
+                                   APPL_INTR_STATUS_L1_8_0);
+                       apply_bad_link_workaround(pp);
+               }
+               if (val & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) {
+                       appl_writel(pcie,
+                                   APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS,
+                                   APPL_INTR_STATUS_L1_8_0);
+
+                       val_w = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+                                                 PCI_EXP_LNKSTA);
+                       dev_dbg(pci->dev, "Link speed: Gen-%u\n", val_w &
+                               PCI_EXP_LNKSTA_CLS);
+               }
+       }
+
+       val = appl_readl(pcie, APPL_INTR_STATUS_L0);
+       if (val & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) {
+               val = appl_readl(pcie, APPL_INTR_STATUS_L1_18);
+               tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+               if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) {
+                       dev_info(pci->dev, "CDM check complete\n");
+                       tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPLETE;
+               }
+               if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) {
+                       dev_err(pci->dev, "CDM comparison mismatch\n");
+                       tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR;
+               }
+               if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) {
+                       dev_err(pci->dev, "CDM Logic error\n");
+                       tmp |= PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR;
+               }
+               dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, tmp);
+               tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_ERR_ADDR);
+               dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", tmp);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t tegra_pcie_irq_handler(int irq, void *arg)
+{
+       struct tegra_pcie_dw *pcie = arg;
+
+       return tegra_pcie_rp_irq_handler(pcie);
+}
+
+static int tegra_pcie_dw_rd_own_conf(struct pcie_port *pp, int where, int size,
+                                    u32 *val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+       /*
+        * This endpoint-mode-specific register happens to be visible even
+        * when the controller is operating in root port mode, and the system
+        * hangs if it is accessed while the link is in the ASPM-L1 state.
+        * So skip accessing it altogether.
+        */
+       if (where == PORT_LOGIC_MSIX_DOORBELL) {
+               *val = 0x00000000;
+               return PCIBIOS_SUCCESSFUL;
+       }
+
+       return dw_pcie_read(pci->dbi_base + where, size, val);
+}
+
+static int tegra_pcie_dw_wr_own_conf(struct pcie_port *pp, int where, int size,
+                                    u32 val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+       /*
+        * This endpoint-mode-specific register happens to be visible even
+        * when the controller is operating in root port mode, and the system
+        * hangs if it is accessed while the link is in the ASPM-L1 state.
+        * So skip accessing it altogether.
+        */
+       if (where == PORT_LOGIC_MSIX_DOORBELL)
+               return PCIBIOS_SUCCESSFUL;
+
+       return dw_pcie_write(pci->dbi_base + where, size, val);
+}
+
+#if defined(CONFIG_PCIEASPM)
+static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
+{
+       u32 val;
+
+       val = dw_pcie_readl_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub);
+       val &= ~PCI_L1SS_CAP_ASPM_L1_1;
+       dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
+}
+
+static void disable_aspm_l12(struct tegra_pcie_dw *pcie)
+{
+       u32 val;
+
+       val = dw_pcie_readl_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub);
+       val &= ~PCI_L1SS_CAP_ASPM_L1_2;
+       dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
+}
+
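+/*
+ * Select @event in group 5 of this controller's event counter, enable
+ * counting and return the current count from the matching data register.
+ */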
+static inline u32 event_counter_prog(struct tegra_pcie_dw *pcie, u32 event)
+{
+       u32 val;
+
+       val = dw_pcie_readl_dbi(&pcie->pci, event_cntr_ctrl_offset[pcie->cid]);
+       val &= ~(EVENT_COUNTER_EVENT_SEL_MASK << EVENT_COUNTER_EVENT_SEL_SHIFT);
+       val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+       val |= event << EVENT_COUNTER_EVENT_SEL_SHIFT;
+       val |= EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+       dw_pcie_writel_dbi(&pcie->pci, event_cntr_ctrl_offset[pcie->cid], val);
+       val = dw_pcie_readl_dbi(&pcie->pci, event_cntr_data_offset[pcie->cid]);
+
+       return val;
+}
+
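+/* debugfs seq_file: print the ASPM state entry counters, then reset them */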
+static int aspm_state_cnt(struct seq_file *s, void *data)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(s->private);
+       u32 val;
+
+       seq_printf(s, "Tx L0s entry count : %u\n",
+                  event_counter_prog(pcie, EVENT_COUNTER_EVENT_Tx_L0S));
+
+       seq_printf(s, "Rx L0s entry count : %u\n",
+                  event_counter_prog(pcie, EVENT_COUNTER_EVENT_Rx_L0S));
+
+       seq_printf(s, "Link L1 entry count : %u\n",
+                  event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1));
+
+       seq_printf(s, "Link L1.1 entry count : %u\n",
+                  event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_1));
+
+       seq_printf(s, "Link L1.2 entry count : %u\n",
+                  event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_2));
+
+       /* Clear all counters */
+       dw_pcie_writel_dbi(&pcie->pci, event_cntr_ctrl_offset[pcie->cid],
+                          EVENT_COUNTER_ALL_CLEAR);
+
+       /* Re-enable counting */
+       val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+       val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+       dw_pcie_writel_dbi(&pcie->pci, event_cntr_ctrl_offset[pcie->cid], val);
+
+       return 0;
+}
+
+static void init_host_aspm(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       u32 val;
+
+       val = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS);
+       pcie->cfg_link_cap_l1sub = val + PCI_L1SS_CAP;
+
+       /* Enable ASPM counters */
+       val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+       val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+       dw_pcie_writel_dbi(pci, event_cntr_ctrl_offset[pcie->cid], val);
+
+       /* Program T_cmrt and T_pwr_on values */
+       val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+       val &= ~(PCI_L1SS_CAP_CM_RESTORE_TIME | PCI_L1SS_CAP_P_PWR_ON_VALUE);
+       val |= (pcie->aspm_cmrt << 8);
+       val |= (pcie->aspm_pwr_on_t << 19);
+       dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, val);
+
+       /* Program L0s and L1 entrance latencies */
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
+       val &= ~L0S_ENTRANCE_LAT_MASK;
+       val |= (pcie->aspm_l0s_enter_lat << L0S_ENTRANCE_LAT_SHIFT);
+       val |= ENTER_ASPM;
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
+}
+
+static int init_debugfs(struct tegra_pcie_dw *pcie)
+{
+       struct dentry *d;
+
+       d = debugfs_create_devm_seqfile(pcie->dev, "aspm_state_cnt",
+                                       pcie->debugfs, aspm_state_cnt);
+       if (IS_ERR_OR_NULL(d))
+               dev_err(pcie->dev,
+                       "Failed to create debugfs file \"aspm_state_cnt\"\n");
+
+       return 0;
+}
+#else
+static inline void disable_aspm_l12(struct tegra_pcie_dw *pcie) { }
+static inline void disable_aspm_l11(struct tegra_pcie_dw *pcie) { }
+static inline void init_host_aspm(struct tegra_pcie_dw *pcie) { }
+static inline int init_debugfs(struct tegra_pcie_dw *pcie) { return 0; }
+#endif
+
+static void tegra_pcie_enable_system_interrupts(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val;
+       u16 val_w;
+
+       val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+       val |= APPL_INTR_EN_L0_0_LINK_STATE_INT_EN;
+       appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+       val = appl_readl(pcie, APPL_INTR_EN_L1_0_0);
+       val |= APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN;
+       appl_writel(pcie, val, APPL_INTR_EN_L1_0_0);
+
+       if (pcie->enable_cdm_check) {
+               val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+               val |= APPL_INTR_EN_L0_0_CDM_REG_CHK_INT_EN;
+               appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+               val = appl_readl(pcie, APPL_INTR_EN_L1_18);
+               val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR;
+               val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR;
+               appl_writel(pcie, val, APPL_INTR_EN_L1_18);
+       }
+
+       val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base +
+                                 PCI_EXP_LNKSTA);
+       pcie->init_link_width = (val_w & PCI_EXP_LNKSTA_NLW) >>
+                               PCI_EXP_LNKSTA_NLW_SHIFT;
+
+       val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base +
+                                 PCI_EXP_LNKCTL);
+       val_w |= PCI_EXP_LNKCTL_LBMIE;
+       dw_pcie_writew_dbi(&pcie->pci, pcie->pcie_cap_base + PCI_EXP_LNKCTL,
+                          val_w);
+}
+
+static void tegra_pcie_enable_legacy_interrupts(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val;
+
+       /* Enable legacy interrupt generation */
+       val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+       val |= APPL_INTR_EN_L0_0_SYS_INTR_EN;
+       val |= APPL_INTR_EN_L0_0_INT_INT_EN;
+       appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+       val = appl_readl(pcie, APPL_INTR_EN_L1_8_0);
+       val |= APPL_INTR_EN_L1_8_INTX_EN;
+       val |= APPL_INTR_EN_L1_8_AUTO_BW_INT_EN;
+       val |= APPL_INTR_EN_L1_8_BW_MGT_INT_EN;
+       if (IS_ENABLED(CONFIG_PCIEAER))
+               val |= APPL_INTR_EN_L1_8_AER_INT_EN;
+       appl_writel(pcie, val, APPL_INTR_EN_L1_8_0);
+}
+
+static void tegra_pcie_enable_msi_interrupts(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val;
+
+       dw_pcie_msi_init(pp);
+
+       /* Enable MSI interrupt generation */
+       val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+       val |= APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN;
+       val |= APPL_INTR_EN_L0_0_MSI_RCV_INT_EN;
+       appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+}
+
+static void tegra_pcie_enable_interrupts(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+
+       /* Clear interrupt statuses before enabling interrupts */
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L0);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_0_0);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_1);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_2);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_3);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_6);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_7);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_8_0);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_9);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_10);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_11);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_13);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_14);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_15);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_17);
+
+       tegra_pcie_enable_system_interrupts(pp);
+       tegra_pcie_enable_legacy_interrupts(pp);
+       if (IS_ENABLED(CONFIG_PCI_MSI))
+               tegra_pcie_enable_msi_interrupts(pp);
+}
+
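+/*
+ * The preset request vector is programmed twice below, since the
+ * RATE_SHADOW_SEL field in GEN3_RELATED_OFF selects which data rate's copy
+ * of GEN3_EQ_CONTROL a DBI write targets: 0x3ff with the shadow select at 0
+ * and 0x360 with it at 1.
+ */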
+static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       u32 val, offset, i;
+
+       /* Program init preset */
+       for (i = 0; i < pcie->num_lanes; i++) {
+               dw_pcie_read(pci->dbi_base + CAP_SPCIE_CAP_OFF
+                                + (i * 2), 2, &val);
+               val &= ~CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK;
+               val |= GEN3_GEN4_EQ_PRESET_INIT;
+               val &= ~CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK;
+               val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+                          CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT);
+               dw_pcie_write(pci->dbi_base + CAP_SPCIE_CAP_OFF
+                                + (i * 2), 2, val);
+
+               offset = dw_pcie_find_ext_capability(pci,
+                                                    PCI_EXT_CAP_ID_PL_16GT) +
+                               PCI_PL_16GT_LE_CTRL;
+               dw_pcie_read(pci->dbi_base + offset + i, 1, &val);
+               val &= ~PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK;
+               val |= GEN3_GEN4_EQ_PRESET_INIT;
+               val &= ~PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK;
+               val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+                       PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT);
+               dw_pcie_write(pci->dbi_base + offset + i, 1, val);
+       }
+
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+       val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+       val |= (0x3ff << GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+       val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+       val |= (0x1 << GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT);
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+       val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+       val |= (0x360 << GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+       val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+}
+
+static void tegra_pcie_prepare_host(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val;
+
+       val = dw_pcie_readl_dbi(pci, PCI_IO_BASE);
+       val &= ~(IO_BASE_IO_DECODE | IO_BASE_IO_DECODE_BIT8);
+       dw_pcie_writel_dbi(pci, PCI_IO_BASE, val);
+
+       val = dw_pcie_readl_dbi(pci, PCI_PREF_MEMORY_BASE);
+       val |= CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE;
+       val |= CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE;
+       dw_pcie_writel_dbi(pci, PCI_PREF_MEMORY_BASE, val);
+
+       dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+
+       /* Configure FTS */
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
+       val &= ~(N_FTS_MASK << N_FTS_SHIFT);
+       val |= N_FTS_VAL << N_FTS_SHIFT;
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
+
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
+       val &= ~FTS_MASK;
+       val |= FTS_VAL;
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
+
+       /* Return 0xFFFF0001 data for config reads that complete with CRS */
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT);
+       val &= ~(AMBA_ERROR_RESPONSE_CRS_MASK << AMBA_ERROR_RESPONSE_CRS_SHIFT);
+       val |= (AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001 <<
+               AMBA_ERROR_RESPONSE_CRS_SHIFT);
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT, val);
+
+       /* Configure Max Speed from DT */
+       if (pcie->max_speed && pcie->max_speed != -EINVAL) {
+               val = dw_pcie_readl_dbi(pci, pcie->pcie_cap_base +
+                                       PCI_EXP_LNKCAP);
+               val &= ~PCI_EXP_LNKCAP_SLS;
+               val |= pcie->max_speed;
+               dw_pcie_writel_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP,
+                                  val);
+       }
+
+       /* Configure Max lane width from DT */
+       val = dw_pcie_readl_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP);
+       val &= ~PCI_EXP_LNKCAP_MLW;
+       val |= (pcie->num_lanes << PCI_EXP_LNKSTA_NLW_SHIFT);
+       dw_pcie_writel_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP, val);
+
+       config_gen3_gen4_eq_presets(pcie);
+
+       init_host_aspm(pcie);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL;
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+       if (pcie->update_fc_fixup) {
+               val = dw_pcie_readl_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF);
+               val |= 0x1 << CFG_TIMER_CTRL_ACK_NAK_SHIFT;
+               dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val);
+       }
+
+       dw_pcie_setup_rc(pp);
+
+       clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ);
+
+       /* Assert RST */
+       val = appl_readl(pcie, APPL_PINMUX);
+       val &= ~APPL_PINMUX_PEX_RST;
+       appl_writel(pcie, val, APPL_PINMUX);
+
+       usleep_range(100, 200);
+
+       /* Enable LTSSM */
+       val = appl_readl(pcie, APPL_CTRL);
+       val |= APPL_CTRL_LTSSM_EN;
+       appl_writel(pcie, val, APPL_CTRL);
+
+       /* De-assert RST */
+       val = appl_readl(pcie, APPL_PINMUX);
+       val |= APPL_PINMUX_PEX_RST;
+       appl_writel(pcie, val, APPL_PINMUX);
+
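+       /* PCIe r4.0, sec 6.6.1: wait 100 ms after PERST# de-assertion */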
+       msleep(100);
+}
+
+static int tegra_pcie_dw_host_init(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val, tmp, offset, speed;
+
+       tegra_pcie_prepare_host(pp);
+
+       if (dw_pcie_wait_for_link(pci)) {
+               /*
+                * Some endpoints can't establish a link if the root port has
+                * the Data Link Feature (DLF) enabled. See PCIe Base Spec
+                * r4.0 v1.0, sec 3.4.2 & 7.7.4, for more information on
+                * Scaled Flow Control and DLF.
+                * Confirm that this is indeed the case here and retry the
+                * link-up once with DLF disabled.
+                */
+               val = appl_readl(pcie, APPL_DEBUG);
+               val &= APPL_DEBUG_LTSSM_STATE_MASK;
+               val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
+               tmp = appl_readl(pcie, APPL_LINK_STATUS);
+               tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
+               if (!(val == 0x11 && !tmp)) {
+                       /* Link is down for a legitimate reason; give up */
+                       return 0;
+               }
+
+               dev_info(pci->dev, "Link is down in DLL\n");
+               dev_info(pci->dev, "Trying again with DLFE disabled\n");
+               /* Disable LTSSM */
+               val = appl_readl(pcie, APPL_CTRL);
+               val &= ~APPL_CTRL_LTSSM_EN;
+               appl_writel(pcie, val, APPL_CTRL);
+
+               reset_control_assert(pcie->core_rst);
+               reset_control_deassert(pcie->core_rst);
+
+               offset = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_DLF);
+               val = dw_pcie_readl_dbi(pci, offset + PCI_DLF_CAP);
+               val &= ~PCI_DLF_EXCHANGE_ENABLE;
+               dw_pcie_writel_dbi(pci, offset + PCI_DLF_CAP, val);
+
+               tegra_pcie_prepare_host(pp);
+
+               if (dw_pcie_wait_for_link(pci))
+                       return 0;
+       }
+
+       speed = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA) &
+               PCI_EXP_LNKSTA_CLS;
+       clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
+
+       tegra_pcie_enable_interrupts(pp);
+
+       return 0;
+}
+
+static int tegra_pcie_dw_link_up(struct dw_pcie *pci)
+{
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA);
+
+       return !!(val & PCI_EXP_LNKSTA_DLLLA);
+}
+
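+/* Advertise the maximum number of MSI vectors the DWC core supports */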
+static void tegra_pcie_set_msi_vec_num(struct pcie_port *pp)
+{
+       pp->num_vectors = MAX_MSI_IRQS;
+}
+
+static const struct dw_pcie_ops tegra_dw_pcie_ops = {
+       .link_up = tegra_pcie_dw_link_up,
+};
+
+static struct dw_pcie_host_ops tegra_pcie_dw_host_ops = {
+       .rd_own_conf = tegra_pcie_dw_rd_own_conf,
+       .wr_own_conf = tegra_pcie_dw_wr_own_conf,
+       .host_init = tegra_pcie_dw_host_init,
+       .set_num_vectors = tegra_pcie_set_msi_vec_num,
+};
+
+static void tegra_pcie_disable_phy(struct tegra_pcie_dw *pcie)
+{
+       unsigned int phy_count = pcie->phy_count;
+
+       while (phy_count--) {
+               phy_power_off(pcie->phys[phy_count]);
+               phy_exit(pcie->phys[phy_count]);
+       }
+}
+
+static int tegra_pcie_enable_phy(struct tegra_pcie_dw *pcie)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < pcie->phy_count; i++) {
+               ret = phy_init(pcie->phys[i]);
+               if (ret < 0)
+                       goto phy_power_off;
+
+               ret = phy_power_on(pcie->phys[i]);
+               if (ret < 0)
+                       goto phy_exit;
+       }
+
+       return 0;
+
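+       /*
+        * Unwind in reverse order; a PHY whose phy_power_on() failed only
+        * needs phy_exit(), hence the phy_exit label inside the loop body.
+        */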
+phy_power_off:
+       while (i--) {
+               phy_power_off(pcie->phys[i]);
+phy_exit:
+               phy_exit(pcie->phys[i]);
+       }
+
+       return ret;
+}
+
+static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
+{
+       struct device_node *np = pcie->dev->of_node;
+       int ret;
+
+       ret = of_property_read_u32(np, "nvidia,aspm-cmrt-us", &pcie->aspm_cmrt);
+       if (ret < 0) {
+               dev_info(pcie->dev, "Failed to read ASPM T_cmrt: %d\n", ret);
+               return ret;
+       }
+
+       ret = of_property_read_u32(np, "nvidia,aspm-pwr-on-t-us",
+                                  &pcie->aspm_pwr_on_t);
+       if (ret < 0)
+               dev_info(pcie->dev, "Failed to read ASPM Power On time: %d\n",
+                        ret);
+
+       ret = of_property_read_u32(np, "nvidia,aspm-l0s-entrance-latency-us",
+                                  &pcie->aspm_l0s_enter_lat);
+       if (ret < 0)
+               dev_info(pcie->dev,
+                        "Failed to read ASPM L0s Entrance latency: %d\n", ret);
+
+       ret = of_property_read_u32(np, "num-lanes", &pcie->num_lanes);
+       if (ret < 0) {
+               dev_err(pcie->dev, "Failed to read num-lanes: %d\n", ret);
+               return ret;
+       }
+
+       pcie->max_speed = of_pci_get_max_link_speed(np);
+
+       ret = of_property_read_u32_index(np, "nvidia,bpmp", 1, &pcie->cid);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to read Controller-ID: %d\n", ret);
+               return ret;
+       }
+
+       ret = of_property_count_strings(np, "phy-names");
+       if (ret < 0) {
+               dev_err(pcie->dev, "Failed to find PHY entries: %d\n",
+                       ret);
+               return ret;
+       }
+       pcie->phy_count = ret;
+
+       if (of_property_read_bool(np, "nvidia,update-fc-fixup"))
+               pcie->update_fc_fixup = true;
+
+       pcie->supports_clkreq =
+               of_property_read_bool(pcie->dev->of_node, "supports-clkreq");
+
+       pcie->enable_cdm_check =
+               of_property_read_bool(np, "snps,enable-cdm-check");
+
+       return 0;
+}
+
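+/*
+ * Ask the BPMP firmware, via an MRQ_UPHY request, to enable or disable the
+ * PCIe controller identified by pcie->cid.
+ */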
+static int tegra_pcie_bpmp_set_ctrl_state(struct tegra_pcie_dw *pcie,
+                                         bool enable)
+{
+       struct mrq_uphy_response resp;
+       struct tegra_bpmp_message msg;
+       struct mrq_uphy_request req;
+
+       /* Controller-5 doesn't need to have its state set by BPMP-FW */
+       if (pcie->cid == 5)
+               return 0;
+
+       memset(&req, 0, sizeof(req));
+       memset(&resp, 0, sizeof(resp));
+
+       req.cmd = CMD_UPHY_PCIE_CONTROLLER_STATE;
+       req.controller_state.pcie_controller = pcie->cid;
+       req.controller_state.enable = enable;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.mrq = MRQ_UPHY;
+       msg.tx.data = &req;
+       msg.tx.size = sizeof(req);
+       msg.rx.data = &resp;
+       msg.rx.size = sizeof(resp);
+
+       return tegra_bpmp_transfer(pcie->bpmp, &msg);
+}
+
+static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
+{
+       struct pcie_port *pp = &pcie->pci.pp;
+       struct pci_bus *child, *root_bus = NULL;
+       struct pci_dev *pdev;
+
+       /*
+        * With some endpoints, the link doesn't enter the L2 state unless
+        * they are in the D0 state, so make sure that all immediate
+        * downstream devices are in D0 before sending PME_Turn_Off to put
+        * the link into L2.
+        * This is per PCI Express Base r4.0 v1.0 (September 27, 2017),
+        * sec 5.2 Link State Power Management.
+        */
+
+       list_for_each_entry(child, &pp->root_bus->children, node) {
+               /* Find the bus immediately below the root port */
+               if (child->parent == pp->root_bus) {
+                       root_bus = child;
+                       break;
+               }
+       }
+
+       if (!root_bus) {
+               dev_err(pcie->dev, "Failed to find downstream devices\n");
+               return;
+       }
+
+       list_for_each_entry(pdev, &root_bus->devices, bus_list) {
+               if (PCI_SLOT(pdev->devfn) == 0) {
+                       if (pci_set_power_state(pdev, PCI_D0))
+                               dev_err(pcie->dev,
+                                       "Failed to transition %s to D0 state\n",
+                                       dev_name(&pdev->dev));
+               }
+       }
+}
+
+static int tegra_pcie_get_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+       pcie->slot_ctl_3v3 = devm_regulator_get_optional(pcie->dev, "vpcie3v3");
+       if (IS_ERR(pcie->slot_ctl_3v3)) {
+               if (PTR_ERR(pcie->slot_ctl_3v3) != -ENODEV)
+                       return PTR_ERR(pcie->slot_ctl_3v3);
+
+               pcie->slot_ctl_3v3 = NULL;
+       }
+
+       pcie->slot_ctl_12v = devm_regulator_get_optional(pcie->dev, "vpcie12v");
+       if (IS_ERR(pcie->slot_ctl_12v)) {
+               if (PTR_ERR(pcie->slot_ctl_12v) != -ENODEV)
+                       return PTR_ERR(pcie->slot_ctl_12v);
+
+               pcie->slot_ctl_12v = NULL;
+       }
+
+       return 0;
+}
+
+static int tegra_pcie_enable_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+       int ret;
+
+       if (pcie->slot_ctl_3v3) {
+               ret = regulator_enable(pcie->slot_ctl_3v3);
+               if (ret < 0) {
+                       dev_err(pcie->dev,
+                               "Failed to enable 3.3V slot supply: %d\n", ret);
+                       return ret;
+               }
+       }
+
+       if (pcie->slot_ctl_12v) {
+               ret = regulator_enable(pcie->slot_ctl_12v);
+               if (ret < 0) {
+                       dev_err(pcie->dev,
+                               "Failed to enable 12V slot supply: %d\n", ret);
+                       goto fail_12v_enable;
+               }
+       }
+
+       /*
+        * According to PCI Express Card Electromechanical Specification
+        * Revision 1.1, Table-2.4, T_PVPERL (Power stable to PERST# inactive)
+        * should be a minimum of 100ms.
+        */
+       if (pcie->slot_ctl_3v3 || pcie->slot_ctl_12v)
+               msleep(100);
+
+       return 0;
+
+fail_12v_enable:
+       if (pcie->slot_ctl_3v3)
+               regulator_disable(pcie->slot_ctl_3v3);
+       return ret;
+}
+
+static void tegra_pcie_disable_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+       if (pcie->slot_ctl_12v)
+               regulator_disable(pcie->slot_ctl_12v);
+       if (pcie->slot_ctl_3v3)
+               regulator_disable(pcie->slot_ctl_3v3);
+}
+
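+/*
+ * Bring-up order: BPMP enables the controller, the slot and PEX-CTL
+ * supplies come up, the core clock and APB reset are released, the PHYs are
+ * powered on and, finally, the core reset is de-asserted.
+ */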
+static int tegra_pcie_config_controller(struct tegra_pcie_dw *pcie,
+                                       bool en_hw_hot_rst)
+{
+       int ret;
+       u32 val;
+
+       ret = tegra_pcie_bpmp_set_ctrl_state(pcie, true);
+       if (ret) {
+               dev_err(pcie->dev,
+                       "Failed to enable controller %u: %d\n", pcie->cid, ret);
+               return ret;
+       }
+
+       ret = tegra_pcie_enable_slot_regulators(pcie);
+       if (ret < 0)
+               goto fail_slot_reg_en;
+
+       ret = regulator_enable(pcie->pex_ctl_supply);
+       if (ret < 0) {
+               dev_err(pcie->dev, "Failed to enable regulator: %d\n", ret);
+               goto fail_reg_en;
+       }
+
+       ret = clk_prepare_enable(pcie->core_clk);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to enable core clock: %d\n", ret);
+               goto fail_core_clk;
+       }
+
+       ret = reset_control_deassert(pcie->core_apb_rst);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to deassert core APB reset: %d\n",
+                       ret);
+               goto fail_core_apb_rst;
+       }
+
+       if (en_hw_hot_rst) {
+               /* Enable HW_HOT_RST mode */
+               val = appl_readl(pcie, APPL_CTRL);
+               val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+                        APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+               val |= APPL_CTRL_HW_HOT_RST_EN;
+               appl_writel(pcie, val, APPL_CTRL);
+       }
+
+       ret = tegra_pcie_enable_phy(pcie);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to enable PHY: %d\n", ret);
+               goto fail_phy;
+       }
+
+       /* Update CFG base address */
+       appl_writel(pcie, pcie->dbi_res->start & APPL_CFG_BASE_ADDR_MASK,
+                   APPL_CFG_BASE_ADDR);
+
+       /* Configure this core for RP mode operation */
+       appl_writel(pcie, APPL_DM_TYPE_RP, APPL_DM_TYPE);
+
+       appl_writel(pcie, 0x0, APPL_CFG_SLCG_OVERRIDE);
+
+       val = appl_readl(pcie, APPL_CTRL);
+       appl_writel(pcie, val | APPL_CTRL_SYS_PRE_DET_STATE, APPL_CTRL);
+
+       val = appl_readl(pcie, APPL_CFG_MISC);
+       val |= (APPL_CFG_MISC_ARCACHE_VAL << APPL_CFG_MISC_ARCACHE_SHIFT);
+       appl_writel(pcie, val, APPL_CFG_MISC);
+
+       if (!pcie->supports_clkreq) {
+               val = appl_readl(pcie, APPL_PINMUX);
+               val |= APPL_PINMUX_CLKREQ_OUT_OVRD_EN;
+               val |= APPL_PINMUX_CLKREQ_OUT_OVRD;
+               appl_writel(pcie, val, APPL_PINMUX);
+       }
+
+       /* Update iATU_DMA base address */
+       appl_writel(pcie,
+                   pcie->atu_dma_res->start & APPL_CFG_IATU_DMA_BASE_ADDR_MASK,
+                   APPL_CFG_IATU_DMA_BASE_ADDR);
+
+       reset_control_deassert(pcie->core_rst);
+
+       pcie->pcie_cap_base = dw_pcie_find_capability(&pcie->pci,
+                                                     PCI_CAP_ID_EXP);
+
+       /* Disable ASPM-L1SS advertisement as there is no CLKREQ routing */
+       if (!pcie->supports_clkreq) {
+               disable_aspm_l11(pcie);
+               disable_aspm_l12(pcie);
+       }
+
+       return ret;
+
+fail_phy:
+       reset_control_assert(pcie->core_apb_rst);
+fail_core_apb_rst:
+       clk_disable_unprepare(pcie->core_clk);
+fail_core_clk:
+       regulator_disable(pcie->pex_ctl_supply);
+fail_reg_en:
+       tegra_pcie_disable_slot_regulators(pcie);
+fail_slot_reg_en:
+       tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+
+       return ret;
+}
+
+static int __deinit_controller(struct tegra_pcie_dw *pcie)
+{
+       int ret;
+
+       ret = reset_control_assert(pcie->core_rst);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to assert \"core\" reset: %d\n",
+                       ret);
+               return ret;
+       }
+
+       tegra_pcie_disable_phy(pcie);
+
+       ret = reset_control_assert(pcie->core_apb_rst);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to assert APB reset: %d\n", ret);
+               return ret;
+       }
+
+       clk_disable_unprepare(pcie->core_clk);
+
+       ret = regulator_disable(pcie->pex_ctl_supply);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to disable regulator: %d\n", ret);
+               return ret;
+       }
+
+       tegra_pcie_disable_slot_regulators(pcie);
+
+       ret = tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to disable controller %u: %d\n",
+                       pcie->cid, ret);
+               return ret;
+       }
+
+       return ret;
+}
+
+static int tegra_pcie_init_controller(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       struct pcie_port *pp = &pci->pp;
+       int ret;
+
+       ret = tegra_pcie_config_controller(pcie, false);
+       if (ret < 0)
+               return ret;
+
+       pp->ops = &tegra_pcie_dw_host_ops;
+
+       ret = dw_pcie_host_init(pp);
+       if (ret < 0) {
+               dev_err(pcie->dev, "Failed to add PCIe port: %d\n", ret);
+               goto fail_host_init;
+       }
+
+       return 0;
+
+fail_host_init:
+       return __deinit_controller(pcie);
+}
+
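+/*
+ * Broadcast PME_Turn_Off and poll APPL_DEBUG until the link reports L2; a
+ * non-zero return value means the transition to L2 timed out.
+ */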
+static int tegra_pcie_try_link_l2(struct tegra_pcie_dw *pcie)
+{
+       u32 val;
+
+       if (!tegra_pcie_dw_link_up(&pcie->pci))
+               return 0;
+
+       val = appl_readl(pcie, APPL_RADM_STATUS);
+       val |= APPL_PM_XMT_TURNOFF_STATE;
+       appl_writel(pcie, val, APPL_RADM_STATUS);
+
+       return readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG, val,
+                                val & APPL_DEBUG_PM_LINKST_IN_L2_LAT,
+                                1, PME_ACK_TIMEOUT);
+}
+
+static void tegra_pcie_dw_pme_turnoff(struct tegra_pcie_dw *pcie)
+{
+       u32 data;
+       int err;
+
+       if (!tegra_pcie_dw_link_up(&pcie->pci)) {
+               dev_dbg(pcie->dev, "PCIe link is not up\n");
+               return;
+       }
+
+       if (tegra_pcie_try_link_l2(pcie)) {
+               dev_info(pcie->dev, "Link didn't transition to L2 state\n");
+               /*
+                * The TX lane clock frequency resets to Gen-1 only when the
+                * link is in the L2 or detect state, so assert PERST# to the
+                * endpoint to force the root port into the detect state.
+                */
+               data = appl_readl(pcie, APPL_PINMUX);
+               data &= ~APPL_PINMUX_PEX_RST;
+               appl_writel(pcie, data, APPL_PINMUX);
+
+               err = readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG,
+                                               data,
+                                               ((data &
+                                               APPL_DEBUG_LTSSM_STATE_MASK) >>
+                                               APPL_DEBUG_LTSSM_STATE_SHIFT) ==
+                                               LTSSM_STATE_PRE_DETECT,
+                                               1, LTSSM_TIMEOUT);
+               if (err) {
+                       dev_info(pcie->dev, "Link didn't go to detect state\n");
+               } else {
+                       /* Disable LTSSM after link is in detect state */
+                       data = appl_readl(pcie, APPL_CTRL);
+                       data &= ~APPL_CTRL_LTSSM_EN;
+                       appl_writel(pcie, data, APPL_CTRL);
+               }
+       }
+       /*
+        * DBI registers may not be accessible after this point, as PLL-E may
+        * go down depending on how the endpoint pulls CLKREQ.
+        */
+       data = appl_readl(pcie, APPL_PINMUX);
+       data |= (APPL_PINMUX_CLKREQ_OVERRIDE_EN | APPL_PINMUX_CLKREQ_OVERRIDE);
+       /* Cut REFCLK to slot */
+       data |= APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN;
+       data &= ~APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE;
+       appl_writel(pcie, data, APPL_PINMUX);
+}
+
+static int tegra_pcie_deinit_controller(struct tegra_pcie_dw *pcie)
+{
+       tegra_pcie_downstream_dev_to_D0(pcie);
+       dw_pcie_host_deinit(&pcie->pci.pp);
+       tegra_pcie_dw_pme_turnoff(pcie);
+
+       return __deinit_controller(pcie);
+}
+
+static int tegra_pcie_config_rp(struct tegra_pcie_dw *pcie)
+{
+       struct pcie_port *pp = &pcie->pci.pp;
+       struct device *dev = pcie->dev;
+       char *name;
+       int ret;
+
+       if (IS_ENABLED(CONFIG_PCI_MSI)) {
+               pp->msi_irq = of_irq_get_byname(dev->of_node, "msi");
+               if (pp->msi_irq <= 0) {
+                       dev_err(dev, "Failed to get MSI interrupt\n");
+                       return -ENODEV;
+               }
+       }
+
+       pm_runtime_enable(dev);
+
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               dev_err(dev, "Failed to get runtime sync for PCIe dev: %d\n",
+                       ret);
+               goto fail_pm_get_sync;
+       }
+
+       ret = pinctrl_pm_select_default_state(dev);
+       if (ret < 0) {
+               dev_err(dev, "Failed to configure sideband pins: %d\n", ret);
+               goto fail_pinctrl;
+       }
+
+       tegra_pcie_init_controller(pcie);
+
+       pcie->link_state = tegra_pcie_dw_link_up(&pcie->pci);
+       if (!pcie->link_state) {
+               ret = -ENOMEDIUM;
+               goto fail_host_init;
+       }
+
+       name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+       if (!name) {
+               ret = -ENOMEM;
+               goto fail_host_init;
+       }
+
+       pcie->debugfs = debugfs_create_dir(name, NULL);
+       if (!pcie->debugfs)
+               dev_err(dev, "Failed to create debugfs\n");
+       else
+               init_debugfs(pcie);
+
+       return ret;
+
+fail_host_init:
+       tegra_pcie_deinit_controller(pcie);
+fail_pinctrl:
+       pm_runtime_put_sync(dev);
+fail_pm_get_sync:
+       pm_runtime_disable(dev);
+       return ret;
+}
+
+static int tegra_pcie_dw_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *atu_dma_res;
+       struct tegra_pcie_dw *pcie;
+       struct resource *dbi_res;
+       struct pcie_port *pp;
+       struct dw_pcie *pci;
+       struct phy **phys;
+       char *name;
+       int ret;
+       u32 i;
+
+       pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+       if (!pcie)
+               return -ENOMEM;
+
+       pci = &pcie->pci;
+       pci->dev = &pdev->dev;
+       pci->ops = &tegra_dw_pcie_ops;
+       pp = &pci->pp;
+       pcie->dev = &pdev->dev;
+
+       ret = tegra_pcie_dw_parse_dt(pcie);
+       if (ret < 0) {
+               dev_err(dev, "Failed to parse device tree: %d\n", ret);
+               return ret;
+       }
+
+       ret = tegra_pcie_get_slot_regulators(pcie);
+       if (ret < 0) {
+               dev_err(dev, "Failed to get slot regulators: %d\n", ret);
+               return ret;
+       }
+
+       pcie->pex_ctl_supply = devm_regulator_get(dev, "vddio-pex-ctl");
+       if (IS_ERR(pcie->pex_ctl_supply)) {
+               ret = PTR_ERR(pcie->pex_ctl_supply);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(dev, "Failed to get regulator: %ld\n",
+                               PTR_ERR(pcie->pex_ctl_supply));
+               return ret;
+       }
+
+       pcie->core_clk = devm_clk_get(dev, "core");
+       if (IS_ERR(pcie->core_clk)) {
+               dev_err(dev, "Failed to get core clock: %ld\n",
+                       PTR_ERR(pcie->core_clk));
+               return PTR_ERR(pcie->core_clk);
+       }
+
+       pcie->appl_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                                     "appl");
+       if (!pcie->appl_res) {
+               dev_err(dev, "Failed to find \"appl\" region\n");
+               return -ENODEV;
+       }
+
+       pcie->appl_base = devm_ioremap_resource(dev, pcie->appl_res);
+       if (IS_ERR(pcie->appl_base))
+               return PTR_ERR(pcie->appl_base);
+
+       pcie->core_apb_rst = devm_reset_control_get(dev, "apb");
+       if (IS_ERR(pcie->core_apb_rst)) {
+               dev_err(dev, "Failed to get APB reset: %ld\n",
+                       PTR_ERR(pcie->core_apb_rst));
+               return PTR_ERR(pcie->core_apb_rst);
+       }
+
+       phys = devm_kcalloc(dev, pcie->phy_count, sizeof(*phys), GFP_KERNEL);
+       if (!phys)
+               return -ENOMEM;
+
+       for (i = 0; i < pcie->phy_count; i++) {
+               name = kasprintf(GFP_KERNEL, "p2u-%u", i);
+               if (!name) {
+                       dev_err(dev, "Failed to create P2U string\n");
+                       return -ENOMEM;
+               }
+               phys[i] = devm_phy_get(dev, name);
+               kfree(name);
+               if (IS_ERR(phys[i])) {
+                       ret = PTR_ERR(phys[i]);
+                       if (ret != -EPROBE_DEFER)
+                               dev_err(dev, "Failed to get PHY: %d\n", ret);
+                       return ret;
+               }
+       }
+
+       pcie->phys = phys;
+
+       dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+       if (!dbi_res) {
+               dev_err(dev, "Failed to find \"dbi\" region\n");
+               return -ENODEV;
+       }
+       pcie->dbi_res = dbi_res;
+
+       pci->dbi_base = devm_ioremap_resource(dev, dbi_res);
+       if (IS_ERR(pci->dbi_base))
+               return PTR_ERR(pci->dbi_base);
+
+       /* Tegra HW locates DBI2 at a fixed offset from DBI */
+       pci->dbi_base2 = pci->dbi_base + 0x1000;
+
+       atu_dma_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                                  "atu_dma");
+       if (!atu_dma_res) {
+               dev_err(dev, "Failed to find \"atu_dma\" region\n");
+               return -ENODEV;
+       }
+       pcie->atu_dma_res = atu_dma_res;
+
+       pci->atu_base = devm_ioremap_resource(dev, atu_dma_res);
+       if (IS_ERR(pci->atu_base))
+               return PTR_ERR(pci->atu_base);
+
+       pcie->core_rst = devm_reset_control_get(dev, "core");
+       if (IS_ERR(pcie->core_rst)) {
+               dev_err(dev, "Failed to get core reset: %ld\n",
+                       PTR_ERR(pcie->core_rst));
+               return PTR_ERR(pcie->core_rst);
+       }
+
+       pp->irq = platform_get_irq_byname(pdev, "intr");
+       if (pp->irq <= 0) {
+               dev_err(dev, "Failed to get \"intr\" interrupt\n");
+               return -ENODEV;
+       }
+
+       ret = devm_request_irq(dev, pp->irq, tegra_pcie_irq_handler,
+                              IRQF_SHARED, "tegra-pcie-intr", pcie);
+       if (ret) {
+               dev_err(dev, "Failed to request IRQ %d: %d\n", pp->irq, ret);
+               return ret;
+       }
+
+       pcie->bpmp = tegra_bpmp_get(dev);
+       if (IS_ERR(pcie->bpmp))
+               return PTR_ERR(pcie->bpmp);
+
+       platform_set_drvdata(pdev, pcie);
+
+       ret = tegra_pcie_config_rp(pcie);
+       if (ret && ret != -ENOMEDIUM)
+               goto fail;
+       else
+               return 0;
+
+fail:
+       tegra_bpmp_put(pcie->bpmp);
+       return ret;
+}
+
+static int tegra_pcie_dw_remove(struct platform_device *pdev)
+{
+       struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+       if (!pcie->link_state)
+               return 0;
+
+       debugfs_remove_recursive(pcie->debugfs);
+       tegra_pcie_deinit_controller(pcie);
+       pm_runtime_put_sync(pcie->dev);
+       pm_runtime_disable(pcie->dev);
+       tegra_bpmp_put(pcie->bpmp);
+
+       return 0;
+}
+
+static int tegra_pcie_dw_suspend_late(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+       u32 val;
+
+       if (!pcie->link_state)
+               return 0;
+
+       /* Enable HW_HOT_RST mode */
+       val = appl_readl(pcie, APPL_CTRL);
+       val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+                APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+       val |= APPL_CTRL_HW_HOT_RST_EN;
+       appl_writel(pcie, val, APPL_CTRL);
+
+       return 0;
+}
+
+static int tegra_pcie_dw_suspend_noirq(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+
+       if (!pcie->link_state)
+               return 0;
+
+       /* Save MSI interrupt vector */
+       pcie->msi_ctrl_int = dw_pcie_readl_dbi(&pcie->pci,
+                                              PORT_LOGIC_MSI_CTRL_INT_0_EN);
+       tegra_pcie_downstream_dev_to_D0(pcie);
+       tegra_pcie_dw_pme_turnoff(pcie);
+
+       return __deinit_controller(pcie);
+}
+
+static int tegra_pcie_dw_resume_noirq(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+       int ret;
+
+       if (!pcie->link_state)
+               return 0;
+
+       ret = tegra_pcie_config_controller(pcie, true);
+       if (ret < 0)
+               return ret;
+
+       ret = tegra_pcie_dw_host_init(&pcie->pci.pp);
+       if (ret < 0) {
+               dev_err(dev, "Failed to init host: %d\n", ret);
+               goto fail_host_init;
+       }
+
+       /* Restore MSI interrupt vector */
+       dw_pcie_writel_dbi(&pcie->pci, PORT_LOGIC_MSI_CTRL_INT_0_EN,
+                          pcie->msi_ctrl_int);
+
+       return 0;
+
+fail_host_init:
+       return __deinit_controller(pcie);
+}
+
+static int tegra_pcie_dw_resume_early(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+       u32 val;
+
+       if (!pcie->link_state)
+               return 0;
+
+       /* Disable HW_HOT_RST mode */
+       val = appl_readl(pcie, APPL_CTRL);
+       val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+                APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+       val |= APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST <<
+              APPL_CTRL_HW_HOT_RST_MODE_SHIFT;
+       val &= ~APPL_CTRL_HW_HOT_RST_EN;
+       appl_writel(pcie, val, APPL_CTRL);
+
+       return 0;
+}
+
+static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
+{
+       struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+       if (!pcie->link_state)
+               return;
+
+       debugfs_remove_recursive(pcie->debugfs);
+       tegra_pcie_downstream_dev_to_D0(pcie);
+
+       disable_irq(pcie->pci.pp.irq);
+       if (IS_ENABLED(CONFIG_PCI_MSI))
+               disable_irq(pcie->pci.pp.msi_irq);
+
+       tegra_pcie_dw_pme_turnoff(pcie);
+       __deinit_controller(pcie);
+}
+
+static const struct of_device_id tegra_pcie_dw_of_match[] = {
+       {
+               .compatible = "nvidia,tegra194-pcie",
+       },
+       {},
+};
+
+static const struct dev_pm_ops tegra_pcie_dw_pm_ops = {
+       .suspend_late = tegra_pcie_dw_suspend_late,
+       .suspend_noirq = tegra_pcie_dw_suspend_noirq,
+       .resume_noirq = tegra_pcie_dw_resume_noirq,
+       .resume_early = tegra_pcie_dw_resume_early,
+};
+
+static struct platform_driver tegra_pcie_dw_driver = {
+       .probe = tegra_pcie_dw_probe,
+       .remove = tegra_pcie_dw_remove,
+       .shutdown = tegra_pcie_dw_shutdown,
+       .driver = {
+               .name   = "tegra194-pcie",
+               .pm = &tegra_pcie_dw_pm_ops,
+               .of_match_table = tegra_pcie_dw_of_match,
+       },
+};
+module_platform_driver(tegra_pcie_dw_driver);
+
+MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
+
+MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
+MODULE_LICENSE("GPL v2");
index c742881..c8cb9c5 100644
@@ -43,9 +43,8 @@ static struct pci_config_window *gen_pci_init(struct device *dev,
                goto err_out;
        }
 
-       err = devm_add_action(dev, gen_pci_unmap_cfg, cfg);
+       err = devm_add_action_or_reset(dev, gen_pci_unmap_cfg, cfg);
        if (err) {
-               gen_pci_unmap_cfg(cfg);
                goto err_out;
        }
        return cfg;
index 0ca73c8..f1f3002 100644
@@ -2809,6 +2809,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus)
                complete(&hbus->remove_event);
 }
 
+#define HVPCI_DOM_MAP_SIZE (64 * 1024)
+static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
+
+/*
+ * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
+ * as invalid for passthrough PCI devices of this driver.
+ */
+#define HVPCI_DOM_INVALID 0
+
+/**
+ * hv_get_dom_num() - Get a valid PCI domain number
+ * @dom: Requested domain number
+ *
+ * Check if the requested PCI domain number is in use, and return another
+ * number if it is.
+ *
+ * Return: domain number on success, HVPCI_DOM_INVALID on failure
+ */
+static u16 hv_get_dom_num(u16 dom)
+{
+       unsigned int i;
+
+       if (test_and_set_bit(dom, hvpci_dom_map) == 0)
+               return dom;
+
+       for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
+               if (test_and_set_bit(i, hvpci_dom_map) == 0)
+                       return i;
+       }
+
+       return HVPCI_DOM_INVALID;
+}
+
+/**
+ * hv_put_dom_num() - Mark the PCI domain number as free
+ * @dom: Domain number to be freed
+ */
+static void hv_put_dom_num(u16 dom)
+{
+       clear_bit(dom, hvpci_dom_map);
+}
+
 /**
  * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
  * @hdev:      VMBus's tracking struct for this root PCI bus
@@ -2820,6 +2862,7 @@ static int hv_pci_probe(struct hv_device *hdev,
                        const struct hv_vmbus_device_id *dev_id)
 {
        struct hv_pcibus_device *hbus;
+       u16 dom_req, dom;
        char *name;
        int ret;
 
@@ -2835,19 +2878,34 @@ static int hv_pci_probe(struct hv_device *hdev,
        hbus->state = hv_pcibus_init;
 
        /*
-        * The PCI bus "domain" is what is called "segment" in ACPI and
-        * other specs.  Pull it from the instance ID, to get something
-        * unique.  Bytes 8 and 9 are what is used in Windows guests, so
-        * do the same thing for consistency.  Note that, since this code
-        * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee
-        * that (1) the only domain in use for something that looks like
-        * a physical PCI bus (which is actually emulated by the
-        * hypervisor) is domain 0 and (2) there will be no overlap
-        * between domains derived from these instance IDs in the same
-        * VM.
+        * The PCI bus "domain" is what is called "segment" in ACPI and other
+        * specs. Pull it from the instance ID, to get something usually
+        * unique. In the rare case of a collision, another number that is
+        * not in use is picked instead.
+        *
+        * Note that, since this code only runs in a Hyper-V VM, Hyper-V
+        * together with this guest driver can guarantee that (1) The only
+        * domain used by Gen1 VMs for something that looks like a physical
+        * PCI bus (which is actually emulated by the hypervisor) is domain 0.
+        * (2) There will be no overlap between domains (after fixing possible
+        * collisions) in the same VM.
         */
-       hbus->sysdata.domain = hdev->dev_instance.b[9] |
-                              hdev->dev_instance.b[8] << 8;
+       dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
+       dom = hv_get_dom_num(dom_req);
+
+       if (dom == HVPCI_DOM_INVALID) {
+               dev_err(&hdev->device,
+                       "Unable to use dom# 0x%hx or other numbers\n", dom_req);
+               ret = -EINVAL;
+               goto free_bus;
+       }
+
+       if (dom != dom_req)
+               dev_info(&hdev->device,
+                        "PCI dom# 0x%hx has collision, using 0x%hx\n",
+                        dom_req, dom);
+
+       hbus->sysdata.domain = dom;
 
        hbus->hdev = hdev;
        refcount_set(&hbus->remove_lock, 1);
@@ -2862,7 +2920,7 @@ static int hv_pci_probe(struct hv_device *hdev,
                                           hbus->sysdata.domain);
        if (!hbus->wq) {
                ret = -ENOMEM;
-               goto free_bus;
+               goto free_dom;
        }
 
        ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
@@ -2946,6 +3004,8 @@ close:
        vmbus_close(hdev->channel);
 destroy_wq:
        destroy_workqueue(hbus->wq);
+free_dom:
+       hv_put_dom_num(hbus->sysdata.domain);
 free_bus:
        free_page((unsigned long)hbus);
        return ret;
@@ -3008,8 +3068,8 @@ static int hv_pci_remove(struct hv_device *hdev)
                /* Remove the bus from PCI's point of view. */
                pci_lock_rescan_remove();
                pci_stop_root_bus(hbus->pci_bus);
-               pci_remove_root_bus(hbus->pci_bus);
                hv_pci_remove_slots(hbus);
+               pci_remove_root_bus(hbus->pci_bus);
                pci_unlock_rescan_remove();
                hbus->state = hv_pcibus_removed;
        }
@@ -3027,6 +3087,9 @@ static int hv_pci_remove(struct hv_device *hdev)
        put_hvpcibus(hbus);
        wait_for_completion(&hbus->remove_event);
        destroy_workqueue(hbus->wq);
+
+       hv_put_dom_num(hbus->sysdata.domain);
+
        free_page((unsigned long)hbus);
        return 0;
 }
@@ -3058,6 +3121,9 @@ static void __exit exit_hv_pci_drv(void)
 
 static int __init init_hv_pci_drv(void)
 {
+       /* Set the invalid domain number's bit, so it will not be used */
+       set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
+
        /* Initialize PCI block r/w interface */
        hvpci_block_ops.read_block = hv_read_config_block;
        hvpci_block_ops.write_block = hv_write_config_block;
index 9a917b2..673a172 100644 (file)
@@ -2237,14 +2237,15 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                err = of_pci_get_devfn(port);
                if (err < 0) {
                        dev_err(dev, "failed to parse address: %d\n", err);
-                       return err;
+                       goto err_node_put;
                }
 
                index = PCI_SLOT(err);
 
                if (index < 1 || index > soc->num_ports) {
                        dev_err(dev, "invalid port number: %d\n", index);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto err_node_put;
                }
 
                index--;
@@ -2253,12 +2254,13 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                if (err < 0) {
                        dev_err(dev, "failed to parse # of lanes: %d\n",
                                err);
-                       return err;
+                       goto err_node_put;
                }
 
                if (value > 16) {
                        dev_err(dev, "invalid # of lanes: %u\n", value);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto err_node_put;
                }
 
                lanes |= value << (index << 3);
@@ -2272,13 +2274,15 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                lane += value;
 
                rp = devm_kzalloc(dev, sizeof(*rp), GFP_KERNEL);
-               if (!rp)
-                       return -ENOMEM;
+               if (!rp) {
+                       err = -ENOMEM;
+                       goto err_node_put;
+               }
 
                err = of_address_to_resource(port, 0, &rp->regs);
                if (err < 0) {
                        dev_err(dev, "failed to parse address: %d\n", err);
-                       return err;
+                       goto err_node_put;
                }
 
                INIT_LIST_HEAD(&rp->list);
@@ -2330,6 +2334,10 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                return err;
 
        return 0;
+
+err_node_put:
+       of_node_put(port);
+       return err;
 }
 
 /*
index 5a3550b..9ee6200 100644 (file)
@@ -93,12 +93,9 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
        pcie->need_ib_cfg = of_property_read_bool(np, "dma-ranges");
 
        /* PHY use is optional */
-       pcie->phy = devm_phy_get(dev, "pcie-phy");
-       if (IS_ERR(pcie->phy)) {
-               if (PTR_ERR(pcie->phy) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
-               pcie->phy = NULL;
-       }
+       pcie->phy = devm_phy_optional_get(dev, "pcie-phy");
+       if (IS_ERR(pcie->phy))
+               return PTR_ERR(pcie->phy);
 
        ret = devm_of_pci_get_host_bridge_resources(dev, 0, 0xff, &resources,
                                                    &iobase);
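
For readers unfamiliar with the idiom: devm_phy_optional_get() returns NULL
(not ERR_PTR(-ENODEV)) when no PHY is described, and the PHY API accepts a
NULL phy as a no-op, so the two-line form above covers the absent, deferred,
and error cases at once. A sketch of how the result is typically consumed:

	phy = devm_phy_optional_get(dev, "pcie-phy");
	if (IS_ERR(phy))
		return PTR_ERR(phy);	/* real error or -EPROBE_DEFER */

	ret = phy_init(phy);		/* no-op when phy == NULL */
	if (ret)
		return ret;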
index 80601e1..626a7c3 100644 (file)
@@ -73,6 +73,7 @@
 #define PCIE_MSI_VECTOR                0x0c0
 
 #define PCIE_CONF_VEND_ID      0x100
+#define PCIE_CONF_DEVICE_ID    0x102
 #define PCIE_CONF_CLASS_ID     0x106
 
 #define PCIE_INT_MASK          0x420
@@ -141,12 +142,16 @@ struct mtk_pcie_port;
 /**
  * struct mtk_pcie_soc - differentiate between host generations
  * @need_fix_class_id: whether this host's class ID needs to be fixed or not
+ * @need_fix_device_id: whether this host's device ID needs to be fixed or not
+ * @device_id: device ID to program when the host's device ID needs fixing
  * @ops: pointer to configuration access functions
  * @startup: pointer to controller setting functions
  * @setup_irq: pointer to initialize IRQ functions
  */
 struct mtk_pcie_soc {
        bool need_fix_class_id;
+       bool need_fix_device_id;
+       unsigned int device_id;
        struct pci_ops *ops;
        int (*startup)(struct mtk_pcie_port *port);
        int (*setup_irq)(struct mtk_pcie_port *port, struct device_node *node);
@@ -630,8 +635,6 @@ static void mtk_pcie_intr_handler(struct irq_desc *desc)
        }
 
        chained_irq_exit(irqchip, desc);
-
-       return;
 }
 
 static int mtk_pcie_setup_irq(struct mtk_pcie_port *port,
@@ -696,6 +699,9 @@ static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port)
                writew(val, port->base + PCIE_CONF_CLASS_ID);
        }
 
+       if (soc->need_fix_device_id)
+               writew(soc->device_id, port->base + PCIE_CONF_DEVICE_ID);
+
        /* 100ms timeout value should be enough for Gen1/2 training */
        err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_V2, val,
                                 !!(val & PCIE_PORT_LINKUP_V2), 20,
@@ -1216,11 +1222,21 @@ static const struct mtk_pcie_soc mtk_pcie_soc_mt7622 = {
        .setup_irq = mtk_pcie_setup_irq,
 };
 
+static const struct mtk_pcie_soc mtk_pcie_soc_mt7629 = {
+       .need_fix_class_id = true,
+       .need_fix_device_id = true,
+       .device_id = PCI_DEVICE_ID_MEDIATEK_7629,
+       .ops = &mtk_pcie_ops_v2,
+       .startup = mtk_pcie_startup_port_v2,
+       .setup_irq = mtk_pcie_setup_irq,
+};
+
 static const struct of_device_id mtk_pcie_ids[] = {
        { .compatible = "mediatek,mt2701-pcie", .data = &mtk_pcie_soc_v1 },
        { .compatible = "mediatek,mt7623-pcie", .data = &mtk_pcie_soc_v1 },
        { .compatible = "mediatek,mt2712-pcie", .data = &mtk_pcie_soc_mt2712 },
        { .compatible = "mediatek,mt7622-pcie", .data = &mtk_pcie_soc_mt7622 },
+       { .compatible = "mediatek,mt7629-pcie", .data = &mtk_pcie_soc_mt7629 },
        {},
 };
 
index 672e633..a45a644 100644 (file)
@@ -88,6 +88,7 @@
 #define  AMAP_CTRL_TYPE_MASK           3
 
 #define PAB_EXT_PEX_AMAP_SIZEN(win)    PAB_EXT_REG_ADDR(0xbef0, win)
+#define PAB_EXT_PEX_AMAP_AXI_WIN(win)  PAB_EXT_REG_ADDR(0xb4a0, win)
 #define PAB_PEX_AMAP_AXI_WIN(win)      PAB_REG_ADDR(0x4ba4, win)
 #define PAB_PEX_AMAP_PEX_WIN_L(win)    PAB_REG_ADDR(0x4ba8, win)
 #define PAB_PEX_AMAP_PEX_WIN_H(win)    PAB_REG_ADDR(0x4bac, win)
@@ -462,7 +463,7 @@ static int mobiveil_pcie_parse_dt(struct mobiveil_pcie *pcie)
 }
 
 static void program_ib_windows(struct mobiveil_pcie *pcie, int win_num,
-                              u64 pci_addr, u32 type, u64 size)
+                              u64 cpu_addr, u64 pci_addr, u32 type, u64 size)
 {
        u32 value;
        u64 size64 = ~(size - 1);
@@ -482,7 +483,10 @@ static void program_ib_windows(struct mobiveil_pcie *pcie, int win_num,
        csr_writel(pcie, upper_32_bits(size64),
                   PAB_EXT_PEX_AMAP_SIZEN(win_num));
 
-       csr_writel(pcie, pci_addr, PAB_PEX_AMAP_AXI_WIN(win_num));
+       csr_writel(pcie, lower_32_bits(cpu_addr),
+                  PAB_PEX_AMAP_AXI_WIN(win_num));
+       csr_writel(pcie, upper_32_bits(cpu_addr),
+                  PAB_EXT_PEX_AMAP_AXI_WIN(win_num));
 
        csr_writel(pcie, lower_32_bits(pci_addr),
                   PAB_PEX_AMAP_PEX_WIN_L(win_num));
@@ -624,7 +628,7 @@ static int mobiveil_host_init(struct mobiveil_pcie *pcie)
                           CFG_WINDOW_TYPE, resource_size(pcie->ob_io_res));
 
        /* memory inbound translation window */
-       program_ib_windows(pcie, WIN_NUM_0, 0, MEM_WINDOW_TYPE, IB_WIN_SIZE);
+       program_ib_windows(pcie, WIN_NUM_0, 0, 0, MEM_WINDOW_TYPE, IB_WIN_SIZE);
 
        /* Get the I/O and memory ranges from DT */
        resource_list_for_each_entry(win, &pcie->resources) {
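
Worked example of the 64-bit split above, assuming a hypothetical cpu_addr of
0x1_2345_6000:

	u64 cpu_addr = 0x123456000ULL;
	u32 lo = lower_32_bits(cpu_addr);	/* 0x23456000 -> PAB_PEX_AMAP_AXI_WIN     */
	u32 hi = upper_32_bits(cpu_addr);	/* 0x00000001 -> PAB_EXT_PEX_AMAP_AXI_WIN */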
index 8d20f17..ef8e677 100644 (file)
@@ -608,29 +608,29 @@ static int rockchip_pcie_parse_host_dt(struct rockchip_pcie *rockchip)
 
        rockchip->vpcie12v = devm_regulator_get_optional(dev, "vpcie12v");
        if (IS_ERR(rockchip->vpcie12v)) {
-               if (PTR_ERR(rockchip->vpcie12v) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie12v) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie12v);
                dev_info(dev, "no vpcie12v regulator found\n");
        }
 
        rockchip->vpcie3v3 = devm_regulator_get_optional(dev, "vpcie3v3");
        if (IS_ERR(rockchip->vpcie3v3)) {
-               if (PTR_ERR(rockchip->vpcie3v3) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie3v3) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie3v3);
                dev_info(dev, "no vpcie3v3 regulator found\n");
        }
 
        rockchip->vpcie1v8 = devm_regulator_get_optional(dev, "vpcie1v8");
        if (IS_ERR(rockchip->vpcie1v8)) {
-               if (PTR_ERR(rockchip->vpcie1v8) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie1v8) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie1v8);
                dev_info(dev, "no vpcie1v8 regulator found\n");
        }
 
        rockchip->vpcie0v9 = devm_regulator_get_optional(dev, "vpcie0v9");
        if (IS_ERR(rockchip->vpcie0v9)) {
-               if (PTR_ERR(rockchip->vpcie0v9) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie0v9) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie0v9);
                dev_info(dev, "no vpcie0v9 regulator found\n");
        }
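
One note on the pattern above: unlike the PHY API, devm_regulator_get_optional()
reports an absent supply as ERR_PTR(-ENODEV), so "not described in DT" is the
only error worth swallowing; everything else, including -EPROBE_DEFER, now
propagates. A condensed sketch with a hypothetical supply name:

	reg = devm_regulator_get_optional(dev, "vpcie-example");
	if (IS_ERR(reg)) {
		if (PTR_ERR(reg) != -ENODEV)
			return PTR_ERR(reg);	/* real error or deferral */
		dev_info(dev, "optional supply absent, continuing\n");
	}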
 
index 4575e0c..a35d3f3 100644 (file)
@@ -31,6 +31,9 @@
 #define PCI_REG_VMLOCK         0x70
 #define MB2_SHADOW_EN(vmlock)  (vmlock & 0x2)
 
+#define MB2_SHADOW_OFFSET      0x2000
+#define MB2_SHADOW_SIZE                16
+
 enum vmd_features {
        /*
         * Device may contain registers which hint the physical location of the
@@ -94,6 +97,7 @@ struct vmd_dev {
        struct resource         resources[3];
        struct irq_domain       *irq_domain;
        struct pci_bus          *bus;
+       u8                      busn_start;
 
        struct dma_map_ops      dma_ops;
        struct dma_domain       dma_domain;
@@ -440,7 +444,8 @@ static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
                                  unsigned int devfn, int reg, int len)
 {
        char __iomem *addr = vmd->cfgbar +
-                            (bus->number << 20) + (devfn << 12) + reg;
+                            ((bus->number - vmd->busn_start) << 20) +
+                            (devfn << 12) + reg;
 
        if ((addr - vmd->cfgbar) + len >=
            resource_size(&vmd->dev->resource[VMD_CFGBAR]))
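
Worked example of the config-space math above, with hypothetical values:
bus 0x82, devfn 0x10 (device 2, function 0), reg 0x04, on a VMD domain whose
bus numbers start at 128 (0x80):

	u64 offset = ((0x82 - 0x80) << 20)	/* 0x200000: 1 MiB per bus      */
		   + (0x10 << 12)		/* 0x010000: 4 KiB per function */
		   + 0x04;			/* config register offset       */
	/* offset == 0x210004 from the start of CFGBAR */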
@@ -563,7 +568,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
        unsigned long flags;
        LIST_HEAD(resources);
        resource_size_t offset[2] = {0};
-       resource_size_t membar2_offset = 0x2000, busn_start = 0;
+       resource_size_t membar2_offset = 0x2000;
        struct pci_bus *child;
 
        /*
@@ -576,7 +581,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
                u32 vmlock;
                int ret;
 
-               membar2_offset = 0x2018;
+               membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
                ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
                if (ret || vmlock == ~0)
                        return -ENODEV;
@@ -588,9 +593,9 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
                        if (!membar2)
                                return -ENOMEM;
                        offset[0] = vmd->dev->resource[VMD_MEMBAR1].start -
-                                               readq(membar2 + 0x2008);
+                                       readq(membar2 + MB2_SHADOW_OFFSET);
                        offset[1] = vmd->dev->resource[VMD_MEMBAR2].start -
-                                               readq(membar2 + 0x2010);
+                                       readq(membar2 + MB2_SHADOW_OFFSET + 8);
                        pci_iounmap(vmd->dev, membar2);
                }
        }
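
The constants introduced above spell out the shadow layout this hunk switches
to: a 16-byte window at MEMBAR2 offset 0x2000 holding two 64-bit shadow BAR
bases, so (sketch):

	u64 membar1_shadow = readq(membar2 + MB2_SHADOW_OFFSET);	/* at 0x2000 */
	u64 membar2_shadow = readq(membar2 + MB2_SHADOW_OFFSET + 8);	/* at 0x2008 */
	/* usable register space resumes at 0x2000 + 16 == 0x2010 */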
@@ -606,14 +611,14 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
                pci_read_config_dword(vmd->dev, PCI_REG_VMCONFIG, &vmconfig);
                if (BUS_RESTRICT_CAP(vmcap) &&
                    (BUS_RESTRICT_CFG(vmconfig) == 0x1))
-                       busn_start = 128;
+                       vmd->busn_start = 128;
        }
 
        res = &vmd->dev->resource[VMD_CFGBAR];
        vmd->resources[0] = (struct resource) {
                .name  = "VMD CFGBAR",
-               .start = busn_start,
-               .end   = busn_start + (resource_size(res) >> 20) - 1,
+               .start = vmd->busn_start,
+               .end   = vmd->busn_start + (resource_size(res) >> 20) - 1,
                .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
        };
 
@@ -681,8 +686,8 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
        pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
        pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);
 
-       vmd->bus = pci_create_root_bus(&vmd->dev->dev, busn_start, &vmd_ops,
-                                      sd, &resources);
+       vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
+                                      &vmd_ops, sd, &resources);
        if (!vmd->bus) {
                pci_free_resource_list(&resources);
                irq_domain_remove(vmd->irq_domain);
index 603eadf..d0559d2 100644 (file)
@@ -563,7 +563,6 @@ cleanup_slots(void)
        }
 cleanup_null:
        up_write(&list_rwsem);
-       return;
 }
 
 int
index 16bbb18..b8aacb4 100644 (file)
@@ -173,7 +173,6 @@ static void pci_print_IRQ_route(void)
                dbg("%d %d %d %d\n", tbus, tdevice >> 3, tdevice & 0x7, tslot);
 
        }
-       return;
 }
 
 
index b7f4e1f..68de958 100644 (file)
@@ -1872,8 +1872,6 @@ static void interrupt_event_handler(struct controller *ctrl)
                        }
                }               /* End of FOR loop */
        }
-
-       return;
 }
 
 
@@ -1943,8 +1941,6 @@ void cpqhp_pushbutton_thread(struct timer_list *t)
 
                p_slot->state = STATIC_STATE;
        }
-
-       return;
 }
 
 
index 918ff8d..70e879b 100644 (file)
 
 #ifndef CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM
 
-static inline void compaq_nvram_init(void __iomem *rom_start)
-{
-       return;
-}
+static inline void compaq_nvram_init(void __iomem *rom_start) { }
 
 static inline int compaq_nvram_load(void __iomem *rom_start, struct controller *ctrl)
 {
index 5e8caf7..5c93aa1 100644 (file)
@@ -1941,6 +1941,7 @@ static int __init update_bridge_ranges(struct bus_node **bus)
                                                break;
                                        case PCI_HEADER_TYPE_BRIDGE:
                                                function = 0x8;
+                                               /* fall through */
                                        case PCI_HEADER_TYPE_MULTIBRIDGE:
                                                /* We assume here that only 1 bus behind the bridge
                                                   TO DO: add functionality for several:
index 8c51a04..654c972 100644 (file)
@@ -110,9 +110,9 @@ struct controller {
  *
  * @OFF_STATE: slot is powered off, no subordinate devices are enumerated
  * @BLINKINGON_STATE: slot will be powered on after the 5 second delay,
- *     green led is blinking
+ *     Power Indicator is blinking
  * @BLINKINGOFF_STATE: slot will be powered off after the 5 second delay,
- *     green led is blinking
+ *     Power Indicator is blinking
  * @POWERON_STATE: slot is currently powering on
  * @POWEROFF_STATE: slot is currently powering off
  * @ON_STATE: slot is powered on, subordinate devices have been enumerated
@@ -167,12 +167,11 @@ int pciehp_power_on_slot(struct controller *ctrl);
 void pciehp_power_off_slot(struct controller *ctrl);
 void pciehp_get_power_status(struct controller *ctrl, u8 *status);
 
-void pciehp_set_attention_status(struct controller *ctrl, u8 status);
+#define INDICATOR_NOOP -1      /* Leave indicator unchanged */
+void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn);
+
 void pciehp_get_latch_status(struct controller *ctrl, u8 *status);
 int pciehp_query_power_fault(struct controller *ctrl);
-void pciehp_green_led_on(struct controller *ctrl);
-void pciehp_green_led_off(struct controller *ctrl);
-void pciehp_green_led_blink(struct controller *ctrl);
 bool pciehp_card_present(struct controller *ctrl);
 bool pciehp_card_present_or_link_active(struct controller *ctrl);
 int pciehp_check_link_status(struct controller *ctrl);
index 6ad0d86..b3122c1 100644 (file)
@@ -95,15 +95,20 @@ static void cleanup_slot(struct controller *ctrl)
 }
 
 /*
- * set_attention_status - Turns the Amber LED for a slot on, off or blink
+ * set_attention_status - Turns the Attention Indicator on, off or blinking
  */
 static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
        struct controller *ctrl = to_ctrl(hotplug_slot);
        struct pci_dev *pdev = ctrl->pcie->port;
 
+       if (status)
+               status <<= PCI_EXP_SLTCTL_ATTN_IND_SHIFT;
+       else
+               status = PCI_EXP_SLTCTL_ATTN_IND_OFF;
+
        pci_config_pm_runtime_get(pdev);
-       pciehp_set_attention_status(ctrl, status);
+       pciehp_set_indicators(ctrl, INDICATOR_NOOP, status);
        pci_config_pm_runtime_put(pdev);
        return 0;
 }
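
The shift above converts the legacy sysfs encoding (1 = on, 2 = blink) into
the Slot Control field values; PCI_EXP_SLTCTL_ATTN_IND_SHIFT is 6, so:

	/*
	 * user value 1:  1 << 6 == 0x0040 == PCI_EXP_SLTCTL_ATTN_IND_ON
	 * user value 2:  2 << 6 == 0x0080 == PCI_EXP_SLTCTL_ATTN_IND_BLINK
	 * user value 0:  assigned directly, 0x00c0 == PCI_EXP_SLTCTL_ATTN_IND_OFF
	 */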
index 631ced0..21af7b1 100644 (file)
 
 static void set_slot_off(struct controller *ctrl)
 {
-       /* turn off slot, turn on Amber LED, turn off Green LED if supported*/
+       /*
+        * Turn off slot, turn on attention indicator, turn off power
+        * indicator
+        */
        if (POWER_CTRL(ctrl)) {
                pciehp_power_off_slot(ctrl);
 
@@ -42,8 +45,8 @@ static void set_slot_off(struct controller *ctrl)
                msleep(1000);
        }
 
-       pciehp_green_led_off(ctrl);
-       pciehp_set_attention_status(ctrl, 1);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                             PCI_EXP_SLTCTL_ATTN_IND_ON);
 }
 
 /**
@@ -65,7 +68,8 @@ static int board_added(struct controller *ctrl)
                        return retval;
        }
 
-       pciehp_green_led_blink(ctrl);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
+                             INDICATOR_NOOP);
 
        /* Check link training status */
        retval = pciehp_check_link_status(ctrl);
@@ -90,8 +94,8 @@ static int board_added(struct controller *ctrl)
                }
        }
 
-       pciehp_green_led_on(ctrl);
-       pciehp_set_attention_status(ctrl, 0);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
+                             PCI_EXP_SLTCTL_ATTN_IND_OFF);
        return 0;
 
 err_exit:
@@ -100,7 +104,7 @@ err_exit:
 }
 
 /**
- * remove_board - Turns off slot and LEDs
+ * remove_board - Turn off slot and Power Indicator
  * @ctrl: PCIe hotplug controller where board is being removed
  * @safe_removal: whether the board is safely removed (versus surprise removed)
  */
@@ -123,8 +127,8 @@ static void remove_board(struct controller *ctrl, bool safe_removal)
                           &ctrl->pending_events);
        }
 
-       /* turn off Green LED */
-       pciehp_green_led_off(ctrl);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                             INDICATOR_NOOP);
 }
 
 static int pciehp_enable_slot(struct controller *ctrl);
@@ -171,9 +175,9 @@ void pciehp_handle_button_press(struct controller *ctrl)
                        ctrl_info(ctrl, "Slot(%s) Powering on due to button press\n",
                                  slot_name(ctrl));
                }
-               /* blink green LED and turn off amber */
-               pciehp_green_led_blink(ctrl);
-               pciehp_set_attention_status(ctrl, 0);
+               /* blink power indicator and turn off attention */
+               pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
+                                     PCI_EXP_SLTCTL_ATTN_IND_OFF);
                schedule_delayed_work(&ctrl->button_work, 5 * HZ);
                break;
        case BLINKINGOFF_STATE:
@@ -187,12 +191,13 @@ void pciehp_handle_button_press(struct controller *ctrl)
                cancel_delayed_work(&ctrl->button_work);
                if (ctrl->state == BLINKINGOFF_STATE) {
                        ctrl->state = ON_STATE;
-                       pciehp_green_led_on(ctrl);
+                       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
+                                             PCI_EXP_SLTCTL_ATTN_IND_OFF);
                } else {
                        ctrl->state = OFF_STATE;
-                       pciehp_green_led_off(ctrl);
+                       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                                             PCI_EXP_SLTCTL_ATTN_IND_OFF);
                }
-               pciehp_set_attention_status(ctrl, 0);
                ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n",
                          slot_name(ctrl));
                break;
@@ -310,7 +315,9 @@ static int pciehp_enable_slot(struct controller *ctrl)
        pm_runtime_get_sync(&ctrl->pcie->port->dev);
        ret = __pciehp_enable_slot(ctrl);
        if (ret && ATTN_BUTTN(ctrl))
-               pciehp_green_led_off(ctrl); /* may be blinking */
+               /* may be blinking */
+               pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                                     INDICATOR_NOOP);
        pm_runtime_put(&ctrl->pcie->port->dev);
 
        mutex_lock(&ctrl->state_lock);
index bd990e3..1a522c1 100644 (file)
@@ -418,65 +418,40 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
        return 0;
 }
 
-void pciehp_set_attention_status(struct controller *ctrl, u8 value)
+/**
+ * pciehp_set_indicators() - set attention indicator, power indicator, or both
+ * @ctrl: PCIe hotplug controller
+ * @pwr: one of:
+ *     PCI_EXP_SLTCTL_PWR_IND_ON
+ *     PCI_EXP_SLTCTL_PWR_IND_BLINK
+ *     PCI_EXP_SLTCTL_PWR_IND_OFF
+ * @attn: one of:
+ *     PCI_EXP_SLTCTL_ATTN_IND_ON
+ *     PCI_EXP_SLTCTL_ATTN_IND_BLINK
+ *     PCI_EXP_SLTCTL_ATTN_IND_OFF
+ *
+ * Either @pwr or @attn can also be INDICATOR_NOOP to leave that indicator
+ * unchanged.
+ */
+void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn)
 {
-       u16 slot_cmd;
+       u16 cmd = 0, mask = 0;
 
-       if (!ATTN_LED(ctrl))
-               return;
-
-       switch (value) {
-       case 0:         /* turn off */
-               slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_OFF;
-               break;
-       case 1:         /* turn on */
-               slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_ON;
-               break;
-       case 2:         /* turn blink */
-               slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_BLINK;
-               break;
-       default:
-               return;
+       if (PWR_LED(ctrl) && pwr != INDICATOR_NOOP) {
+               cmd |= (pwr & PCI_EXP_SLTCTL_PIC);
+               mask |= PCI_EXP_SLTCTL_PIC;
        }
-       pcie_write_cmd_nowait(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd);
-}
 
-void pciehp_green_led_on(struct controller *ctrl)
-{
-       if (!PWR_LED(ctrl))
-               return;
-
-       pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
-                             PCI_EXP_SLTCTL_PIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
-                PCI_EXP_SLTCTL_PWR_IND_ON);
-}
-
-void pciehp_green_led_off(struct controller *ctrl)
-{
-       if (!PWR_LED(ctrl))
-               return;
-
-       pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
-                             PCI_EXP_SLTCTL_PIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
-                PCI_EXP_SLTCTL_PWR_IND_OFF);
-}
-
-void pciehp_green_led_blink(struct controller *ctrl)
-{
-       if (!PWR_LED(ctrl))
-               return;
+       if (ATTN_LED(ctrl) && attn != INDICATOR_NOOP) {
+               cmd |= (attn & PCI_EXP_SLTCTL_AIC);
+               mask |= PCI_EXP_SLTCTL_AIC;
+       }
 
-       pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
-                             PCI_EXP_SLTCTL_PIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
-                PCI_EXP_SLTCTL_PWR_IND_BLINK);
+       if (cmd) {
+               pcie_write_cmd_nowait(ctrl, cmd, mask);
+               ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+                        pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
+       }
 }
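
A minimal caller-side sketch of the new helper: both indicators now change
with a single Slot Control read-modify-write, where the old green-led helpers
needed one write per indicator.

	pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
			      PCI_EXP_SLTCTL_ATTN_IND_OFF);
	/* internally: cmd  = PCI_EXP_SLTCTL_PWR_IND_ON | PCI_EXP_SLTCTL_ATTN_IND_OFF;
	 *             mask = PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC;               */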
 
 int pciehp_power_on_slot(struct controller *ctrl)
@@ -638,8 +613,8 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
        if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
                ctrl->power_fault_detected = 1;
                ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
-               pciehp_set_attention_status(ctrl, 1);
-               pciehp_green_led_off(ctrl);
+               pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                                     PCI_EXP_SLTCTL_ATTN_IND_ON);
        }
 
        /*
index 182f9e3..977946e 100644 (file)
@@ -473,7 +473,6 @@ int __init rpadlpar_io_init(void)
 void rpadlpar_io_exit(void)
 {
        dlpar_sysfs_exit();
-       return;
 }
 
 module_init(rpadlpar_io_init);
index c3899ee..18627bb 100644 (file)
@@ -408,7 +408,6 @@ static void __exit cleanup_slots(void)
                pci_hp_deregister(&slot->hotplug_slot);
                dealloc_slot_struct(slot);
        }
-       return;
 }
 
 static int __init rpaphp_init(void)
index 525fd3f..b3f972e 100644 (file)
@@ -240,6 +240,173 @@ void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
        pci_dev_put(dev);
 }
 
+static ssize_t sriov_totalvfs_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
+}
+
+static ssize_t sriov_numvfs_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->num_VFs);
+}
+
+/*
+ * num_vfs > 0: number of VFs to enable
+ * num_vfs = 0: disable all VFs
+ *
+ * Note: the SR-IOV spec does not allow partial VF disable, so it is
+ * all or none.
+ */
+static ssize_t sriov_numvfs_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buf, size_t count)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       int ret;
+       u16 num_vfs;
+
+       ret = kstrtou16(buf, 0, &num_vfs);
+       if (ret < 0)
+               return ret;
+
+       if (num_vfs > pci_sriov_get_totalvfs(pdev))
+               return -ERANGE;
+
+       device_lock(&pdev->dev);
+
+       if (num_vfs == pdev->sriov->num_VFs)
+               goto exit;
+
+       /* is the PF driver loaded, and does it provide sriov_configure()? */
+       if (!pdev->driver || !pdev->driver->sriov_configure) {
+               pci_info(pdev, "Driver does not support SRIOV configuration via sysfs\n");
+               ret = -ENOENT;
+               goto exit;
+       }
+
+       if (num_vfs == 0) {
+               /* disable VFs */
+               ret = pdev->driver->sriov_configure(pdev, 0);
+               goto exit;
+       }
+
+       /* enable VFs */
+       if (pdev->sriov->num_VFs) {
+               pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
+                        pdev->sriov->num_VFs, num_vfs);
+               ret = -EBUSY;
+               goto exit;
+       }
+
+       ret = pdev->driver->sriov_configure(pdev, num_vfs);
+       if (ret < 0)
+               goto exit;
+
+       if (ret != num_vfs)
+               pci_warn(pdev, "%d VFs requested; only %d enabled\n",
+                        num_vfs, ret);
+
+exit:
+       device_unlock(&pdev->dev);
+
+       if (ret < 0)
+               return ret;
+
+       return count;
+}
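
An illustrative userspace use of the attribute defined above (the device
address is hypothetical); writing "0" disables all VFs, per the note at the
top of the function:

	#include <fcntl.h>
	#include <unistd.h>

	int enable_four_vfs(void)
	{
		int fd = open("/sys/bus/pci/devices/0000:3b:00.0/sriov_numvfs",
			      O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, "4", 1) != 1) {	/* ends up in sriov_configure(pdev, 4) */
			close(fd);
			return -1;
		}
		return close(fd);
	}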
+
+static ssize_t sriov_offset_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->offset);
+}
+
+static ssize_t sriov_stride_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->stride);
+}
+
+static ssize_t sriov_vf_device_show(struct device *dev,
+                                   struct device_attribute *attr,
+                                   char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%x\n", pdev->sriov->vf_device);
+}
+
+static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
+                                           struct device_attribute *attr,
+                                           char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
+}
+
+static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
+                                            struct device_attribute *attr,
+                                            const char *buf, size_t count)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       bool drivers_autoprobe;
+
+       if (kstrtobool(buf, &drivers_autoprobe) < 0)
+               return -EINVAL;
+
+       pdev->sriov->drivers_autoprobe = drivers_autoprobe;
+
+       return count;
+}
+
+static DEVICE_ATTR_RO(sriov_totalvfs);
+static DEVICE_ATTR_RW(sriov_numvfs);
+static DEVICE_ATTR_RO(sriov_offset);
+static DEVICE_ATTR_RO(sriov_stride);
+static DEVICE_ATTR_RO(sriov_vf_device);
+static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
+
+static struct attribute *sriov_dev_attrs[] = {
+       &dev_attr_sriov_totalvfs.attr,
+       &dev_attr_sriov_numvfs.attr,
+       &dev_attr_sriov_offset.attr,
+       &dev_attr_sriov_stride.attr,
+       &dev_attr_sriov_vf_device.attr,
+       &dev_attr_sriov_drivers_autoprobe.attr,
+       NULL,
+};
+
+static umode_t sriov_attrs_are_visible(struct kobject *kobj,
+                                      struct attribute *a, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+
+       if (!dev_is_pf(dev))
+               return 0;
+
+       return a->mode;
+}
+
+const struct attribute_group sriov_dev_attr_group = {
+       .attrs = sriov_dev_attrs,
+       .is_visible = sriov_attrs_are_visible,
+};
+
 int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 {
        return 0;
@@ -557,8 +724,8 @@ static void sriov_restore_state(struct pci_dev *dev)
        ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);
 
-       for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
-               pci_update_resource(dev, i);
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+               pci_update_resource(dev, i + PCI_IOV_RESOURCES);
 
        pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
        pci_iov_set_numvfs(dev, iov->num_VFs);
index bc7b27a..36891e7 100644 (file)
@@ -353,7 +353,7 @@ EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources);
 /**
  * of_irq_parse_pci - Resolve the interrupt for a PCI device
  * @pdev:       the device whose interrupt is to be resolved
- * @out_irq:    structure of_irq filled by this function
+ * @out_irq:    structure of_phandle_args filled by this function
  *
  * This function resolves the PCI interrupt for a given PCI device. If a
  * device-node exists for a given pci_dev, it will use normal OF tree
index 2344762..0608aae 100644 (file)
 #include <linux/percpu-refcount.h>
 #include <linux/random.h>
 #include <linux/seq_buf.h>
-#include <linux/iommu.h>
+#include <linux/xarray.h>
+
+enum pci_p2pdma_map_type {
+       PCI_P2PDMA_MAP_UNKNOWN = 0,
+       PCI_P2PDMA_MAP_NOT_SUPPORTED,
+       PCI_P2PDMA_MAP_BUS_ADDR,
+       PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
+};
 
 struct pci_p2pdma {
        struct gen_pool *pool;
        bool p2pmem_published;
+       struct xarray map_types;
 };
 
+struct pci_p2pdma_pagemap {
+       struct dev_pagemap pgmap;
+       struct pci_dev *provider;
+       u64 bus_offset;
+};
+
+static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
+{
+       return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
+}
+
 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
 {
@@ -87,6 +106,7 @@ static void pci_p2pdma_release(void *data)
 
        gen_pool_destroy(p2pdma->pool);
        sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
+       xa_destroy(&p2pdma->map_types);
 }
 
 static int pci_p2pdma_setup(struct pci_dev *pdev)
@@ -98,6 +118,8 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
        if (!p2p)
                return -ENOMEM;
 
+       xa_init(&p2p->map_types);
+
        p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
        if (!p2p->pool)
                goto out;
@@ -135,6 +157,7 @@ out:
 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
                            u64 offset)
 {
+       struct pci_p2pdma_pagemap *p2p_pgmap;
        struct dev_pagemap *pgmap;
        void *addr;
        int error;
@@ -157,14 +180,18 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
                        return error;
        }
 
-       pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
-       if (!pgmap)
+       p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+       if (!p2p_pgmap)
                return -ENOMEM;
+
+       pgmap = &p2p_pgmap->pgmap;
        pgmap->res.start = pci_resource_start(pdev, bar) + offset;
        pgmap->res.end = pgmap->res.start + size - 1;
        pgmap->res.flags = pci_resource_flags(pdev, bar);
        pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
-       pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
+
+       p2p_pgmap->provider = pdev;
+       p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
                pci_resource_start(pdev, bar);
 
        addr = devm_memremap_pages(&pdev->dev, pgmap);
@@ -246,19 +273,32 @@ static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
        seq_buf_printf(buf, "%s;", pci_name(pdev));
 }
 
-/*
- * If we can't find a common upstream bridge take a look at the root
- * complex and compare it to a whitelist of known good hardware.
- */
-static bool root_complex_whitelist(struct pci_dev *dev)
+static const struct pci_p2pdma_whitelist_entry {
+       unsigned short vendor;
+       unsigned short device;
+       enum {
+               REQ_SAME_HOST_BRIDGE    = 1 << 0,
+       } flags;
+} pci_p2pdma_whitelist[] = {
+       /* AMD ZEN */
+       {PCI_VENDOR_ID_AMD,     0x1450, 0},
+
+       /* Intel Xeon E5/Core i7 */
+       {PCI_VENDOR_ID_INTEL,   0x3c00, REQ_SAME_HOST_BRIDGE},
+       {PCI_VENDOR_ID_INTEL,   0x3c01, REQ_SAME_HOST_BRIDGE},
+       /* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */
+       {PCI_VENDOR_ID_INTEL,   0x2f00, REQ_SAME_HOST_BRIDGE},
+       {PCI_VENDOR_ID_INTEL,   0x2f01, REQ_SAME_HOST_BRIDGE},
+       {}
+};
+
+static bool __host_bridge_whitelist(struct pci_host_bridge *host,
+                                   bool same_host_bridge)
 {
-       struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
        struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0));
+       const struct pci_p2pdma_whitelist_entry *entry;
        unsigned short vendor, device;
 
-       if (iommu_present(dev->dev.bus))
-               return false;
-
        if (!root)
                return false;
 
@@ -266,65 +306,49 @@ static bool root_complex_whitelist(struct pci_dev *dev)
        device = root->device;
        pci_dev_put(root);
 
-       /* AMD ZEN host bridges can do peer to peer */
-       if (vendor == PCI_VENDOR_ID_AMD && device == 0x1450)
+       for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) {
+               if (vendor != entry->vendor || device != entry->device)
+                       continue;
+               if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge)
+                       return false;
+
                return true;
+       }
 
        return false;
 }
 
 /*
- * Find the distance through the nearest common upstream bridge between
- * two PCI devices.
- *
- * If the two devices are the same device then 0 will be returned.
- *
- * If there are two virtual functions of the same device behind the same
- * bridge port then 2 will be returned (one step down to the PCIe switch,
- * then one step back to the same device).
- *
- * In the case where two devices are connected to the same PCIe switch, the
- * value 4 will be returned. This corresponds to the following PCI tree:
- *
- *     -+  Root Port
- *      \+ Switch Upstream Port
- *       +-+ Switch Downstream Port
- *       + \- Device A
- *       \-+ Switch Downstream Port
- *         \- Device B
- *
- * The distance is 4 because we traverse from Device A through the downstream
- * port of the switch, to the common upstream port, back up to the second
- * downstream port and then to Device B.
- *
- * Any two devices that don't have a common upstream bridge will return -1.
- * In this way devices on separate PCIe root ports will be rejected, which
- * is what we want for peer-to-peer seeing each PCIe root port defines a
- * separate hierarchy domain and there's no way to determine whether the root
- * complex supports forwarding between them.
- *
- * In the case where two devices are connected to different PCIe switches,
- * this function will still return a positive distance as long as both
- * switches eventually have a common upstream bridge. Note this covers
- * the case of using multiple PCIe switches to achieve a desired level of
- * fan-out from a root port. The exact distance will be a function of the
- * number of switches between Device A and Device B.
- *
- * If a bridge which has any ACS redirection bits set is in the path
- * then this functions will return -2. This is so we reject any
- * cases where the TLPs are forwarded up into the root complex.
- * In this case, a list of all infringing bridge addresses will be
- * populated in acs_list (assuming it's non-null) for printk purposes.
+ * If we can't find a common upstream bridge take a look at the root
+ * complex and compare it to a whitelist of known good hardware.
  */
-static int upstream_bridge_distance(struct pci_dev *provider,
-                                   struct pci_dev *client,
-                                   struct seq_buf *acs_list)
+static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b)
+{
+       struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus);
+       struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus);
+
+       if (host_a == host_b)
+               return __host_bridge_whitelist(host_a, true);
+
+       if (__host_bridge_whitelist(host_a, false) &&
+           __host_bridge_whitelist(host_b, false))
+               return true;
+
+       return false;
+}
+
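
To make the flag concrete: two clients under one whitelisted Intel host bridge
take the same_host_bridge path and are allowed; the same clients split across
two separate Intel host bridges are rejected, because those entries set
REQ_SAME_HOST_BRIDGE; clients under two AMD 0x1450 host bridges are accepted
either way, since that entry carries no flags.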
+static enum pci_p2pdma_map_type
+__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
+               int *dist, bool *acs_redirects, struct seq_buf *acs_list)
 {
        struct pci_dev *a = provider, *b = client, *bb;
        int dist_a = 0;
        int dist_b = 0;
        int acs_cnt = 0;
 
+       if (acs_redirects)
+               *acs_redirects = false;
+
        /*
         * Note, we don't need to take references to devices returned by
         * pci_upstream_bridge() seeing we hold a reference to a child
@@ -353,15 +377,10 @@ static int upstream_bridge_distance(struct pci_dev *provider,
                dist_a++;
        }
 
-       /*
-        * Allow the connection if both devices are on a whitelisted root
-        * complex, but add an arbitrary large value to the distance.
-        */
-       if (root_complex_whitelist(provider) &&
-           root_complex_whitelist(client))
-               return 0x1000 + dist_a + dist_b;
+       if (dist)
+               *dist = dist_a + dist_b;
 
-       return -1;
+       return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
 
 check_b_path_acs:
        bb = b;
@@ -378,33 +397,110 @@ check_b_path_acs:
                bb = pci_upstream_bridge(bb);
        }
 
-       if (acs_cnt)
-               return -2;
+       if (dist)
+               *dist = dist_a + dist_b;
+
+       if (acs_cnt) {
+               if (acs_redirects)
+                       *acs_redirects = true;
+
+               return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+       }
+
+       return PCI_P2PDMA_MAP_BUS_ADDR;
+}
+
+static unsigned long map_types_idx(struct pci_dev *client)
+{
+       return (pci_domain_nr(client->bus) << 16) |
+               (client->bus->number << 8) | client->devfn;
+}
+
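
Worked example of the index above for a hypothetical client at 0000:03:00.1
(domain 0, bus 0x03, devfn 0x01):

	idx = (0 << 16) | (0x03 << 8) | 0x01;	/* == 0x0301 */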
+/*
+ * Find the distance through the nearest common upstream bridge between
+ * two PCI devices.
+ *
+ * If the two devices are the same device then 0 will be returned.
+ *
+ * If there are two virtual functions of the same device behind the same
+ * bridge port then 2 will be returned (one step down to the PCIe switch,
+ * then one step back to the same device).
+ *
+ * In the case where two devices are connected to the same PCIe switch, the
+ * value 4 will be returned. This corresponds to the following PCI tree:
+ *
+ *     -+  Root Port
+ *      \+ Switch Upstream Port
+ *       +-+ Switch Downstream Port
+ *       + \- Device A
+ *       \-+ Switch Downstream Port
+ *         \- Device B
+ *
+ * The distance is 4 because we traverse from Device A through the downstream
+ * port of the switch, to the common upstream port, back up to the second
+ * downstream port and then to Device B.
+ *
+ * Any two devices that cannot communicate using p2pdma will return
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ *
+ * Any two devices that have a data path that goes through the host bridge
+ * will consult a whitelist. If the host bridges are on the whitelist,
+ * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE.
+ *
+ * If either bridge is not on the whitelist this function returns
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ *
+ * If a bridge which has any ACS redirection bits set is in the path,
+ * acs_redirects will be set to true. In this case, a list of all infringing
+ * bridge addresses will be populated in acs_list (assuming it's non-null)
+ * for printk purposes.
+ */
+static enum pci_p2pdma_map_type
+upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
+               int *dist, bool *acs_redirects, struct seq_buf *acs_list)
+{
+       enum pci_p2pdma_map_type map_type;
+
+       map_type = __upstream_bridge_distance(provider, client, dist,
+                                             acs_redirects, acs_list);
+
+       if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) {
+               if (!host_bridge_whitelist(provider, client))
+                       map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
+       }
+
+       if (provider->p2pdma)
+               xa_store(&provider->p2pdma->map_types, map_types_idx(client),
+                        xa_mk_value(map_type), GFP_KERNEL);
 
-       return dist_a + dist_b;
+       return map_type;
 }
 
-static int upstream_bridge_distance_warn(struct pci_dev *provider,
-                                        struct pci_dev *client)
+static enum pci_p2pdma_map_type
+upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client,
+                             int *dist)
 {
        struct seq_buf acs_list;
+       bool acs_redirects;
        int ret;
 
        seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
        if (!acs_list.buffer)
                return -ENOMEM;
 
-       ret = upstream_bridge_distance(provider, client, &acs_list);
-       if (ret == -2) {
-               pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n",
+       ret = upstream_bridge_distance(provider, client, dist, &acs_redirects,
+                                      &acs_list);
+       if (acs_redirects) {
+               pci_warn(client, "ACS redirect is set between the client and provider (%s)\n",
                         pci_name(provider));
                /* Drop final semicolon */
                acs_list.buffer[acs_list.len-1] = 0;
                pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
                         acs_list.buffer);
+       }
 
-       } else if (ret < 0) {
-               pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n",
+       if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) {
+               pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n",
                         pci_name(provider));
        }
 
@@ -421,22 +517,22 @@ static int upstream_bridge_distance_warn(struct pci_dev *provider,
  * @num_clients: number of clients in the array
  * @verbose: if true, print warnings for devices when we return -1
  *
- * Returns -1 if any of the clients are not compatible (behind the same
- * root port as the provider), otherwise returns a positive number where
- * a lower number is the preferable choice. (If there's one client
- * that's the same as the provider it will return 0, which is best choice).
+ * Returns -1 if any of the clients are not compatible, otherwise returns a
+ * positive number where a lower number is the preferable choice. (If there's
+ * one client that's the same as the provider it will return 0, which is the
+ * best choice).
  *
- * For now, "compatible" means the provider and the clients are all behind
- * the same PCI root port. This cuts out cases that may work but is safest
- * for the user. Future work can expand this to white-list root complexes that
- * can safely forward between each ports.
+ * "compatible" means the provider and the clients are either all behind
+ * the same PCI root port or the host bridges connected to each of the devices
+ * are listed in the 'pci_p2pdma_whitelist'.
  */
 int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
                             int num_clients, bool verbose)
 {
        bool not_supported = false;
        struct pci_dev *pci_client;
-       int distance = 0;
+       int total_dist = 0;
+       int distance;
        int i, ret;
 
        if (num_clients == 0)
@@ -461,26 +557,26 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
 
                if (verbose)
                        ret = upstream_bridge_distance_warn(provider,
-                                                           pci_client);
+                                       pci_client, &distance);
                else
                        ret = upstream_bridge_distance(provider, pci_client,
-                                                      NULL);
+                                                      &distance, NULL, NULL);
 
                pci_dev_put(pci_client);
 
-               if (ret < 0)
+               if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED)
                        not_supported = true;
 
                if (not_supported && !verbose)
                        break;
 
-               distance += ret;
+               total_dist += distance;
        }
 
        if (not_supported)
                return -1;
 
-       return distance;
+       return total_dist;
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
 
@@ -706,21 +802,19 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 }
 EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
 
-/**
- * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
- * @dev: device doing the DMA request
- * @sg: scatter list to map
- * @nents: elements in the scatterlist
- * @dir: DMA direction
- *
- * Scatterlists mapped with this function should not be unmapped in any way.
- *
- * Returns the number of SG entries mapped or 0 on error.
- */
-int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-                     enum dma_data_direction dir)
+static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider,
+                                                   struct pci_dev *client)
+{
+       if (!provider->p2pdma)
+               return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+
+       return xa_to_value(xa_load(&provider->p2pdma->map_types,
+                                  map_types_idx(client)));
+}
+
+static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap,
+               struct device *dev, struct scatterlist *sg, int nents)
 {
-       struct dev_pagemap *pgmap;
        struct scatterlist *s;
        phys_addr_t paddr;
        int i;
@@ -736,16 +830,80 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                return 0;
 
        for_each_sg(sg, s, nents, i) {
-               pgmap = sg_page(s)->pgmap;
                paddr = sg_phys(s);
 
-               s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
+               s->dma_address = paddr - p2p_pgmap->bus_offset;
                sg_dma_len(s) = s->length;
        }
 
        return nents;
 }
-EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
+
+/**
+ * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: elements in the scatterlist
+ * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_map_sg() (if called)
+ *
+ * Scatterlists mapped with this function should be unmapped using
+ * pci_p2pdma_unmap_sg_attrs().
+ *
+ * Returns the number of SG entries mapped or 0 on error.
+ */
+int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+       struct pci_p2pdma_pagemap *p2p_pgmap =
+               to_p2p_pgmap(sg_page(sg)->pgmap);
+       struct pci_dev *client;
+
+       if (WARN_ON_ONCE(!dev_is_pci(dev)))
+               return 0;
+
+       client = to_pci_dev(dev);
+
+       switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) {
+       case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+               return dma_map_sg_attrs(dev, sg, nents, dir, attrs);
+       case PCI_P2PDMA_MAP_BUS_ADDR:
+               return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents);
+       default:
+               WARN_ON_ONCE(1);
+               return 0;
+       }
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
+
+/**
+ * pci_p2pdma_unmap_sg - unmap a PCI peer-to-peer scatterlist that was
+ *     mapped with pci_p2pdma_map_sg()
+ * @dev: device doing the DMA request
+ * @sg: scatter list to unmap
+ * @nents: number of elements returned by pci_p2pdma_map_sg()
+ * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_unmap_sg() (if called)
+ */
+void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+       struct pci_p2pdma_pagemap *p2p_pgmap =
+               to_p2p_pgmap(sg_page(sg)->pgmap);
+       enum pci_p2pdma_map_type map_type;
+       struct pci_dev *client;
+
+       if (WARN_ON_ONCE(!dev_is_pci(dev)))
+               return;
+
+       client = to_pci_dev(dev);
+
+       map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client);
+
+       if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+               dma_unmap_sg_attrs(dev, sg, nents, dir, attrs);
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
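
A sketch of the intended pairing from a consumer's point of view (device and
scatterlist names hypothetical); both calls look up the same cached map type,
so map and unmap stay consistent even in the host-bridge case:

	nents = pci_p2pdma_map_sg_attrs(dma_dev, sgl, count, DMA_TO_DEVICE, 0);
	if (!nents)
		return -EIO;

	/* ... device performs the transfer ... */

	pci_p2pdma_unmap_sg_attrs(dma_dev, sgl, nents, DMA_TO_DEVICE, 0);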
 
 /**
  * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
index 45049f5..0c02d50 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/msi.h>
 #include <linux/pci_hotplug.h>
 #include <linux/module.h>
-#include <linux/pci-aspm.h>
 #include <linux/pci-acpi.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_qos.h>
@@ -118,8 +117,58 @@ phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
        return (phys_addr_t)mcfg_addr;
 }
 
+/* _HPX PCI Setting Record (Type 0); same as _HPP */
+struct hpx_type0 {
+       u32 revision;           /* Not present in _HPP */
+       u8  cache_line_size;    /* Not applicable to PCIe */
+       u8  latency_timer;      /* Not applicable to PCIe */
+       u8  enable_serr;
+       u8  enable_perr;
+};
+
+static struct hpx_type0 pci_default_type0 = {
+       .revision = 1,
+       .cache_line_size = 8,
+       .latency_timer = 0x40,
+       .enable_serr = 0,
+       .enable_perr = 0,
+};
+
+static void program_hpx_type0(struct pci_dev *dev, struct hpx_type0 *hpx)
+{
+       u16 pci_cmd, pci_bctl;
+
+       if (!hpx)
+               hpx = &pci_default_type0;
+
+       if (hpx->revision > 1) {
+               pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
+                        hpx->revision);
+               hpx = &pci_default_type0;
+       }
+
+       pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpx->cache_line_size);
+       pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpx->latency_timer);
+       pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
+       if (hpx->enable_serr)
+               pci_cmd |= PCI_COMMAND_SERR;
+       if (hpx->enable_perr)
+               pci_cmd |= PCI_COMMAND_PARITY;
+       pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
+
+       /* Program bridge control value */
+       if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+               pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
+                                     hpx->latency_timer);
+               pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
+               if (hpx->enable_perr)
+                       pci_bctl |= PCI_BRIDGE_CTL_PARITY;
+               pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
+       }
+}
+
 static acpi_status decode_type0_hpx_record(union acpi_object *record,
-                                          struct hpp_type0 *hpx0)
+                                          struct hpx_type0 *hpx0)
 {
        int i;
        union acpi_object *fields = record->package.elements;
@@ -146,8 +195,30 @@ static acpi_status decode_type0_hpx_record(union acpi_object *record,
        return AE_OK;
 }
 
+/* _HPX PCI-X Setting Record (Type 1) */
+struct hpx_type1 {
+       u32 revision;
+       u8  max_mem_read;
+       u8  avg_max_split;
+       u16 tot_max_split;
+};
+
+static void program_hpx_type1(struct pci_dev *dev, struct hpx_type1 *hpx)
+{
+       int pos;
+
+       if (!hpx)
+               return;
+
+       pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+       if (!pos)
+               return;
+
+       pci_warn(dev, "PCI-X settings not supported\n");
+}
+
 static acpi_status decode_type1_hpx_record(union acpi_object *record,
-                                          struct hpp_type1 *hpx1)
+                                          struct hpx_type1 *hpx1)
 {
        int i;
        union acpi_object *fields = record->package.elements;
@@ -173,8 +244,130 @@ static acpi_status decode_type1_hpx_record(union acpi_object *record,
        return AE_OK;
 }
 
+static bool pcie_root_rcb_set(struct pci_dev *dev)
+{
+       struct pci_dev *rp = pcie_find_root_port(dev);
+       u16 lnkctl;
+
+       if (!rp)
+               return false;
+
+       pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl);
+       if (lnkctl & PCI_EXP_LNKCTL_RCB)
+               return true;
+
+       return false;
+}
+
+/* _HPX PCI Express Setting Record (Type 2) */
+struct hpx_type2 {
+       u32 revision;
+       u32 unc_err_mask_and;
+       u32 unc_err_mask_or;
+       u32 unc_err_sever_and;
+       u32 unc_err_sever_or;
+       u32 cor_err_mask_and;
+       u32 cor_err_mask_or;
+       u32 adv_err_cap_and;
+       u32 adv_err_cap_or;
+       u16 pci_exp_devctl_and;
+       u16 pci_exp_devctl_or;
+       u16 pci_exp_lnkctl_and;
+       u16 pci_exp_lnkctl_or;
+       u32 sec_unc_err_sever_and;
+       u32 sec_unc_err_sever_or;
+       u32 sec_unc_err_mask_and;
+       u32 sec_unc_err_mask_or;
+};
+
+static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
+{
+       int pos;
+       u32 reg32;
+
+       if (!hpx)
+               return;
+
+       if (!pci_is_pcie(dev))
+               return;
+
+       if (hpx->revision > 1) {
+               pci_warn(dev, "PCIe settings rev %d not supported\n",
+                        hpx->revision);
+               return;
+       }
+
+       /*
+        * Don't allow _HPX to change MPS or MRRS settings.  We manage
+        * those to make sure they're consistent with the rest of the
+        * platform.
+        */
+       hpx->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
+                                   PCI_EXP_DEVCTL_READRQ;
+       hpx->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
+                                   PCI_EXP_DEVCTL_READRQ);
+
+       /* Initialize Device Control Register */
+       pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+                       ~hpx->pci_exp_devctl_and, hpx->pci_exp_devctl_or);
+
+       /* Initialize Link Control Register */
+       if (pcie_cap_has_lnkctl(dev)) {
+
+               /*
+                * If the Root Port supports Read Completion Boundary of
+                * 128, set RCB to 128.  Otherwise, clear it.
+                */
+               hpx->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
+               hpx->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
+               if (pcie_root_rcb_set(dev))
+                       hpx->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
+
+               pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
+                       ~hpx->pci_exp_lnkctl_and, hpx->pci_exp_lnkctl_or);
+       }
+
+       /* Find Advanced Error Reporting Enhanced Capability */
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+       if (!pos)
+               return;
+
+       /* Initialize Uncorrectable Error Mask Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
+       reg32 = (reg32 & hpx->unc_err_mask_and) | hpx->unc_err_mask_or;
+       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
+
+       /* Initialize Uncorrectable Error Severity Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
+       reg32 = (reg32 & hpx->unc_err_sever_and) | hpx->unc_err_sever_or;
+       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
+
+       /* Initialize Correctable Error Mask Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
+       reg32 = (reg32 & hpx->cor_err_mask_and) | hpx->cor_err_mask_or;
+       pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
+
+       /* Initialize Advanced Error Capabilities and Control Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
+       reg32 = (reg32 & hpx->adv_err_cap_and) | hpx->adv_err_cap_or;
+
+       /* Don't enable ECRC generation or checking if unsupported */
+       if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
+               reg32 &= ~PCI_ERR_CAP_ECRC_GENE;
+       if (!(reg32 & PCI_ERR_CAP_ECRC_CHKC))
+               reg32 &= ~PCI_ERR_CAP_ECRC_CHKE;
+       pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
+
+       /*
+        * FIXME: The following two registers are not supported yet.
+        *
+        *   o Secondary Uncorrectable Error Severity Register
+        *   o Secondary Uncorrectable Error Mask Register
+        */
+}
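One identity worth spelling out, since the masks above are passed negated: pcie_capability_clear_and_set_word(dev, reg, clear, set) computes new = (old & ~clear) | set, so passing ~and_mask as the clear argument yields new = (old & and_mask) | or_mask, exactly the _HPX AND/OR semantics. The MPS/MRRS clamp then follows directly: forcing the PAYLOAD/READRQ bits into the AND mask and out of the OR mask makes those bits read back unchanged. A sketch with invented mask values:

        /* Illustration: record supplied _and = 0xf0ff, _or = 0x0800 */
        u16 and_mask = 0xf0ff | PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ;
        u16 or_mask  = 0x0800 & ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ);

        /* new DEVCTL = (old & and_mask) | or_mask; MPS/MRRS bits untouched */
        pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
                                           ~and_mask, or_mask);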
+
 static acpi_status decode_type2_hpx_record(union acpi_object *record,
-                                          struct hpp_type2 *hpx2)
+                                          struct hpx_type2 *hpx2)
 {
        int i;
        union acpi_object *fields = record->package.elements;
@@ -213,6 +406,164 @@ static acpi_status decode_type2_hpx_record(union acpi_object *record,
        return AE_OK;
 }
 
+/* _HPX PCI Express Setting Record (Type 3) */
+struct hpx_type3 {
+       u16 device_type;
+       u16 function_type;
+       u16 config_space_location;
+       u16 pci_exp_cap_id;
+       u16 pci_exp_cap_ver;
+       u16 pci_exp_vendor_id;
+       u16 dvsec_id;
+       u16 dvsec_rev;
+       u16 match_offset;
+       u32 match_mask_and;
+       u32 match_value;
+       u16 reg_offset;
+       u32 reg_mask_and;
+       u32 reg_mask_or;
+};
+
+enum hpx_type3_dev_type {
+       HPX_TYPE_ENDPOINT       = BIT(0),
+       HPX_TYPE_LEG_END        = BIT(1),
+       HPX_TYPE_RC_END         = BIT(2),
+       HPX_TYPE_RC_EC          = BIT(3),
+       HPX_TYPE_ROOT_PORT      = BIT(4),
+       HPX_TYPE_UPSTREAM       = BIT(5),
+       HPX_TYPE_DOWNSTREAM     = BIT(6),
+       HPX_TYPE_PCI_BRIDGE     = BIT(7),
+       HPX_TYPE_PCIE_BRIDGE    = BIT(8),
+};
+
+static u16 hpx3_device_type(struct pci_dev *dev)
+{
+       u16 pcie_type = pci_pcie_type(dev);
+       const int pcie_to_hpx3_type[] = {
+               [PCI_EXP_TYPE_ENDPOINT]    = HPX_TYPE_ENDPOINT,
+               [PCI_EXP_TYPE_LEG_END]     = HPX_TYPE_LEG_END,
+               [PCI_EXP_TYPE_RC_END]      = HPX_TYPE_RC_END,
+               [PCI_EXP_TYPE_RC_EC]       = HPX_TYPE_RC_EC,
+               [PCI_EXP_TYPE_ROOT_PORT]   = HPX_TYPE_ROOT_PORT,
+               [PCI_EXP_TYPE_UPSTREAM]    = HPX_TYPE_UPSTREAM,
+               [PCI_EXP_TYPE_DOWNSTREAM]  = HPX_TYPE_DOWNSTREAM,
+               [PCI_EXP_TYPE_PCI_BRIDGE]  = HPX_TYPE_PCI_BRIDGE,
+               [PCI_EXP_TYPE_PCIE_BRIDGE] = HPX_TYPE_PCIE_BRIDGE,
+       };
+
+       if (pcie_type >= ARRAY_SIZE(pcie_to_hpx3_type))
+               return 0;
+
+       return pcie_to_hpx3_type[pcie_type];
+}
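For example, pci_pcie_type() returns the small PCI_EXP_TYPE_* integers from the PCIe Capabilities register, and the table above converts them into the one-hot match bits used by _HPX Type 3 records:

        u16 m = hpx3_device_type(pdev);         /* pdev: a Root Port */
        /* m == HPX_TYPE_ROOT_PORT == BIT(4) == 0x10, since
         * PCI_EXP_TYPE_ROOT_PORT == 0x4; a record whose device_type
         * field has that bit set applies to Root Ports. */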
+
+enum hpx_type3_fn_type {
+       HPX_FN_NORMAL           = BIT(0),
+       HPX_FN_SRIOV_PHYS       = BIT(1),
+       HPX_FN_SRIOV_VIRT       = BIT(2),
+};
+
+static u8 hpx3_function_type(struct pci_dev *dev)
+{
+       if (dev->is_virtfn)
+               return HPX_FN_SRIOV_VIRT;
+       else if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV) > 0)
+               return HPX_FN_SRIOV_PHYS;
+       else
+               return HPX_FN_NORMAL;
+}
+
+static bool hpx3_cap_ver_matches(u8 pcie_cap_id, u8 hpx3_cap_id)
+{
+       u8 cap_ver = hpx3_cap_id & 0xf;
+
+       if ((hpx3_cap_id & BIT(4)) && cap_ver >= pcie_cap_id)
+               return true;
+       else if (cap_ver == pcie_cap_id)
+               return true;
+
+       return false;
+}
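In other words, bit 4 of the record's version field widens an exact match into "any device capability version less than or equal to the record's version". Concretely, as implemented above:

        hpx3_cap_ver_matches(1, 0x12);  /* true:  bit 4 set, device ver 1 <= 2 */
        hpx3_cap_ver_matches(3, 0x12);  /* false: device ver 3 >  2 */
        hpx3_cap_ver_matches(2, 0x02);  /* true:  exact match on ver 2 */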
+
+enum hpx_type3_cfg_loc {
+       HPX_CFG_PCICFG          = 0,
+       HPX_CFG_PCIE_CAP        = 1,
+       HPX_CFG_PCIE_CAP_EXT    = 2,
+       HPX_CFG_VEND_CAP        = 3,
+       HPX_CFG_DVSEC           = 4,
+       HPX_CFG_MAX,
+};
+
+static void program_hpx_type3_register(struct pci_dev *dev,
+                                      const struct hpx_type3 *reg)
+{
+       u32 match_reg, write_reg, header, orig_value;
+       u16 pos;
+
+       if (!(hpx3_device_type(dev) & reg->device_type))
+               return;
+
+       if (!(hpx3_function_type(dev) & reg->function_type))
+               return;
+
+       switch (reg->config_space_location) {
+       case HPX_CFG_PCICFG:
+               pos = 0;
+               break;
+       case HPX_CFG_PCIE_CAP:
+               pos = pci_find_capability(dev, reg->pci_exp_cap_id);
+               if (pos == 0)
+                       return;
+
+               break;
+       case HPX_CFG_PCIE_CAP_EXT:
+               pos = pci_find_ext_capability(dev, reg->pci_exp_cap_id);
+               if (pos == 0)
+                       return;
+
+               pci_read_config_dword(dev, pos, &header);
+               if (!hpx3_cap_ver_matches(PCI_EXT_CAP_VER(header),
+                                         reg->pci_exp_cap_ver))
+                       return;
+
+               break;
+       case HPX_CFG_VEND_CAP:  /* Fall through */
+       case HPX_CFG_DVSEC:     /* Fall through */
+       default:
+               pci_warn(dev, "Encountered _HPX type 3 with unsupported config space location");
+               return;
+       }
+
+       pci_read_config_dword(dev, pos + reg->match_offset, &match_reg);
+
+       if ((match_reg & reg->match_mask_and) != reg->match_value)
+               return;
+
+       pci_read_config_dword(dev, pos + reg->reg_offset, &write_reg);
+       orig_value = write_reg;
+       write_reg &= reg->reg_mask_and;
+       write_reg |= reg->reg_mask_or;
+
+       if (orig_value == write_reg)
+               return;
+
+       pci_write_config_dword(dev, pos + reg->reg_offset, write_reg);
+
+       pci_dbg(dev, "Applied _HPX3 at [0x%x]: 0x%08x -> 0x%08x",
+               pos, orig_value, write_reg);
+}
+
+static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx)
+{
+       if (!hpx)
+               return;
+
+       if (!pci_is_pcie(dev))
+               return;
+
+       program_hpx_type3_register(dev, hpx);
+}
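To make the match-then-RMW flow concrete, here is a hypothetical record (every field value below is invented for illustration; no firmware is known to ship it) that would mask Surprise Down reporting in the AER Uncorrectable Error Mask register of Root Ports whose AER capability is version 1 or lower:

        static struct hpx_type3 example_rec = {
                .device_type            = HPX_TYPE_ROOT_PORT,
                .function_type          = HPX_FN_NORMAL,
                .config_space_location  = HPX_CFG_PCIE_CAP_EXT,
                .pci_exp_cap_id         = PCI_EXT_CAP_ID_ERR,   /* AER */
                .pci_exp_cap_ver        = 0x11,         /* bit 4: ver <= 1 */
                .match_offset           = 0,            /* ext cap header */
                .match_mask_and         = 0xffff,       /* low 16 bits: cap ID */
                .match_value            = PCI_EXT_CAP_ID_ERR,
                .reg_offset             = PCI_ERR_UNCOR_MASK,
                .reg_mask_and           = ~0U,          /* preserve all bits */
                .reg_mask_or            = PCI_ERR_UNC_SURPDN,   /* set bit 5 */
        };

        program_hpx_type3(dev, &example_rec);   /* no-op unless dev matches */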
+
 static void parse_hpx3_register(struct hpx_type3 *hpx3_reg,
                                union acpi_object *reg_fields)
 {
@@ -233,8 +584,7 @@ static void parse_hpx3_register(struct hpx_type3 *hpx3_reg,
 }
 
 static acpi_status program_type3_hpx_record(struct pci_dev *dev,
-                                          union acpi_object *record,
-                                          const struct hotplug_program_ops *hp_ops)
+                                          union acpi_object *record)
 {
        union acpi_object *fields = record->package.elements;
        u32 desc_count, expected_length, revision;
@@ -258,7 +608,7 @@ static acpi_status program_type3_hpx_record(struct pci_dev *dev,
                for (i = 0; i < desc_count; i++) {
                        reg_fields = fields + 3 + i * 14;
                        parse_hpx3_register(&hpx3, reg_fields);
-                       hp_ops->program_type3(dev, &hpx3);
+                       program_hpx_type3(dev, &hpx3);
                }
 
                break;
@@ -271,15 +621,14 @@ static acpi_status program_type3_hpx_record(struct pci_dev *dev,
        return AE_OK;
 }
 
-static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle,
-                               const struct hotplug_program_ops *hp_ops)
+static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle)
 {
        acpi_status status;
        struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
        union acpi_object *package, *record, *fields;
-       struct hpp_type0 hpx0;
-       struct hpp_type1 hpx1;
-       struct hpp_type2 hpx2;
+       struct hpx_type0 hpx0;
+       struct hpx_type1 hpx1;
+       struct hpx_type2 hpx2;
        u32 type;
        int i;
 
@@ -314,24 +663,24 @@ static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle,
                        status = decode_type0_hpx_record(record, &hpx0);
                        if (ACPI_FAILURE(status))
                                goto exit;
-                       hp_ops->program_type0(dev, &hpx0);
+                       program_hpx_type0(dev, &hpx0);
                        break;
                case 1:
                        memset(&hpx1, 0, sizeof(hpx1));
                        status = decode_type1_hpx_record(record, &hpx1);
                        if (ACPI_FAILURE(status))
                                goto exit;
-                       hp_ops->program_type1(dev, &hpx1);
+                       program_hpx_type1(dev, &hpx1);
                        break;
                case 2:
                        memset(&hpx2, 0, sizeof(hpx2));
                        status = decode_type2_hpx_record(record, &hpx2);
                        if (ACPI_FAILURE(status))
                                goto exit;
-                       hp_ops->program_type2(dev, &hpx2);
+                       program_hpx_type2(dev, &hpx2);
                        break;
                case 3:
-                       status = program_type3_hpx_record(dev, record, hp_ops);
+                       status = program_type3_hpx_record(dev, record);
                        if (ACPI_FAILURE(status))
                                goto exit;
                        break;
@@ -347,16 +696,15 @@ static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle,
        return status;
 }
 
-static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle,
-                               const struct hotplug_program_ops *hp_ops)
+static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle)
 {
        acpi_status status;
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *package, *fields;
-       struct hpp_type0 hpp0;
+       struct hpx_type0 hpx0;
        int i;
 
-       memset(&hpp0, 0, sizeof(hpp0));
+       memset(&hpx0, 0, sizeof(hpx0));
 
        status = acpi_evaluate_object(handle, "_HPP", NULL, &buffer);
        if (ACPI_FAILURE(status))
@@ -377,26 +725,24 @@ static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle,
                }
        }
 
-       hpp0.revision        = 1;
-       hpp0.cache_line_size = fields[0].integer.value;
-       hpp0.latency_timer   = fields[1].integer.value;
-       hpp0.enable_serr     = fields[2].integer.value;
-       hpp0.enable_perr     = fields[3].integer.value;
+       hpx0.revision        = 1;
+       hpx0.cache_line_size = fields[0].integer.value;
+       hpx0.latency_timer   = fields[1].integer.value;
+       hpx0.enable_serr     = fields[2].integer.value;
+       hpx0.enable_perr     = fields[3].integer.value;
 
-       hp_ops->program_type0(dev, &hpp0);
+       program_hpx_type0(dev, &hpx0);
 
 exit:
        kfree(buffer.pointer);
        return status;
 }
 
-/* pci_get_hp_params
+/* pci_acpi_program_hp_params
  *
  * @dev - the pci_dev for which we want parameters
- * @hpp - allocated by the caller
  */
-int pci_acpi_program_hp_params(struct pci_dev *dev,
-                              const struct hotplug_program_ops *hp_ops)
+int pci_acpi_program_hp_params(struct pci_dev *dev)
 {
        acpi_status status;
        acpi_handle handle, phandle;
@@ -419,10 +765,10 @@ int pci_acpi_program_hp_params(struct pci_dev *dev,
         * this pci dev.
         */
        while (handle) {
-               status = acpi_run_hpx(dev, handle, hp_ops);
+               status = acpi_run_hpx(dev, handle);
                if (ACPI_SUCCESS(status))
                        return 0;
-               status = acpi_run_hpp(dev, handle, hp_ops);
+               status = acpi_run_hpp(dev, handle);
                if (ACPI_SUCCESS(status))
                        return 0;
                if (acpi_is_root_bridge(handle))
index 06083b8..5fd9010 100644 (file)
@@ -38,7 +38,7 @@ struct pci_bridge_reg_behavior {
        u32 rsvd;
 };
 
-const static struct pci_bridge_reg_behavior pci_regs_behavior[] = {
+static const struct pci_bridge_reg_behavior pci_regs_behavior[] = {
        [PCI_VENDOR_ID / 4] = { .ro = ~0 },
        [PCI_COMMAND / 4] = {
                .rw = (PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
@@ -173,7 +173,7 @@ const static struct pci_bridge_reg_behavior pci_regs_behavior[] = {
        },
 };
 
-const static struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = {
+static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = {
        [PCI_CAP_LIST_ID / 4] = {
                /*
                 * Capability ID, Next Capability Pointer and
index 965c721..7934129 100644 (file)
@@ -464,9 +464,7 @@ static ssize_t dev_rescan_store(struct device *dev,
        }
        return count;
 }
-static struct device_attribute dev_rescan_attr = __ATTR(rescan,
-                                                       (S_IWUSR|S_IWGRP),
-                                                       NULL, dev_rescan_store);
+static DEVICE_ATTR_WO(dev_rescan);
 
 static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
                            const char *buf, size_t count)
@@ -480,13 +478,12 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
                pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
        return count;
 }
-static struct device_attribute dev_remove_attr = __ATTR_IGNORE_LOCKDEP(remove,
-                                                       (S_IWUSR|S_IWGRP),
-                                                       NULL, remove_store);
+static DEVICE_ATTR_IGNORE_LOCKDEP(remove, 0220, NULL,
+                                 remove_store);
 
-static ssize_t dev_bus_rescan_store(struct device *dev,
-                                   struct device_attribute *attr,
-                                   const char *buf, size_t count)
+static ssize_t bus_rescan_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
 {
        unsigned long val;
        struct pci_bus *bus = to_pci_bus(dev);
@@ -504,7 +501,7 @@ static ssize_t dev_bus_rescan_store(struct device *dev,
        }
        return count;
 }
-static DEVICE_ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store);
+static DEVICE_ATTR_WO(bus_rescan);
 
 #if defined(CONFIG_PM) && defined(CONFIG_ACPI)
 static ssize_t d3cold_allowed_store(struct device *dev,
@@ -551,154 +548,6 @@ static ssize_t devspec_show(struct device *dev,
 static DEVICE_ATTR_RO(devspec);
 #endif
 
-#ifdef CONFIG_PCI_IOV
-static ssize_t sriov_totalvfs_show(struct device *dev,
-                                  struct device_attribute *attr,
-                                  char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
-}
-
-
-static ssize_t sriov_numvfs_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->num_VFs);
-}
-
-/*
- * num_vfs > 0; number of VFs to enable
- * num_vfs = 0; disable all VFs
- *
- * Note: SRIOV spec doesn't allow partial VF
- *       disable, so it's all or none.
- */
-static ssize_t sriov_numvfs_store(struct device *dev,
-                                 struct device_attribute *attr,
-                                 const char *buf, size_t count)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-       int ret;
-       u16 num_vfs;
-
-       ret = kstrtou16(buf, 0, &num_vfs);
-       if (ret < 0)
-               return ret;
-
-       if (num_vfs > pci_sriov_get_totalvfs(pdev))
-               return -ERANGE;
-
-       device_lock(&pdev->dev);
-
-       if (num_vfs == pdev->sriov->num_VFs)
-               goto exit;
-
-       /* is PF driver loaded w/callback */
-       if (!pdev->driver || !pdev->driver->sriov_configure) {
-               pci_info(pdev, "Driver doesn't support SRIOV configuration via sysfs\n");
-               ret = -ENOENT;
-               goto exit;
-       }
-
-       if (num_vfs == 0) {
-               /* disable VFs */
-               ret = pdev->driver->sriov_configure(pdev, 0);
-               goto exit;
-       }
-
-       /* enable VFs */
-       if (pdev->sriov->num_VFs) {
-               pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
-                        pdev->sriov->num_VFs, num_vfs);
-               ret = -EBUSY;
-               goto exit;
-       }
-
-       ret = pdev->driver->sriov_configure(pdev, num_vfs);
-       if (ret < 0)
-               goto exit;
-
-       if (ret != num_vfs)
-               pci_warn(pdev, "%d VFs requested; only %d enabled\n",
-                        num_vfs, ret);
-
-exit:
-       device_unlock(&pdev->dev);
-
-       if (ret < 0)
-               return ret;
-
-       return count;
-}
-
-static ssize_t sriov_offset_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->offset);
-}
-
-static ssize_t sriov_stride_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->stride);
-}
-
-static ssize_t sriov_vf_device_show(struct device *dev,
-                                   struct device_attribute *attr,
-                                   char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%x\n", pdev->sriov->vf_device);
-}
-
-static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
-                                           struct device_attribute *attr,
-                                           char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
-}
-
-static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
-                                            struct device_attribute *attr,
-                                            const char *buf, size_t count)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-       bool drivers_autoprobe;
-
-       if (kstrtobool(buf, &drivers_autoprobe) < 0)
-               return -EINVAL;
-
-       pdev->sriov->drivers_autoprobe = drivers_autoprobe;
-
-       return count;
-}
-
-static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
-static struct device_attribute sriov_numvfs_attr =
-               __ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
-                      sriov_numvfs_show, sriov_numvfs_store);
-static struct device_attribute sriov_offset_attr = __ATTR_RO(sriov_offset);
-static struct device_attribute sriov_stride_attr = __ATTR_RO(sriov_stride);
-static struct device_attribute sriov_vf_device_attr = __ATTR_RO(sriov_vf_device);
-static struct device_attribute sriov_drivers_autoprobe_attr =
-               __ATTR(sriov_drivers_autoprobe, (S_IRUGO|S_IWUSR|S_IWGRP),
-                      sriov_drivers_autoprobe_show, sriov_drivers_autoprobe_store);
-#endif /* CONFIG_PCI_IOV */
-
 static ssize_t driver_override_store(struct device *dev,
                                     struct device_attribute *attr,
                                     const char *buf, size_t count)
@@ -792,7 +641,7 @@ static struct attribute *pcie_dev_attrs[] = {
 };
 
 static struct attribute *pcibus_attrs[] = {
-       &dev_attr_rescan.attr,
+       &dev_attr_bus_rescan.attr,
        &dev_attr_cpuaffinity.attr,
        &dev_attr_cpulistaffinity.attr,
        NULL,
@@ -820,7 +669,7 @@ static ssize_t boot_vga_show(struct device *dev, struct device_attribute *attr,
                !!(pdev->resource[PCI_ROM_RESOURCE].flags &
                   IORESOURCE_ROM_SHADOW));
 }
-static struct device_attribute vga_attr = __ATTR_RO(boot_vga);
+static DEVICE_ATTR_RO(boot_vga);
 
 static ssize_t pci_read_config(struct file *filp, struct kobject *kobj,
                               struct bin_attribute *bin_attr, char *buf,
@@ -906,6 +755,11 @@ static ssize_t pci_write_config(struct file *filp, struct kobject *kobj,
        unsigned int size = count;
        loff_t init_off = off;
        u8 *data = (u8 *) buf;
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
 
        if (off > dev->cfg_size)
                return 0;
@@ -1085,7 +939,7 @@ void pci_create_legacy_files(struct pci_bus *b)
        sysfs_bin_attr_init(b->legacy_io);
        b->legacy_io->attr.name = "legacy_io";
        b->legacy_io->size = 0xffff;
-       b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+       b->legacy_io->attr.mode = 0600;
        b->legacy_io->read = pci_read_legacy_io;
        b->legacy_io->write = pci_write_legacy_io;
        b->legacy_io->mmap = pci_mmap_legacy_io;
@@ -1099,7 +953,7 @@ void pci_create_legacy_files(struct pci_bus *b)
        sysfs_bin_attr_init(b->legacy_mem);
        b->legacy_mem->attr.name = "legacy_mem";
        b->legacy_mem->size = 1024*1024;
-       b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+       b->legacy_mem->attr.mode = 0600;
        b->legacy_mem->mmap = pci_mmap_legacy_mem;
        pci_adjust_legacy_attr(b, pci_mmap_mem);
        error = device_create_bin_file(&b->dev, b->legacy_mem);
@@ -1167,6 +1021,11 @@ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
        int bar = (unsigned long)attr->private;
        enum pci_mmap_state mmap_type;
        struct resource *res = &pdev->resource[bar];
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
 
        if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
                return -EINVAL;
@@ -1243,6 +1102,12 @@ static ssize_t pci_write_resource_io(struct file *filp, struct kobject *kobj,
                                     struct bin_attribute *attr, char *buf,
                                     loff_t off, size_t count)
 {
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
+
        return pci_resource_io(filp, kobj, attr, buf, off, count, true);
 }
 
@@ -1306,7 +1171,7 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
                }
        }
        res_attr->attr.name = res_attr_name;
-       res_attr->attr.mode = S_IRUSR | S_IWUSR;
+       res_attr->attr.mode = 0600;
        res_attr->size = pci_resource_len(pdev, num);
        res_attr->private = (void *)(unsigned long)num;
        retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
@@ -1419,7 +1284,7 @@ static ssize_t pci_read_rom(struct file *filp, struct kobject *kobj,
 static const struct bin_attribute pci_config_attr = {
        .attr = {
                .name = "config",
-               .mode = S_IRUGO | S_IWUSR,
+               .mode = 0644,
        },
        .size = PCI_CFG_SPACE_SIZE,
        .read = pci_read_config,
@@ -1429,7 +1294,7 @@ static const struct bin_attribute pci_config_attr = {
 static const struct bin_attribute pcie_config_attr = {
        .attr = {
                .name = "config",
-               .mode = S_IRUGO | S_IWUSR,
+               .mode = 0644,
        },
        .size = PCI_CFG_SPACE_EXP_SIZE,
        .read = pci_read_config,
@@ -1458,7 +1323,7 @@ static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
        return count;
 }
 
-static struct device_attribute reset_attr = __ATTR(reset, 0200, NULL, reset_store);
+static DEVICE_ATTR(reset, 0200, NULL, reset_store);
 
 static int pci_create_capabilities_sysfs(struct pci_dev *dev)
 {
@@ -1468,7 +1333,7 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
        pcie_aspm_create_sysfs_dev_files(dev);
 
        if (dev->reset_fn) {
-               retval = device_create_file(&dev->dev, &reset_attr);
+               retval = device_create_file(&dev->dev, &dev_attr_reset);
                if (retval)
                        goto error;
        }
@@ -1511,7 +1376,7 @@ int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
                sysfs_bin_attr_init(attr);
                attr->size = rom_size;
                attr->attr.name = "rom";
-               attr->attr.mode = S_IRUSR | S_IWUSR;
+               attr->attr.mode = 0600;
                attr->read = pci_read_rom;
                attr->write = pci_write_rom;
                retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
@@ -1553,7 +1418,7 @@ static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
        pcie_vpd_remove_sysfs_dev_files(dev);
        pcie_aspm_remove_sysfs_dev_files(dev);
        if (dev->reset_fn) {
-               device_remove_file(&dev->dev, &reset_attr);
+               device_remove_file(&dev->dev, &dev_attr_reset);
                dev->reset_fn = 0;
        }
 }
@@ -1606,7 +1471,7 @@ static int __init pci_sysfs_init(void)
 late_initcall(pci_sysfs_init);
 
 static struct attribute *pci_dev_dev_attrs[] = {
-       &vga_attr.attr,
+       &dev_attr_boot_vga.attr,
        NULL,
 };
 
@@ -1616,7 +1481,7 @@ static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
        struct device *dev = kobj_to_dev(kobj);
        struct pci_dev *pdev = to_pci_dev(dev);
 
-       if (a == &vga_attr.attr)
+       if (a == &dev_attr_boot_vga.attr)
                if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
                        return 0;
 
@@ -1624,8 +1489,8 @@ static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
 }
 
 static struct attribute *pci_dev_hp_attrs[] = {
-       &dev_remove_attr.attr,
-       &dev_rescan_attr.attr,
+       &dev_attr_remove.attr,
+       &dev_attr_dev_rescan.attr,
        NULL,
 };
 
@@ -1697,34 +1562,6 @@ static const struct attribute_group pci_dev_hp_attr_group = {
        .is_visible = pci_dev_hp_attrs_are_visible,
 };
 
-#ifdef CONFIG_PCI_IOV
-static struct attribute *sriov_dev_attrs[] = {
-       &sriov_totalvfs_attr.attr,
-       &sriov_numvfs_attr.attr,
-       &sriov_offset_attr.attr,
-       &sriov_stride_attr.attr,
-       &sriov_vf_device_attr.attr,
-       &sriov_drivers_autoprobe_attr.attr,
-       NULL,
-};
-
-static umode_t sriov_attrs_are_visible(struct kobject *kobj,
-                                      struct attribute *a, int n)
-{
-       struct device *dev = kobj_to_dev(kobj);
-
-       if (!dev_is_pf(dev))
-               return 0;
-
-       return a->mode;
-}
-
-static const struct attribute_group sriov_dev_attr_group = {
-       .attrs = sriov_dev_attrs,
-       .is_visible = sriov_attrs_are_visible,
-};
-#endif /* CONFIG_PCI_IOV */
-
 static const struct attribute_group pci_dev_attr_group = {
        .attrs = pci_dev_dev_attrs,
        .is_visible = pci_dev_attrs_are_visible,
index 1b27b5a..a97e257 100644 (file)
@@ -890,8 +890,8 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
 
        pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
        dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
-       if (dev->current_state != state && printk_ratelimit())
-               pci_info(dev, "Refused to change power state, currently in D%d\n",
+       if (dev->current_state != state)
+               pci_info_ratelimited(dev, "Refused to change power state, currently in D%d\n",
                         dev->current_state);
 
        /*
@@ -958,19 +958,6 @@ void pci_refresh_power_state(struct pci_dev *dev)
        pci_update_current_state(dev, dev->current_state);
 }
 
-/**
- * pci_power_up - Put the given device into D0 forcibly
- * @dev: PCI device to power up
- */
-void pci_power_up(struct pci_dev *dev)
-{
-       if (platform_pci_power_manageable(dev))
-               platform_pci_set_power_state(dev, PCI_D0);
-
-       pci_raw_set_power_state(dev, PCI_D0);
-       pci_update_current_state(dev, PCI_D0);
-}
-
 /**
  * pci_platform_power_transition - Use platform to change device power state
  * @dev: PCI device to handle.
@@ -1153,6 +1140,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 }
 EXPORT_SYMBOL(pci_set_power_state);
 
+/**
+ * pci_power_up - Put the given device into D0 forcibly
+ * @dev: PCI device to power up
+ */
+void pci_power_up(struct pci_dev *dev)
+{
+       __pci_start_power_transition(dev, PCI_D0);
+       pci_raw_set_power_state(dev, PCI_D0);
+       pci_update_current_state(dev, PCI_D0);
+}
+
 /**
  * pci_choose_state - Choose the power state of a PCI device
  * @dev: PCI device to be suspended
@@ -1443,7 +1441,7 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
                pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
                bar_idx = ctrl & PCI_REBAR_CTRL_BAR_IDX;
                res = pdev->resource + bar_idx;
-               size = order_base_2((resource_size(res) >> 20) | 1) - 1;
+               size = ilog2(resource_size(res)) - 20;
                ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
                ctrl |= size << PCI_REBAR_CTRL_BAR_SHIFT;
                pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
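The new size computation deserves a worked example. The Resizable BAR control register encodes a BAR of 2^n bytes as n - 20 (0 = 1 MB, 1 = 2 MB, ...), so for a 1 GB BAR: ilog2(0x40000000) - 20 = 30 - 20 = 10. For power-of-two sizes of 2 MB and up the old order_base_2((size >> 20) | 1) - 1 expression produced the same value, but at exactly 1 MB it evaluated to order_base_2(1) - 1 = -1, corrupting the control value, while the direct ilog2() form yields the correct 0. Illustrative arithmetic, not driver code:

        resource_size_t sz = SZ_1G;     /* 0x40000000 */
        int size = ilog2(sz) - 20;      /* 30 - 20 = 10 -> "1 GB" encoding */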
@@ -3581,7 +3579,7 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask)
                }
 
                /* Ensure upstream ports don't block AtomicOps on egress */
-               if (!bridge->has_secondary_link) {
+               if (pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM) {
                        pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2,
                                                   &ctl2);
                        if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK)
@@ -5923,8 +5921,19 @@ resource_size_t __weak pcibios_default_alignment(void)
        return 0;
 }
 
-#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
-static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
+/*
+ * Arches that don't want to expose struct resource to userland as-is in
+ * sysfs and /proc can implement their own pci_resource_to_user().
+ */
+void __weak pci_resource_to_user(const struct pci_dev *dev, int bar,
+                                const struct resource *rsrc,
+                                resource_size_t *start, resource_size_t *end)
+{
+       *start = rsrc->start;
+       *end = rsrc->end;
+}
+
+static char *resource_alignment_param;
 static DEFINE_SPINLOCK(resource_alignment_lock);
 
 /**
@@ -5945,7 +5954,7 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
 
        spin_lock(&resource_alignment_lock);
        p = resource_alignment_param;
-       if (!*p && !align)
+       if (!p || !*p)
                goto out;
        if (pci_has_flag(PCI_PROBE_ONLY)) {
                align = 0;
@@ -6109,35 +6118,41 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev)
        }
 }
 
-static ssize_t pci_set_resource_alignment_param(const char *buf, size_t count)
+static ssize_t resource_alignment_show(struct bus_type *bus, char *buf)
 {
-       if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1)
-               count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1;
-       spin_lock(&resource_alignment_lock);
-       strncpy(resource_alignment_param, buf, count);
-       resource_alignment_param[count] = '\0';
-       spin_unlock(&resource_alignment_lock);
-       return count;
-}
+       size_t count = 0;
 
-static ssize_t pci_get_resource_alignment_param(char *buf, size_t size)
-{
-       size_t count;
        spin_lock(&resource_alignment_lock);
-       count = snprintf(buf, size, "%s", resource_alignment_param);
+       if (resource_alignment_param)
+               count = snprintf(buf, PAGE_SIZE, "%s", resource_alignment_param);
        spin_unlock(&resource_alignment_lock);
-       return count;
-}
 
-static ssize_t resource_alignment_show(struct bus_type *bus, char *buf)
-{
-       return pci_get_resource_alignment_param(buf, PAGE_SIZE);
+       /*
+        * When set by the command line, resource_alignment_param will not
+        * have a trailing line feed, which is ugly. So conditionally add
+        * it here.
+        */
+       if (count >= 2 && buf[count - 2] != '\n' && count < PAGE_SIZE - 1) {
+               buf[count - 1] = '\n';
+               buf[count++] = 0;
+       }
+
+       return count;
 }
 
 static ssize_t resource_alignment_store(struct bus_type *bus,
                                        const char *buf, size_t count)
 {
-       return pci_set_resource_alignment_param(buf, count);
+       char *param = kstrndup(buf, count, GFP_KERNEL);
+
+       if (!param)
+               return -ENOMEM;
+
+       spin_lock(&resource_alignment_lock);
+       kfree(resource_alignment_param);
+       resource_alignment_param = param;
+       spin_unlock(&resource_alignment_lock);
+       return count;
 }
 
 static BUS_ATTR_RW(resource_alignment);
@@ -6266,8 +6281,7 @@ static int __init pci_setup(char *str)
                        } else if (!strncmp(str, "cbmemsize=", 10)) {
                                pci_cardbus_mem_size = memparse(str + 10, &str);
                        } else if (!strncmp(str, "resource_alignment=", 19)) {
-                               pci_set_resource_alignment_param(str + 19,
-                                                       strlen(str + 19));
+                               resource_alignment_param = str + 19;
                        } else if (!strncmp(str, "ecrc=", 5)) {
                                pcie_ecrc_get_policy(str + 5);
                        } else if (!strncmp(str, "hpiosize=", 9)) {
@@ -6302,15 +6316,18 @@ static int __init pci_setup(char *str)
 early_param("pci", pci_setup);
 
 /*
- * 'disable_acs_redir_param' is initialized in pci_setup(), above, to point
- * to data in the __initdata section which will be freed after the init
- * sequence is complete. We can't allocate memory in pci_setup() because some
- * architectures do not have any memory allocation service available during
- * an early_param() call. So we allocate memory and copy the variable here
- * before the init section is freed.
+ * 'resource_alignment_param' and 'disable_acs_redir_param' are initialized
+ * in pci_setup(), above, to point to data in the __initdata section which
+ * will be freed after the init sequence is complete. We can't allocate memory
+ * in pci_setup() because some architectures do not have any memory allocation
+ * service available during an early_param() call. So we allocate memory and
+ * copy the variable here before the init section is freed.
  */
 static int __init pci_realloc_setup_params(void)
 {
+       resource_alignment_param = kstrdup(resource_alignment_param,
+                                          GFP_KERNEL);
        disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL);
 
        return 0;
index d22d1b8..3f6947e 100644 (file)
@@ -39,6 +39,11 @@ int pci_probe_reset_function(struct pci_dev *dev);
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
 
+#define PCI_PM_D2_DELAY         200
+#define PCI_PM_D3_WAIT          10
+#define PCI_PM_D3COLD_WAIT      100
+#define PCI_PM_BUS_WAIT         50
+
 /**
  * struct pci_platform_pm_ops - Firmware PM callbacks
  *
@@ -84,6 +89,8 @@ void pci_power_up(struct pci_dev *dev);
 void pci_disable_enabled_device(struct pci_dev *dev);
 int pci_finish_runtime_suspend(struct pci_dev *dev);
 void pcie_clear_root_pme_status(struct pci_dev *dev);
+bool pci_check_pme_status(struct pci_dev *dev);
+void pci_pme_wakeup_bus(struct pci_bus *bus);
 int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
 void pci_pme_restore(struct pci_dev *dev);
 bool pci_dev_need_resume(struct pci_dev *dev);
@@ -118,11 +125,25 @@ static inline bool pci_power_manageable(struct pci_dev *pci_dev)
        return !pci_has_subordinate(pci_dev) || pci_dev->bridge_d3;
 }
 
+static inline bool pcie_downstream_port(const struct pci_dev *dev)
+{
+       int type = pci_pcie_type(dev);
+
+       return type == PCI_EXP_TYPE_ROOT_PORT ||
+              type == PCI_EXP_TYPE_DOWNSTREAM ||
+              type == PCI_EXP_TYPE_PCIE_BRIDGE;
+}
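This helper gives a name to the test that the removed has_secondary_link flag used to answer: Root Ports and PCI/PCI-X-to-PCIe bridges always sit on the upstream end of a Link (the old set_pcie_port_type() code set has_secondary_link unconditionally for both), and Switch Downstream Ports do by definition. A hedged caller-side sketch:

        /* Illustration: find the downstream-facing port above a device */
        struct pci_dev *bridge = pci_upstream_bridge(pdev);

        if (bridge && pcie_downstream_port(bridge))
                pci_dbg(pdev, "parent %s drives our Link\n", pci_name(bridge));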
+
 int pci_vpd_init(struct pci_dev *dev);
 void pci_vpd_release(struct pci_dev *dev);
 void pcie_vpd_create_sysfs_dev_files(struct pci_dev *dev);
 void pcie_vpd_remove_sysfs_dev_files(struct pci_dev *dev);
 
+/* PCI Virtual Channel */
+int pci_save_vc_state(struct pci_dev *dev);
+void pci_restore_vc_state(struct pci_dev *dev);
+void pci_allocate_vc_save_buffers(struct pci_dev *dev);
+
 /* PCI /proc functions */
 #ifdef CONFIG_PROC_FS
 int pci_proc_attach_device(struct pci_dev *dev);
@@ -196,6 +217,9 @@ extern const struct attribute_group *pcibus_groups[];
 extern const struct device_type pci_dev_type;
 extern const struct attribute_group *pci_bus_groups[];
 
+extern unsigned long pci_hotplug_io_size;
+extern unsigned long pci_hotplug_mem_size;
+extern unsigned long pci_hotplug_bus_size;
 
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
@@ -236,6 +260,9 @@ enum pci_bar_type {
        pci_bar_mem64,          /* A 64-bit memory BAR */
 };
 
+struct device *pci_get_host_bridge_device(struct pci_dev *dev);
+void pci_put_host_bridge_device(struct device *dev);
+
 int pci_configure_extended_tags(struct pci_dev *dev, void *ign);
 bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
                                int crs_timeout);
@@ -256,6 +283,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx);
 
 void pci_reassigndev_resource_alignment(struct pci_dev *dev);
 void pci_disable_bridge_window(struct pci_dev *dev);
+struct pci_bus *pci_bus_get(struct pci_bus *bus);
+void pci_bus_put(struct pci_bus *bus);
 
 /* PCIe link information */
 #define PCIE_SPEED2STR(speed) \
@@ -279,6 +308,7 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
                           enum pcie_link_width *width);
 void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
 void pcie_report_downtraining(struct pci_dev *dev);
+void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
 
 /* Single Root I/O Virtualization */
 struct pci_sriov {
@@ -418,11 +448,12 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {}
 #endif
 
 #ifdef CONFIG_PCI_ATS
+/* Address Translation Service */
+void pci_ats_init(struct pci_dev *dev);
 void pci_restore_ats_state(struct pci_dev *dev);
 #else
-static inline void pci_restore_ats_state(struct pci_dev *dev)
-{
-}
+static inline void pci_ats_init(struct pci_dev *d) { }
+static inline void pci_restore_ats_state(struct pci_dev *dev) { }
 #endif /* CONFIG_PCI_ATS */
 
 #ifdef CONFIG_PCI_IOV
@@ -433,7 +464,7 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno);
 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
 void pci_restore_iov_state(struct pci_dev *dev);
 int pci_iov_bus_range(struct pci_bus *bus);
-
+extern const struct attribute_group sriov_dev_attr_group;
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -518,10 +549,21 @@ static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { }
 static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { }
 #endif
 
+#ifdef CONFIG_PCIE_ECRC
+void pcie_set_ecrc_checking(struct pci_dev *dev);
+void pcie_ecrc_get_policy(char *str);
+#else
+static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
+static inline void pcie_ecrc_get_policy(char *str) { }
+#endif
+
 #ifdef CONFIG_PCIE_PTM
 void pci_ptm_init(struct pci_dev *dev);
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
 #else
 static inline void pci_ptm_init(struct pci_dev *dev) { }
+static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{ return -EINVAL; }
 #endif
 
 struct pci_dev_reset_methods {
@@ -558,6 +600,10 @@ struct device_node;
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
 int of_pci_get_max_link_speed(struct device_node *node);
+void pci_set_of_node(struct pci_dev *dev);
+void pci_release_of_node(struct pci_dev *dev);
+void pci_set_bus_of_node(struct pci_bus *bus);
+void pci_release_bus_of_node(struct pci_bus *bus);
 
 #else
 static inline int
@@ -577,6 +623,11 @@ of_pci_get_max_link_speed(struct device_node *node)
 {
        return -EINVAL;
 }
+
+static inline void pci_set_of_node(struct pci_dev *dev) { }
+static inline void pci_release_of_node(struct pci_dev *dev) { }
+static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
+static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
 #endif /* CONFIG_OF */
 
 #if defined(CONFIG_OF_ADDRESS)
@@ -607,4 +658,13 @@ static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
 static inline void pci_aer_clear_device_status(struct pci_dev *dev) { }
 #endif
 
+#ifdef CONFIG_ACPI
+int pci_acpi_program_hp_params(struct pci_dev *dev);
+#else
+static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
+{
+       return -ENODEV;
+}
+#endif
+
 #endif /* DRIVERS_PCI_H */
index 464f8f9..652ef23 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/jiffies.h>
 #include <linux/delay.h>
-#include <linux/pci-aspm.h>
 #include "../pci.h"
 
 #ifdef MODULE_PARAM_PREFIX
@@ -913,10 +912,10 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 
        /*
         * We allocate pcie_link_state for the component on the upstream
-        * end of a Link, so there's nothing to do unless this device has a
-        * Link on its secondary side.
+        * end of a Link, so there's nothing to do unless this device is
+        * downstream port.
         */
-       if (!pdev->has_secondary_link)
+       if (!pcie_downstream_port(pdev))
                return;
 
        /* VIA has a strange chipset, root port is under a bridge */
@@ -1070,7 +1069,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
        if (!pci_is_pcie(pdev))
                return 0;
 
-       if (pdev->has_secondary_link)
+       if (pcie_downstream_port(pdev))
                parent = pdev;
        if (!parent || !parent->link_state)
                return -EINVAL;
index 773197a..b0e6048 100644 (file)
@@ -166,7 +166,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
        driver = pcie_port_find_service(dev, service);
        if (driver && driver->reset_link) {
                status = driver->reset_link(dev);
-       } else if (dev->has_secondary_link) {
+       } else if (pcie_downstream_port(dev)) {
                status = default_reset_link(dev);
        } else {
                pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
index dbeeb38..3d5271a 100644 (file)
@@ -1426,26 +1426,38 @@ void set_pcie_port_type(struct pci_dev *pdev)
        pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
        pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
 
+       parent = pci_upstream_bridge(pdev);
+       if (!parent)
+               return;
+
        /*
-        * A Root Port or a PCI-to-PCIe bridge is always the upstream end
-        * of a Link.  No PCIe component has two Links.  Two Links are
-        * connected by a Switch that has a Port on each Link and internal
-        * logic to connect the two Ports.
+        * Some systems do not identify their upstream/downstream ports
+        * correctly, so detect impossible configurations here and correct
+        * the port type accordingly.
         */
        type = pci_pcie_type(pdev);
-       if (type == PCI_EXP_TYPE_ROOT_PORT ||
-           type == PCI_EXP_TYPE_PCIE_BRIDGE)
-               pdev->has_secondary_link = 1;
-       else if (type == PCI_EXP_TYPE_UPSTREAM ||
-                type == PCI_EXP_TYPE_DOWNSTREAM) {
-               parent = pci_upstream_bridge(pdev);
-
+       if (type == PCI_EXP_TYPE_DOWNSTREAM) {
                /*
-                * Usually there's an upstream device (Root Port or Switch
-                * Downstream Port), but we can't assume one exists.
+                * If pdev claims to be a downstream port but the parent
+                * device is also a downstream port, assume pdev is actually
+                * an upstream port.
                 */
-               if (parent && !parent->has_secondary_link)
-                       pdev->has_secondary_link = 1;
+               if (pcie_downstream_port(parent)) {
+                       pci_info(pdev, "claims to be downstream port but is acting as upstream port, correcting type\n");
+                       pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+                       pdev->pcie_flags_reg |= PCI_EXP_TYPE_UPSTREAM;
+               }
+       } else if (type == PCI_EXP_TYPE_UPSTREAM) {
+               /*
+                * If pdev claims to be an upstream port but the parent
+                * device is also an upstream port, assume pdev is actually
+                * a downstream port.
+                */
+               if (pci_pcie_type(parent) == PCI_EXP_TYPE_UPSTREAM) {
+                       pci_info(pdev, "claims to be upstream port but is acting as downstream port, correcting type\n");
+                       pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+                       pdev->pcie_flags_reg |= PCI_EXP_TYPE_DOWNSTREAM;
+               }
        }
 }
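A concrete (hypothetical) topology the fixup above handles: a switch whose firmware advertises every port as a Downstream Port. Walking down from the Root Port, the first switch port claims Downstream while its parent is already downstream-facing, which is impossible, so its type is rewritten:

        /*
         *   Root Port (PCI_EXP_TYPE_ROOT_PORT, 0x4)
         *     +-- switch port claiming Downstream (0x6)
         *           -> parent is downstream-facing: corrected to Upstream (0x5)
         *           +-- Switch Downstream Port (0x6)  -> now consistent
         */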
 
@@ -1915,275 +1927,6 @@ static void pci_configure_mps(struct pci_dev *dev)
                 p_mps, mps, mpss);
 }
 
-static struct hpp_type0 pci_default_type0 = {
-       .revision = 1,
-       .cache_line_size = 8,
-       .latency_timer = 0x40,
-       .enable_serr = 0,
-       .enable_perr = 0,
-};
-
-static void program_hpp_type0(struct pci_dev *dev, struct hpp_type0 *hpp)
-{
-       u16 pci_cmd, pci_bctl;
-
-       if (!hpp)
-               hpp = &pci_default_type0;
-
-       if (hpp->revision > 1) {
-               pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
-                        hpp->revision);
-               hpp = &pci_default_type0;
-       }
-
-       pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpp->cache_line_size);
-       pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpp->latency_timer);
-       pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
-       if (hpp->enable_serr)
-               pci_cmd |= PCI_COMMAND_SERR;
-       if (hpp->enable_perr)
-               pci_cmd |= PCI_COMMAND_PARITY;
-       pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
-
-       /* Program bridge control value */
-       if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
-               pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
-                                     hpp->latency_timer);
-               pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
-               if (hpp->enable_perr)
-                       pci_bctl |= PCI_BRIDGE_CTL_PARITY;
-               pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
-       }
-}
-
-static void program_hpp_type1(struct pci_dev *dev, struct hpp_type1 *hpp)
-{
-       int pos;
-
-       if (!hpp)
-               return;
-
-       pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
-       if (!pos)
-               return;
-
-       pci_warn(dev, "PCI-X settings not supported\n");
-}
-
-static bool pcie_root_rcb_set(struct pci_dev *dev)
-{
-       struct pci_dev *rp = pcie_find_root_port(dev);
-       u16 lnkctl;
-
-       if (!rp)
-               return false;
-
-       pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl);
-       if (lnkctl & PCI_EXP_LNKCTL_RCB)
-               return true;
-
-       return false;
-}
-
-static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
-{
-       int pos;
-       u32 reg32;
-
-       if (!hpp)
-               return;
-
-       if (!pci_is_pcie(dev))
-               return;
-
-       if (hpp->revision > 1) {
-               pci_warn(dev, "PCIe settings rev %d not supported\n",
-                        hpp->revision);
-               return;
-       }
-
-       /*
-        * Don't allow _HPX to change MPS or MRRS settings.  We manage
-        * those to make sure they're consistent with the rest of the
-        * platform.
-        */
-       hpp->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
-                                   PCI_EXP_DEVCTL_READRQ;
-       hpp->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
-                                   PCI_EXP_DEVCTL_READRQ);
-
-       /* Initialize Device Control Register */
-       pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
-                       ~hpp->pci_exp_devctl_and, hpp->pci_exp_devctl_or);
-
-       /* Initialize Link Control Register */
-       if (pcie_cap_has_lnkctl(dev)) {
-
-               /*
-                * If the Root Port supports Read Completion Boundary of
-                * 128, set RCB to 128.  Otherwise, clear it.
-                */
-               hpp->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
-               hpp->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
-               if (pcie_root_rcb_set(dev))
-                       hpp->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
-
-               pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
-                       ~hpp->pci_exp_lnkctl_and, hpp->pci_exp_lnkctl_or);
-       }
-
-       /* Find Advanced Error Reporting Enhanced Capability */
-       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
-       if (!pos)
-               return;
-
-       /* Initialize Uncorrectable Error Mask Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
-       reg32 = (reg32 & hpp->unc_err_mask_and) | hpp->unc_err_mask_or;
-       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
-
-       /* Initialize Uncorrectable Error Severity Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
-       reg32 = (reg32 & hpp->unc_err_sever_and) | hpp->unc_err_sever_or;
-       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
-
-       /* Initialize Correctable Error Mask Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
-       reg32 = (reg32 & hpp->cor_err_mask_and) | hpp->cor_err_mask_or;
-       pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
-
-       /* Initialize Advanced Error Capabilities and Control Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
-       reg32 = (reg32 & hpp->adv_err_cap_and) | hpp->adv_err_cap_or;
-
-       /* Don't enable ECRC generation or checking if unsupported */
-       if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
-               reg32 &= ~PCI_ERR_CAP_ECRC_GENE;
-       if (!(reg32 & PCI_ERR_CAP_ECRC_CHKC))
-               reg32 &= ~PCI_ERR_CAP_ECRC_CHKE;
-       pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
-
-       /*
-        * FIXME: The following two registers are not supported yet.
-        *
-        *   o Secondary Uncorrectable Error Severity Register
-        *   o Secondary Uncorrectable Error Mask Register
-        */
-}
-
-static u16 hpx3_device_type(struct pci_dev *dev)
-{
-       u16 pcie_type = pci_pcie_type(dev);
-       const int pcie_to_hpx3_type[] = {
-               [PCI_EXP_TYPE_ENDPOINT]    = HPX_TYPE_ENDPOINT,
-               [PCI_EXP_TYPE_LEG_END]     = HPX_TYPE_LEG_END,
-               [PCI_EXP_TYPE_RC_END]      = HPX_TYPE_RC_END,
-               [PCI_EXP_TYPE_RC_EC]       = HPX_TYPE_RC_EC,
-               [PCI_EXP_TYPE_ROOT_PORT]   = HPX_TYPE_ROOT_PORT,
-               [PCI_EXP_TYPE_UPSTREAM]    = HPX_TYPE_UPSTREAM,
-               [PCI_EXP_TYPE_DOWNSTREAM]  = HPX_TYPE_DOWNSTREAM,
-               [PCI_EXP_TYPE_PCI_BRIDGE]  = HPX_TYPE_PCI_BRIDGE,
-               [PCI_EXP_TYPE_PCIE_BRIDGE] = HPX_TYPE_PCIE_BRIDGE,
-       };
-
-       if (pcie_type >= ARRAY_SIZE(pcie_to_hpx3_type))
-               return 0;
-
-       return pcie_to_hpx3_type[pcie_type];
-}
-
-static u8 hpx3_function_type(struct pci_dev *dev)
-{
-       if (dev->is_virtfn)
-               return HPX_FN_SRIOV_VIRT;
-       else if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV) > 0)
-               return HPX_FN_SRIOV_PHYS;
-       else
-               return HPX_FN_NORMAL;
-}
-
-static bool hpx3_cap_ver_matches(u8 pcie_cap_id, u8 hpx3_cap_id)
-{
-       u8 cap_ver = hpx3_cap_id & 0xf;
-
-       if ((hpx3_cap_id & BIT(4)) && cap_ver >= pcie_cap_id)
-               return true;
-       else if (cap_ver == pcie_cap_id)
-               return true;
-
-       return false;
-}
-
-static void program_hpx_type3_register(struct pci_dev *dev,
-                                      const struct hpx_type3 *reg)
-{
-       u32 match_reg, write_reg, header, orig_value;
-       u16 pos;
-
-       if (!(hpx3_device_type(dev) & reg->device_type))
-               return;
-
-       if (!(hpx3_function_type(dev) & reg->function_type))
-               return;
-
-       switch (reg->config_space_location) {
-       case HPX_CFG_PCICFG:
-               pos = 0;
-               break;
-       case HPX_CFG_PCIE_CAP:
-               pos = pci_find_capability(dev, reg->pci_exp_cap_id);
-               if (pos == 0)
-                       return;
-
-               break;
-       case HPX_CFG_PCIE_CAP_EXT:
-               pos = pci_find_ext_capability(dev, reg->pci_exp_cap_id);
-               if (pos == 0)
-                       return;
-
-               pci_read_config_dword(dev, pos, &header);
-               if (!hpx3_cap_ver_matches(PCI_EXT_CAP_VER(header),
-                                         reg->pci_exp_cap_ver))
-                       return;
-
-               break;
-       case HPX_CFG_VEND_CAP:  /* Fall through */
-       case HPX_CFG_DVSEC:     /* Fall through */
-       default:
-               pci_warn(dev, "Encountered _HPX type 3 with unsupported config space location");
-               return;
-       }
-
-       pci_read_config_dword(dev, pos + reg->match_offset, &match_reg);
-
-       if ((match_reg & reg->match_mask_and) != reg->match_value)
-               return;
-
-       pci_read_config_dword(dev, pos + reg->reg_offset, &write_reg);
-       orig_value = write_reg;
-       write_reg &= reg->reg_mask_and;
-       write_reg |= reg->reg_mask_or;
-
-       if (orig_value == write_reg)
-               return;
-
-       pci_write_config_dword(dev, pos + reg->reg_offset, write_reg);
-
-       pci_dbg(dev, "Applied _HPX3 at [0x%x]: 0x%08x -> 0x%08x",
-               pos, orig_value, write_reg);
-}
-
-static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx3)
-{
-       if (!hpx3)
-               return;
-
-       if (!pci_is_pcie(dev))
-               return;
-
-       program_hpx_type3_register(dev, hpx3);
-}
-
 int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
 {
        struct pci_host_bridge *host;
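[Aside on the removed helper above: hpx3_cap_ver_matches() treats BIT(4) of the _HPX-supplied capability version as an "or greater" flag. A minimal standalone sketch of that predicate, written as plain userspace C with hypothetical names, not code from the patch:]

/* Sketch of the version-match rule used by hpx3_cap_ver_matches(). */
#include <stdbool.h>

#define BIT(n) (1u << (n))

static bool cap_ver_matches(unsigned char dev_ver, unsigned char hpx3_ver)
{
	unsigned char want = hpx3_ver & 0xf;

	/* BIT(4) in the _HPX descriptor selects "this version or greater". */
	if ((hpx3_ver & BIT(4)) && want >= dev_ver)
		return true;

	return want == dev_ver;	/* otherwise require an exact match */
}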
@@ -2364,13 +2107,6 @@ static void pci_configure_serr(struct pci_dev *dev)
 
 static void pci_configure_device(struct pci_dev *dev)
 {
-       static const struct hotplug_program_ops hp_ops = {
-               .program_type0 = program_hpp_type0,
-               .program_type1 = program_hpp_type1,
-               .program_type2 = program_hpp_type2,
-               .program_type3 = program_hpx_type3,
-       };
-
        pci_configure_mps(dev);
        pci_configure_extended_tags(dev, NULL);
        pci_configure_relaxed_ordering(dev);
@@ -2378,7 +2114,7 @@ static void pci_configure_device(struct pci_dev *dev)
        pci_configure_eetlp_prefix(dev);
        pci_configure_serr(dev);
 
-       pci_acpi_program_hp_params(dev, &hp_ops);
+       pci_acpi_program_hp_params(dev);
 }
 
 static void pci_release_capabilities(struct pci_dev *dev)
@@ -2759,12 +2495,8 @@ static int only_one_child(struct pci_bus *bus)
         * A PCIe Downstream Port normally leads to a Link with only Device
         * 0 on it (PCIe spec r3.1, sec 7.3.1).  As an optimization, scan
         * only for Device 0 in that situation.
-        *
-        * Checking has_secondary_link is a hack to identify Downstream
-        * Ports because sometimes Switches are configured such that the
-        * PCIe Port Type labels are backwards.
         */
-       if (bridge && pci_is_pcie(bridge) && bridge->has_secondary_link)
+       if (bridge && pci_is_pcie(bridge) && pcie_downstream_port(bridge))
                return 1;
 
        return 0;
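[For readers tracking the has_secondary_link removal: pcie_downstream_port() classifies by port type instead of by probed link topology. A rough approximation of the helper follows; the real one lives in drivers/pci/pci.h, so treat this as a sketch, not its exact body:]

/* Approximation: ports whose Link leads away from the Root Complex. */
static bool example_downstream_port(struct pci_dev *dev)
{
	int type = pci_pcie_type(dev);

	return type == PCI_EXP_TYPE_ROOT_PORT ||
	       type == PCI_EXP_TYPE_DOWNSTREAM ||
	       type == PCI_EXP_TYPE_PCIE_BRIDGE;
}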
index fe7fe67..5495537 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/seq_file.h>
 #include <linux/capability.h>
 #include <linux/uaccess.h>
+#include <linux/security.h>
 #include <asm/byteorder.h>
 #include "pci.h"
 
@@ -115,7 +116,11 @@ static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf,
        struct pci_dev *dev = PDE_DATA(ino);
        int pos = *ppos;
        int size = dev->cfg_size;
-       int cnt;
+       int cnt, ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
 
        if (pos >= size)
                return 0;
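[The same security_locked_down() gate recurs below in the ioctl, mmap, and pciconfig_write paths. A hedged sketch of the pattern, with a hypothetical handler name: the check simply short-circuits the handler before any hardware access happens.]

/* Sketch: refuse a raw PCI access path under kernel lockdown. */
static ssize_t example_pci_write(struct file *file, const char __user *buf,
				 size_t count, loff_t *ppos)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
	if (ret)	/* typically -EPERM while lockdown is active */
		return ret;

	/* ... proceed with the privileged config-space write ... */
	return count;
}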
@@ -196,6 +201,10 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
 #endif /* HAVE_PCI_MMAP */
        int ret = 0;
 
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
+
        switch (cmd) {
        case PCIIOC_CONTROLLER:
                ret = pci_domain_nr(dev->bus);
@@ -238,7 +247,8 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
        struct pci_filp_private *fpriv = file->private_data;
        int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM;
 
-       if (!capable(CAP_SYS_RAWIO))
+       if (!capable(CAP_SYS_RAWIO) ||
+           security_locked_down(LOCKDOWN_PCI_ACCESS))
                return -EPERM;
 
        if (fpriv->mmap_state == pci_mmap_io) {
index 44c4ae1..320255e 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/delay.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
-#include <linux/pci-aspm.h>
 #include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/ktime.h>
@@ -2592,6 +2591,59 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
                        PCI_DEVICE_ID_NVIDIA_NVENET_15,
                        nvenet_msi_disable);
 
+/*
+ * PCIe spec r4.0 sec 7.7.1.2 and sec 7.7.2.2 say that if MSI/MSI-X is
+ * enabled, then the device can't use INTx interrupts. Tegra's PCIe root
+ * ports don't generate MSI interrupts for PME and AER events; instead,
+ * only INTx interrupts are generated. Though Tegra's PCIe root ports can
+ * generate MSI interrupts for other events, the PCIe specification doesn't
+ * support mixing INTx with MSI/MSI-X, so MSI must be disabled to keep port
+ * service drivers from registering their respective ISRs for MSIs.
+ */
+static void pci_quirk_nvidia_tegra_disable_rp_msi(struct pci_dev *dev)
+{
+       dev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad0,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad1,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad2,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf0,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e12,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e13,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0fae,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0faf,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x10e5,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x10e6,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+
 /*
  * Some versions of the MCP55 bridge from Nvidia have a legacy IRQ routing
  * config register.  This register controls the routing of legacy
@@ -2925,6 +2977,24 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x10a1,
                        quirk_msi_intx_disable_qca_bug);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0xe091,
                        quirk_msi_intx_disable_qca_bug);
+
+/*
+ * Amazon's Annapurna Labs 1c36:0031 Root Ports don't support MSI-X, so
+ * MSI-X should be disabled on platforms where the device (mistakenly)
+ * advertises it.
+ *
+ * Notice that this quirk also disables MSI (which may work, but hasn't been
+ * tested), since currently there is no standard way to disable only MSI-X.
+ *
+ * The 0031 device ID is reused for other non-Root Port device types, so the
+ * quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
+ */
+static void quirk_al_msi_disable(struct pci_dev *dev)
+{
+       dev->no_msi = 1;
+       pci_warn(dev, "Disabling MSI/MSI-X\n");
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
+                             PCI_CLASS_BRIDGE_PCI, 8, quirk_al_msi_disable);
 #endif /* CONFIG_PCI_MSI */
 
 /*
@@ -4366,6 +4436,24 @@ static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags)
        return ret;
 }
 
+static int pci_quirk_al_acs(struct pci_dev *dev, u16 acs_flags)
+{
+       if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+               return -ENOTTY;
+
+       /*
+        * Amazon's Annapurna Labs root ports don't include an ACS capability,
+        * but do include ACS-like functionality. The hardware doesn't support
+        * peer-to-peer transactions via the root port, and each root port has
+        * a unique segment number.
+        *
+        * Additionally, the root ports cannot send traffic to each other.
+        */
+       acs_flags &= ~(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+
+       return acs_flags ? 0 : 1;
+}
+
 /*
  * Sunrise Point PCH root ports implement ACS, but unfortunately as shown in
  * the datasheet (Intel 100 Series Chipset Family PCH Datasheet, Vol. 2,
@@ -4466,6 +4554,19 @@ static int pci_quirk_mf_endpoint_acs(struct pci_dev *dev, u16 acs_flags)
        return acs_flags ? 0 : 1;
 }
 
+static int pci_quirk_brcm_acs(struct pci_dev *dev, u16 acs_flags)
+{
+       /*
+        * iProc PAXB Root Ports don't advertise an ACS capability, but they
+        * also do not allow peer-to-peer transactions between Root Ports.
+        * Allow each Root Port to be in a separate IOMMU group by masking
+        * SV/RR/CR/UF bits.
+        */
+       acs_flags &= ~(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+
+       return acs_flags ? 0 : 1;
+}
+
 static const struct pci_dev_acs_enabled {
        u16 vendor;
        u16 device;
@@ -4559,6 +4660,9 @@ static const struct pci_dev_acs_enabled {
        { PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
+       { PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
+       /* Amazon Annapurna Labs */
+       { PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
        { 0 }
 };
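[Both ACS quirks above follow the same contract: clear the flags the hardware effectively provides, then report success only if nothing requested remains. A minimal sketch of that convention, with a hypothetical helper name:]

/* Sketch of the ACS-quirk return convention: 1 means every requested
 * capability is covered, 0 means at least one is not. */
static int example_acs_quirk(u16 acs_flags, u16 provided)
{
	acs_flags &= ~provided;	/* drop what the hardware covers */

	return acs_flags ? 0 : 1;
}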
 
index 7f4e658..bade140 100644 (file)
@@ -15,7 +15,6 @@
 #include "pci.h"
 
 DECLARE_RWSEM(pci_bus_sem);
-EXPORT_SYMBOL_GPL(pci_bus_sem);
 
 /*
  * pci_for_each_dma_alias - Iterate over DMA aliases for a device
index 79b1fa6..e7dbe21 100644 (file)
@@ -1662,8 +1662,8 @@ static int iov_resources_unassigned(struct pci_dev *dev, void *data)
        int i;
        bool *unassigned = data;
 
-       for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) {
-               struct resource *r = &dev->resource[i];
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               struct resource *r = &dev->resource[i + PCI_IOV_RESOURCES];
                struct pci_bus_region region;
 
                /* Not assigned or rejected by kernel? */
index d96626c..31e3955 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/errno.h>
 #include <linux/pci.h>
+#include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include "pci.h"
@@ -90,7 +91,8 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
        u32 dword;
        int err = 0;
 
-       if (!capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN) ||
+           security_locked_down(LOCKDOWN_PCI_ACCESS))
                return -EPERM;
 
        dev = pci_get_domain_bus_and_slot(0, bus, dfn);
index 5acd9c0..5486f87 100644 (file)
@@ -13,6 +13,8 @@
 #include <linux/pci_regs.h>
 #include <linux/types.h>
 
+#include "pci.h"
+
 /**
  * pci_vc_save_restore_dwords - Save or restore a series of dwords
  * @dev: device
@@ -105,7 +107,7 @@ static void pci_vc_enable(struct pci_dev *dev, int pos, int res)
        struct pci_dev *link = NULL;
 
        /* Enable VCs from the downstream device */
-       if (!dev->has_secondary_link)
+       if (!pci_is_pcie(dev) || !pcie_downstream_port(dev))
                return;
 
        ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF);
@@ -409,7 +411,6 @@ void pci_restore_vc_state(struct pci_dev *dev)
  * For each type of VC capability, VC/VC9/MFVC, find the capability, size
  * it, and allocate a buffer for save/restore.
  */
-
 void pci_allocate_vc_save_buffers(struct pci_dev *dev)
 {
        int i;
index 4963c2e..7915d10 100644 (file)
@@ -571,6 +571,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID,
                quirk_blacklist_vpd);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd);
+/*
+ * The Amazon Annapurna Labs 0x0031 device ID is reused for other non-Root Port
+ * device types, so the quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
+ */
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
+                             PCI_CLASS_BRIDGE_PCI, 8, quirk_blacklist_vpd);
 
 /*
  * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the
index abd0299..629359f 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/pci.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
+#include <linux/security.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
@@ -1575,6 +1576,10 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
        struct pcmcia_socket *s;
        int error;
 
+       error = security_locked_down(LOCKDOWN_PCMCIA_CIS);
+       if (error)
+               return error;
+
        s = to_socket(container_of(kobj, struct device, kobj));
 
        if (off)
index e516967..f9817c3 100644 (file)
@@ -7,3 +7,10 @@ config PHY_TEGRA_XUSB
 
          To compile this driver as a module, choose M here: the module will
          be called phy-tegra-xusb.
+
+config PHY_TEGRA194_P2U
+       tristate "NVIDIA Tegra194 PIPE2UPHY PHY driver"
+       depends on ARCH_TEGRA_194_SOC || COMPILE_TEST
+       select GENERIC_PHY
+       help
+         Enable this to support the P2U (PIPE to UPHY) blocks that are part
+         of Tegra 19x SoCs.
index 64ccaea..320dd38 100644 (file)
@@ -6,3 +6,4 @@ phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_124_SOC) += xusb-tegra124.o
 phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_132_SOC) += xusb-tegra124.o
 phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_210_SOC) += xusb-tegra210.o
 phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_186_SOC) += xusb-tegra186.o
+obj-$(CONFIG_PHY_TEGRA194_P2U) += phy-tegra194-p2u.o
diff --git a/drivers/phy/tegra/phy-tegra194-p2u.c b/drivers/phy/tegra/phy-tegra194-p2u.c
new file mode 100644 (file)
index 0000000..7042bed
--- /dev/null
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * P2U (PIPE to UPHY) driver for Tegra T194 SoC
+ *
+ * Copyright (C) 2019 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+
+#define P2U_PERIODIC_EQ_CTRL_GEN3      0xc0
+#define P2U_PERIODIC_EQ_CTRL_GEN3_PERIODIC_EQ_EN               BIT(0)
+#define P2U_PERIODIC_EQ_CTRL_GEN3_INIT_PRESET_EQ_TRAIN_EN      BIT(1)
+#define P2U_PERIODIC_EQ_CTRL_GEN4      0xc4
+#define P2U_PERIODIC_EQ_CTRL_GEN4_INIT_PRESET_EQ_TRAIN_EN      BIT(1)
+
+#define P2U_RX_DEBOUNCE_TIME                           0xa4
+#define P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_MASK       0xffff
+#define P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_VAL                160
+
+struct tegra_p2u {
+       void __iomem *base;
+};
+
+static inline void p2u_writel(struct tegra_p2u *phy, const u32 value,
+                             const u32 reg)
+{
+       writel_relaxed(value, phy->base + reg);
+}
+
+static inline u32 p2u_readl(struct tegra_p2u *phy, const u32 reg)
+{
+       return readl_relaxed(phy->base + reg);
+}
+
+static int tegra_p2u_power_on(struct phy *x)
+{
+       struct tegra_p2u *phy = phy_get_drvdata(x);
+       u32 val;
+
+       val = p2u_readl(phy, P2U_PERIODIC_EQ_CTRL_GEN3);
+       val &= ~P2U_PERIODIC_EQ_CTRL_GEN3_PERIODIC_EQ_EN;
+       val |= P2U_PERIODIC_EQ_CTRL_GEN3_INIT_PRESET_EQ_TRAIN_EN;
+       p2u_writel(phy, val, P2U_PERIODIC_EQ_CTRL_GEN3);
+
+       val = p2u_readl(phy, P2U_PERIODIC_EQ_CTRL_GEN4);
+       val |= P2U_PERIODIC_EQ_CTRL_GEN4_INIT_PRESET_EQ_TRAIN_EN;
+       p2u_writel(phy, val, P2U_PERIODIC_EQ_CTRL_GEN4);
+
+       val = p2u_readl(phy, P2U_RX_DEBOUNCE_TIME);
+       val &= ~P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_MASK;
+       val |= P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_VAL;
+       p2u_writel(phy, val, P2U_RX_DEBOUNCE_TIME);
+
+       return 0;
+}
+
+static const struct phy_ops ops = {
+       .power_on = tegra_p2u_power_on,
+       .owner = THIS_MODULE,
+};
+
+static int tegra_p2u_probe(struct platform_device *pdev)
+{
+       struct phy_provider *phy_provider;
+       struct device *dev = &pdev->dev;
+       struct phy *generic_phy;
+       struct tegra_p2u *phy;
+       struct resource *res;
+
+       phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL);
+       if (!phy)
+               return -ENOMEM;
+
+       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctl");
+       phy->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(phy->base))
+               return PTR_ERR(phy->base);
+
+       platform_set_drvdata(pdev, phy);
+
+       generic_phy = devm_phy_create(dev, NULL, &ops);
+       if (IS_ERR(generic_phy))
+               return PTR_ERR(generic_phy);
+
+       phy_set_drvdata(generic_phy, phy);
+
+       phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+       if (IS_ERR(phy_provider))
+               return PTR_ERR(phy_provider);
+
+       return 0;
+}
+
+static const struct of_device_id tegra_p2u_id_table[] = {
+       {
+               .compatible = "nvidia,tegra194-p2u",
+       },
+       {}
+};
+MODULE_DEVICE_TABLE(of, tegra_p2u_id_table);
+
+static struct platform_driver tegra_p2u_driver = {
+       .probe = tegra_p2u_probe,
+       .driver = {
+               .name = "tegra194-p2u",
+               .of_match_table = tegra_p2u_id_table,
+       },
+};
+module_platform_driver(tegra_p2u_driver);
+
+MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra194 PIPE2UPHY PHY driver");
+MODULE_LICENSE("GPL v2");
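[A consumer-side sketch, under the assumption of a PCIe controller driver that references this phy; the "p2u-0" handle name and the function name are illustrative, not taken from the patch:]

/* Hypothetical consumer: look up one P2U lane and power it on. */
static int example_attach_p2u(struct device *dev)
{
	struct phy *p2u;
	int ret;

	p2u = devm_phy_get(dev, "p2u-0");	/* name is illustrative */
	if (IS_ERR(p2u))
		return PTR_ERR(p2u);

	ret = phy_init(p2u);
	if (ret)
		return ret;

	return phy_power_on(p2u);	/* ends up in tegra_p2u_power_on() */
}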
index 648ddb7..c6800d2 100644 (file)
@@ -87,7 +87,7 @@ FUNC_GROUP_DECL(MACLINK3, L23);
 
 #define K25 7
 SIG_EXPR_LIST_DECL_SESG(K25, MACLINK4, MACLINK4, SIG_DESC_SET(SCU410, 7));
-SIG_EXPR_LIST_DECL_SESG(K25, SDA14, SDA14, SIG_DESC_SET(SCU4B0, 7));
+SIG_EXPR_LIST_DECL_SESG(K25, SDA14, I2C14, SIG_DESC_SET(SCU4B0, 7));
 PIN_DECL_2(K25, GPIOA7, MACLINK4, SDA14);
 FUNC_GROUP_DECL(MACLINK4, K25);
 
@@ -1262,13 +1262,13 @@ GROUP_DECL(SPI1, AB11, AC11, AA11);
 #define AD11 206
 SIG_EXPR_LIST_DECL_SEMG(AD11, SPI1DQ2, QSPI1, SPI1, SIG_DESC_SET(SCU438, 14));
 SIG_EXPR_LIST_DECL_SEMG(AD11, TXD13, UART13G1, UART13,
-                       SIG_DESC_SET(SCU438, 14));
+                       SIG_DESC_CLEAR(SCU4B8, 2), SIG_DESC_SET(SCU4D8, 14));
 PIN_DECL_2(AD11, GPIOZ6, SPI1DQ2, TXD13);
 
 #define AF10 207
 SIG_EXPR_LIST_DECL_SEMG(AF10, SPI1DQ3, QSPI1, SPI1, SIG_DESC_SET(SCU438, 15));
 SIG_EXPR_LIST_DECL_SEMG(AF10, RXD13, UART13G1, UART13,
-                       SIG_DESC_SET(SCU438, 15));
+                       SIG_DESC_CLEAR(SCU4B8, 3), SIG_DESC_SET(SCU4D8, 15));
 PIN_DECL_2(AF10, GPIOZ7, SPI1DQ3, RXD13);
 
 GROUP_DECL(QSPI1, AB11, AC11, AA11, AD11, AF10);
@@ -1440,91 +1440,85 @@ FUNC_GROUP_DECL(RGMII2, D4, C2, C1, D3, E4, F5, D2, E3, D1, F4, E2, E1);
 FUNC_GROUP_DECL(RMII2, D4, C2, C1, D3, D2, D1, F4, E2, E1);
 
 #define AB4 232
-SIG_EXPR_LIST_DECL_SESG(AB4, SD3CLK, SD3, SIG_DESC_SET(SCU400, 24));
-PIN_DECL_1(AB4, GPIO18D0, SD3CLK);
+SIG_EXPR_LIST_DECL_SEMG(AB4, EMMCCLK, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 24));
+PIN_DECL_1(AB4, GPIO18D0, EMMCCLK);
 
 #define AA4 233
-SIG_EXPR_LIST_DECL_SESG(AA4, SD3CMD, SD3, SIG_DESC_SET(SCU400, 25));
-PIN_DECL_1(AA4, GPIO18D1, SD3CMD);
+SIG_EXPR_LIST_DECL_SEMG(AA4, EMMCCMD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 25));
+PIN_DECL_1(AA4, GPIO18D1, EMMCCMD);
 
 #define AC4 234
-SIG_EXPR_LIST_DECL_SESG(AC4, SD3DAT0, SD3, SIG_DESC_SET(SCU400, 26));
-PIN_DECL_1(AC4, GPIO18D2, SD3DAT0);
+SIG_EXPR_LIST_DECL_SEMG(AC4, EMMCDAT0, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 26));
+PIN_DECL_1(AC4, GPIO18D2, EMMCDAT0);
 
 #define AA5 235
-SIG_EXPR_LIST_DECL_SESG(AA5, SD3DAT1, SD3, SIG_DESC_SET(SCU400, 27));
-PIN_DECL_1(AA5, GPIO18D3, SD3DAT1);
+SIG_EXPR_LIST_DECL_SEMG(AA5, EMMCDAT1, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 27));
+PIN_DECL_1(AA5, GPIO18D3, EMMCDAT1);
 
 #define Y5 236
-SIG_EXPR_LIST_DECL_SESG(Y5, SD3DAT2, SD3, SIG_DESC_SET(SCU400, 28));
-PIN_DECL_1(Y5, GPIO18D4, SD3DAT2);
+SIG_EXPR_LIST_DECL_SEMG(Y5, EMMCDAT2, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 28));
+PIN_DECL_1(Y5, GPIO18D4, EMMCDAT2);
 
 #define AB5 237
-SIG_EXPR_LIST_DECL_SESG(AB5, SD3DAT3, SD3, SIG_DESC_SET(SCU400, 29));
-PIN_DECL_1(AB5, GPIO18D5, SD3DAT3);
+SIG_EXPR_LIST_DECL_SEMG(AB5, EMMCDAT3, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 29));
+PIN_DECL_1(AB5, GPIO18D5, EMMCDAT3);
 
 #define AB6 238
-SIG_EXPR_LIST_DECL_SESG(AB6, SD3CD, SD3, SIG_DESC_SET(SCU400, 30));
-PIN_DECL_1(AB6, GPIO18D6, SD3CD);
+SIG_EXPR_LIST_DECL_SEMG(AB6, EMMCCD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 30));
+PIN_DECL_1(AB6, GPIO18D6, EMMCCD);
 
 #define AC5 239
-SIG_EXPR_LIST_DECL_SESG(AC5, SD3WP, SD3, SIG_DESC_SET(SCU400, 31));
-PIN_DECL_1(AC5, GPIO18D7, SD3WP);
+SIG_EXPR_LIST_DECL_SEMG(AC5, EMMCWP, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 31));
+PIN_DECL_1(AC5, GPIO18D7, EMMCWP);
 
-FUNC_GROUP_DECL(SD3, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5);
+GROUP_DECL(EMMCG1, AB4, AA4, AC4, AB6, AC5);
+GROUP_DECL(EMMCG4, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5);
 
 #define Y1 240
 SIG_EXPR_LIST_DECL_SEMG(Y1, FWSPIDCS, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3));
 SIG_EXPR_LIST_DECL_SESG(Y1, VBCS, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y1, SD3DAT4, SD3DAT4, SIG_DESC_SET(SCU404, 0));
-PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, SD3DAT4);
-FUNC_GROUP_DECL(SD3DAT4, Y1);
+SIG_EXPR_LIST_DECL_SEMG(Y1, EMMCDAT4, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 0));
+PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, EMMCDAT4);
 
 #define Y2 241
 SIG_EXPR_LIST_DECL_SEMG(Y2, FWSPIDCK, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3));
 SIG_EXPR_LIST_DECL_SESG(Y2, VBCK, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y2, SD3DAT5, SD3DAT5, SIG_DESC_SET(SCU404, 1));
-PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, SD3DAT5);
-FUNC_GROUP_DECL(SD3DAT5, Y2);
+SIG_EXPR_LIST_DECL_SEMG(Y2, EMMCDAT5, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 1));
+PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, EMMCDAT5);
 
 #define Y3 242
 SIG_EXPR_LIST_DECL_SEMG(Y3, FWSPIDMOSI, FWSPID, FWSPID,
                        SIG_DESC_SET(SCU500, 3));
 SIG_EXPR_LIST_DECL_SESG(Y3, VBMOSI, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y3, SD3DAT6, SD3DAT6, SIG_DESC_SET(SCU404, 2));
-PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, SD3DAT6);
-FUNC_GROUP_DECL(SD3DAT6, Y3);
+SIG_EXPR_LIST_DECL_SEMG(Y3, EMMCDAT6, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 2));
+PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, EMMCDAT6);
 
 #define Y4 243
 SIG_EXPR_LIST_DECL_SEMG(Y4, FWSPIDMISO, FWSPID, FWSPID,
                        SIG_DESC_SET(SCU500, 3));
 SIG_EXPR_LIST_DECL_SESG(Y4, VBMISO, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y4, SD3DAT7, SD3DAT7, SIG_DESC_SET(SCU404, 3));
-PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, SD3DAT7);
-FUNC_GROUP_DECL(SD3DAT7, Y4);
+SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3));
+PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7);
 
 GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4);
 GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12);
+GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4);
 FUNC_DECL_2(FWSPID, FWSPID, FWQSPID);
 FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4);
-
+FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8);
 /*
  * FIXME: Confirm bits and priorities are the right way around for the
  * following 4 pins
  */
 #define AF25 244
-SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20),
-                       SIG_DESC_SET(SCU4D8, 20));
-SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_CLEAR(SCU438, 20),
-                       SIG_DESC_SET(SCU4D8, 20));
+SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20));
+SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_SET(SCU4D8, 20));
 PIN_DECL_(AF25, SIG_EXPR_LIST_PTR(AF25, I3C3SCL),
          SIG_EXPR_LIST_PTR(AF25, FSI1CLK));
 
 #define AE26 245
-SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21),
-                       SIG_DESC_SET(SCU4D8, 21));
-SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_CLEAR(SCU438, 21),
-                       SIG_DESC_SET(SCU4D8, 21));
+SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21));
+SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_SET(SCU4D8, 21));
 PIN_DECL_(AE26, SIG_EXPR_LIST_PTR(AE26, I3C3SDA),
          SIG_EXPR_LIST_PTR(AE26, FSI1DATA));
 
@@ -1533,18 +1527,14 @@ FUNC_DECL_2(I3C3, HVI3C3, I3C3);
 FUNC_GROUP_DECL(FSI1, AF25, AE26);
 
 #define AE25 246
-SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22),
-                       SIG_DESC_SET(SCU4D8, 22));
-SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_CLEAR(SCU438, 22),
-                       SIG_DESC_SET(SCU4D8, 22));
+SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22));
+SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_SET(SCU4D8, 22));
 PIN_DECL_(AE25, SIG_EXPR_LIST_PTR(AE25, I3C4SCL),
          SIG_EXPR_LIST_PTR(AE25, FSI2CLK));
 
 #define AF24 247
-SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23),
-                       SIG_DESC_SET(SCU4D8, 23));
-SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_CLEAR(SCU438, 23),
-                       SIG_DESC_SET(SCU4D8, 23));
+SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23));
+SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_SET(SCU4D8, 23));
 PIN_DECL_(AF24, SIG_EXPR_LIST_PTR(AF24, I3C4SDA),
          SIG_EXPR_LIST_PTR(AF24, FSI2DATA));
 
@@ -1574,6 +1564,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
        ASPEED_PINCTRL_PIN(A3),
        ASPEED_PINCTRL_PIN(AA11),
        ASPEED_PINCTRL_PIN(AA12),
+       ASPEED_PINCTRL_PIN(AA16),
+       ASPEED_PINCTRL_PIN(AA17),
        ASPEED_PINCTRL_PIN(AA23),
        ASPEED_PINCTRL_PIN(AA24),
        ASPEED_PINCTRL_PIN(AA25),
@@ -1585,6 +1577,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
        ASPEED_PINCTRL_PIN(AB11),
        ASPEED_PINCTRL_PIN(AB12),
        ASPEED_PINCTRL_PIN(AB15),
+       ASPEED_PINCTRL_PIN(AB16),
+       ASPEED_PINCTRL_PIN(AB17),
        ASPEED_PINCTRL_PIN(AB18),
        ASPEED_PINCTRL_PIN(AB19),
        ASPEED_PINCTRL_PIN(AB22),
@@ -1602,6 +1596,7 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
        ASPEED_PINCTRL_PIN(AC11),
        ASPEED_PINCTRL_PIN(AC12),
        ASPEED_PINCTRL_PIN(AC15),
+       ASPEED_PINCTRL_PIN(AC16),
        ASPEED_PINCTRL_PIN(AC17),
        ASPEED_PINCTRL_PIN(AC18),
        ASPEED_PINCTRL_PIN(AC19),
@@ -1619,6 +1614,7 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
        ASPEED_PINCTRL_PIN(AD12),
        ASPEED_PINCTRL_PIN(AD14),
        ASPEED_PINCTRL_PIN(AD15),
+       ASPEED_PINCTRL_PIN(AD16),
        ASPEED_PINCTRL_PIN(AD19),
        ASPEED_PINCTRL_PIN(AD20),
        ASPEED_PINCTRL_PIN(AD22),
@@ -1634,8 +1630,11 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
        ASPEED_PINCTRL_PIN(AE12),
        ASPEED_PINCTRL_PIN(AE14),
        ASPEED_PINCTRL_PIN(AE15),
+       ASPEED_PINCTRL_PIN(AE16),
        ASPEED_PINCTRL_PIN(AE18),
        ASPEED_PINCTRL_PIN(AE19),
+       ASPEED_PINCTRL_PIN(AE25),
+       ASPEED_PINCTRL_PIN(AE26),
        ASPEED_PINCTRL_PIN(AE7),
        ASPEED_PINCTRL_PIN(AE8),
        ASPEED_PINCTRL_PIN(AF10),
@@ -1643,6 +1642,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
        ASPEED_PINCTRL_PIN(AF12),
        ASPEED_PINCTRL_PIN(AF14),
        ASPEED_PINCTRL_PIN(AF15),
+       ASPEED_PINCTRL_PIN(AF24),
+       ASPEED_PINCTRL_PIN(AF25),
        ASPEED_PINCTRL_PIN(AF7),
        ASPEED_PINCTRL_PIN(AF8),
        ASPEED_PINCTRL_PIN(AF9),
@@ -1792,17 +1793,6 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
        ASPEED_PINCTRL_PIN(Y3),
        ASPEED_PINCTRL_PIN(Y4),
        ASPEED_PINCTRL_PIN(Y5),
-       ASPEED_PINCTRL_PIN(AB16),
-       ASPEED_PINCTRL_PIN(AA17),
-       ASPEED_PINCTRL_PIN(AB17),
-       ASPEED_PINCTRL_PIN(AE16),
-       ASPEED_PINCTRL_PIN(AC16),
-       ASPEED_PINCTRL_PIN(AA16),
-       ASPEED_PINCTRL_PIN(AD16),
-       ASPEED_PINCTRL_PIN(AF25),
-       ASPEED_PINCTRL_PIN(AE26),
-       ASPEED_PINCTRL_PIN(AE25),
-       ASPEED_PINCTRL_PIN(AF24),
 };
 
 static const struct aspeed_pin_group aspeed_g6_groups[] = {
@@ -1976,11 +1966,9 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = {
        ASPEED_PINCTRL_GROUP(SALT9G1),
        ASPEED_PINCTRL_GROUP(SD1),
        ASPEED_PINCTRL_GROUP(SD2),
-       ASPEED_PINCTRL_GROUP(SD3),
-       ASPEED_PINCTRL_GROUP(SD3DAT4),
-       ASPEED_PINCTRL_GROUP(SD3DAT5),
-       ASPEED_PINCTRL_GROUP(SD3DAT6),
-       ASPEED_PINCTRL_GROUP(SD3DAT7),
+       ASPEED_PINCTRL_GROUP(EMMCG1),
+       ASPEED_PINCTRL_GROUP(EMMCG4),
+       ASPEED_PINCTRL_GROUP(EMMCG8),
        ASPEED_PINCTRL_GROUP(SGPM1),
        ASPEED_PINCTRL_GROUP(SGPS1),
        ASPEED_PINCTRL_GROUP(SIOONCTRL),
@@ -2059,6 +2047,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = {
        ASPEED_PINCTRL_FUNC(ADC8),
        ASPEED_PINCTRL_FUNC(ADC9),
        ASPEED_PINCTRL_FUNC(BMCINT),
+       ASPEED_PINCTRL_FUNC(EMMC),
        ASPEED_PINCTRL_FUNC(ESPI),
        ASPEED_PINCTRL_FUNC(ESPIALT),
        ASPEED_PINCTRL_FUNC(FSI1),
@@ -2191,11 +2180,6 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = {
        ASPEED_PINCTRL_FUNC(SALT9),
        ASPEED_PINCTRL_FUNC(SD1),
        ASPEED_PINCTRL_FUNC(SD2),
-       ASPEED_PINCTRL_FUNC(SD3),
-       ASPEED_PINCTRL_FUNC(SD3DAT4),
-       ASPEED_PINCTRL_FUNC(SD3DAT5),
-       ASPEED_PINCTRL_FUNC(SD3DAT6),
-       ASPEED_PINCTRL_FUNC(SD3DAT7),
        ASPEED_PINCTRL_FUNC(SGPM1),
        ASPEED_PINCTRL_FUNC(SGPS1),
        ASPEED_PINCTRL_FUNC(SIOONCTRL),
index a2c0d52..140c5ce 100644 (file)
@@ -508,7 +508,7 @@ struct aspeed_pin_desc {
  * @idx: The bit index in the register
  */
 #define SIG_DESC_SET(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 1)
-#define SIG_DESC_CLEAR(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 0)
+#define SIG_DESC_CLEAR(reg, idx) { ASPEED_IP_SCU, reg, BIT_MASK(idx), 0, 0 }
 
 #define SIG_DESC_LIST_SYM(sig, group) sig_descs_ ## sig ## _ ## group
 #define SIG_DESC_LIST_DECL(sig, group, ...) \
@@ -738,6 +738,7 @@ struct aspeed_pin_desc {
        static const char *FUNC_SYM(func)[] = { __VA_ARGS__ }
 
 #define FUNC_DECL_2(func, one, two) FUNC_DECL_(func, #one, #two)
+#define FUNC_DECL_3(func, one, two, three) FUNC_DECL_(func, #one, #two, #three)
 
 #define FUNC_GROUP_DECL(func, ...) \
        GROUP_DECL(func, __VA_ARGS__); \
index 6f7d3a2..42f7ab3 100644 (file)
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2014-2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /*
@@ -853,7 +845,7 @@ static int iproc_gpio_probe(struct platform_device *pdev)
 
        /* optional GPIO interrupt support */
        irq = platform_get_irq(pdev, 0);
-       if (irq) {
+       if (irq > 0) {
                struct irq_chip *irqc;
                struct gpio_irq_chip *girq;
 
index 2bf6af7..9fabc45 100644 (file)
@@ -640,8 +640,8 @@ static int ns2_pinmux_enable(struct pinctrl_dev *pctrl_dev,
        const struct ns2_pin_function *func;
        const struct ns2_pin_group *grp;
 
-       if (grp_select > pinctrl->num_groups ||
-               func_select > pinctrl->num_functions)
+       if (grp_select >= pinctrl->num_groups ||
+               func_select >= pinctrl->num_functions)
                return -EINVAL;
 
        func = &pinctrl->functions[func_select];
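[The hunk above fixes a textbook off-by-one: valid indices into an N-entry table run from 0 to N - 1, so the guard has to reject index >= N, not just index > N. In isolation:]

/* Sketch: correct bounds check for an N-element array. */
static bool index_in_range(unsigned int idx, unsigned int nelems)
{
	return idx < nelems;	/* idx == nelems is one past the end */
}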
index 44f8ccd..9dfdc27 100644 (file)
@@ -43,7 +43,7 @@ static const struct berlin_desc_group as370_soc_pinctrl_groups[] = {
                        BERLIN_PINCTRL_FUNCTION(0x0, "gpio"), /* GPIO5 */
                        BERLIN_PINCTRL_FUNCTION(0x1, "i2s1"), /* DO3 */
                        BERLIN_PINCTRL_FUNCTION(0x2, "pwm"), /* PWM5 */
-                       BERLIN_PINCTRL_FUNCTION(0x3, "spififib"), /* SPDIFIB */
+                       BERLIN_PINCTRL_FUNCTION(0x3, "spdifib"), /* SPDIFIB */
                        BERLIN_PINCTRL_FUNCTION(0x4, "spdifo"), /* SPDIFO */
                        BERLIN_PINCTRL_FUNCTION(0x5, "phy")), /* DBG5 */
        BERLIN_PINCTRL_GROUP("I2S1_MCLK", 0x0, 0x3, 0x12,
index aae51c5..c6251ea 100644 (file)
@@ -1513,7 +1513,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
                        DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"),
-                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {
@@ -1521,7 +1520,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "HP"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"),
-                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {
@@ -1529,7 +1527,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"),
-                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {
@@ -1537,7 +1534,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
-                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {}
index 1f13bcd..bc01359 100644 (file)
@@ -96,6 +96,7 @@ struct intel_pinctrl_context {
  * @pctldesc: Pin controller description
  * @pctldev: Pointer to the pin controller device
  * @chip: GPIO chip in this pin controller
+ * @irqchip: IRQ chip in this pin controller
  * @soc: SoC/PCH specific pin configuration data
  * @communities: All communities in this pin controller
  * @ncommunities: Number of communities in this pin controller
@@ -108,6 +109,7 @@ struct intel_pinctrl {
        struct pinctrl_desc pctldesc;
        struct pinctrl_dev *pctldev;
        struct gpio_chip chip;
+       struct irq_chip irqchip;
        const struct intel_pinctrl_soc_data *soc;
        struct intel_community *communities;
        size_t ncommunities;
@@ -1139,16 +1141,6 @@ static irqreturn_t intel_gpio_irq(int irq, void *data)
        return ret;
 }
 
-static struct irq_chip intel_gpio_irqchip = {
-       .name = "intel-gpio",
-       .irq_ack = intel_gpio_irq_ack,
-       .irq_mask = intel_gpio_irq_mask,
-       .irq_unmask = intel_gpio_irq_unmask,
-       .irq_set_type = intel_gpio_irq_type,
-       .irq_set_wake = intel_gpio_irq_wake,
-       .flags = IRQCHIP_MASK_ON_SUSPEND,
-};
-
 static int intel_gpio_add_pin_ranges(struct intel_pinctrl *pctrl,
                                     const struct intel_community *community)
 {
@@ -1198,12 +1190,22 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq)
 
        pctrl->chip = intel_gpio_chip;
 
+       /* Setup GPIO chip */
        pctrl->chip.ngpio = intel_gpio_ngpio(pctrl);
        pctrl->chip.label = dev_name(pctrl->dev);
        pctrl->chip.parent = pctrl->dev;
        pctrl->chip.base = -1;
        pctrl->irq = irq;
 
+       /* Setup IRQ chip */
+       pctrl->irqchip.name = dev_name(pctrl->dev);
+       pctrl->irqchip.irq_ack = intel_gpio_irq_ack;
+       pctrl->irqchip.irq_mask = intel_gpio_irq_mask;
+       pctrl->irqchip.irq_unmask = intel_gpio_irq_unmask;
+       pctrl->irqchip.irq_set_type = intel_gpio_irq_type;
+       pctrl->irqchip.irq_set_wake = intel_gpio_irq_wake;
+       pctrl->irqchip.flags = IRQCHIP_MASK_ON_SUSPEND;
+
        ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl);
        if (ret) {
                dev_err(pctrl->dev, "failed to register gpiochip\n");
@@ -1233,15 +1235,14 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq)
                return ret;
        }
 
-       ret = gpiochip_irqchip_add(&pctrl->chip, &intel_gpio_irqchip, 0,
+       ret = gpiochip_irqchip_add(&pctrl->chip, &pctrl->irqchip, 0,
                                   handle_bad_irq, IRQ_TYPE_NONE);
        if (ret) {
                dev_err(pctrl->dev, "failed to add irqchip\n");
                return ret;
        }
 
-       gpiochip_set_chained_irqchip(&pctrl->chip, &intel_gpio_irqchip, irq,
-                                    NULL);
+       gpiochip_set_chained_irqchip(&pctrl->chip, &pctrl->irqchip, irq, NULL);
        return 0;
 }
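[The point of embedding the irq_chip in struct intel_pinctrl is that a single static irq_chip was previously shared by every probed controller; a per-instance copy lets each device carry its own .name and flags. A hedged structural sketch, with types abridged and names hypothetical:]

/* Sketch: per-instance irq_chip instead of one shared static. */
struct example_pinctrl {
	struct irq_chip irqchip;	/* one copy per probed device */
};

static void example_setup_irqchip(struct example_pinctrl *p,
				  const char *devname)
{
	p->irqchip.name = devname;	/* per-device, not a shared literal */
	p->irqchip.flags = IRQCHIP_MASK_ON_SUSPEND;
}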
 
index 6462d3c..f2f5fcd 100644 (file)
@@ -183,10 +183,10 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = {
        PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19),
                      BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19),
                      18, 2, "gpio", "uart"),
-       PIN_GRP_GPIO("led0_od", 11, 1, BIT(20), "led"),
-       PIN_GRP_GPIO("led1_od", 12, 1, BIT(21), "led"),
-       PIN_GRP_GPIO("led2_od", 13, 1, BIT(22), "led"),
-       PIN_GRP_GPIO("led3_od", 14, 1, BIT(23), "led"),
+       PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"),
+       PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"),
+       PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"),
+       PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"),
 
 };
 
@@ -221,11 +221,11 @@ static const struct armada_37xx_pin_data armada_37xx_pin_sb = {
 };
 
 static inline void armada_37xx_update_reg(unsigned int *reg,
-                                         unsigned int offset)
+                                         unsigned int *offset)
 {
        /* We never have more than 2 registers */
-       if (offset >= GPIO_PER_REG) {
-               offset -= GPIO_PER_REG;
+       if (*offset >= GPIO_PER_REG) {
+               *offset -= GPIO_PER_REG;
                *reg += sizeof(u32);
        }
 }
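[The armada_37xx_update_reg() change is a pass-by-value fix: the helper used to adjust only its local copy of offset, so callers computed BIT(offset) from the unadjusted index whenever the offset crossed a register boundary. Side by side, with the GPIO_PER_REG constant inlined as 32 for the sketch:]

/* Broken: the caller never sees the reduced offset. */
static void update_reg_broken(unsigned int *reg, unsigned int offset)
{
	if (offset >= 32) {
		offset -= 32;	/* adjustment lost on return */
		*reg += 4;
	}
}

/* Fixed: offset is passed by pointer, so BIT(*offset) is correct. */
static void update_reg_fixed(unsigned int *reg, unsigned int *offset)
{
	if (*offset >= 32) {
		*offset -= 32;
		*reg += 4;
	}
}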
@@ -376,7 +376,7 @@ static inline void armada_37xx_irq_update_reg(unsigned int *reg,
 {
        int offset = irqd_to_hwirq(d);
 
-       armada_37xx_update_reg(reg, offset);
+       armada_37xx_update_reg(reg, &offset);
 }
 
 static int armada_37xx_gpio_direction_input(struct gpio_chip *chip,
@@ -386,7 +386,7 @@ static int armada_37xx_gpio_direction_input(struct gpio_chip *chip,
        unsigned int reg = OUTPUT_EN;
        unsigned int mask;
 
-       armada_37xx_update_reg(&reg, offset);
+       armada_37xx_update_reg(&reg, &offset);
        mask = BIT(offset);
 
        return regmap_update_bits(info->regmap, reg, mask, 0);
@@ -399,7 +399,7 @@ static int armada_37xx_gpio_get_direction(struct gpio_chip *chip,
        unsigned int reg = OUTPUT_EN;
        unsigned int val, mask;
 
-       armada_37xx_update_reg(&reg, offset);
+       armada_37xx_update_reg(&reg, &offset);
        mask = BIT(offset);
        regmap_read(info->regmap, reg, &val);
 
@@ -413,7 +413,7 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip,
        unsigned int reg = OUTPUT_EN;
        unsigned int mask, val, ret;
 
-       armada_37xx_update_reg(&reg, offset);
+       armada_37xx_update_reg(&reg, &offset);
        mask = BIT(offset);
 
        ret = regmap_update_bits(info->regmap, reg, mask, mask);
@@ -434,7 +434,7 @@ static int armada_37xx_gpio_get(struct gpio_chip *chip, unsigned int offset)
        unsigned int reg = INPUT_VAL;
        unsigned int val, mask;
 
-       armada_37xx_update_reg(&reg, offset);
+       armada_37xx_update_reg(&reg, &offset);
        mask = BIT(offset);
 
        regmap_read(info->regmap, reg, &val);
@@ -449,7 +449,7 @@ static void armada_37xx_gpio_set(struct gpio_chip *chip, unsigned int offset,
        unsigned int reg = OUTPUT_VAL;
        unsigned int mask, val;
 
-       armada_37xx_update_reg(&reg, offset);
+       armada_37xx_update_reg(&reg, &offset);
        mask = BIT(offset);
        val = value ? mask : 0;
 
index 9749737..5646600 100644 (file)
@@ -705,7 +705,7 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev)
 
 static int stmfx_pinctrl_remove(struct platform_device *pdev)
 {
-       struct stmfx *stmfx = dev_get_platdata(&pdev->dev);
+       struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent);
 
        return stmfx_function_disable(stmfx,
                                      STMFX_FUNC_GPIO |
index 1b67bb5..ae21d08 100644 (file)
@@ -674,6 +674,7 @@ config EEEPC_LAPTOP
 config ASUS_WMI
        tristate "ASUS WMI Driver"
        depends on ACPI_WMI
+       depends on ACPI_BATTERY
        depends on INPUT
        depends on HWMON
        depends on BACKLIGHT_CLASS_DEVICE
index 86cc2cc..af063f6 100644 (file)
@@ -420,12 +420,6 @@ failed_sensitivity:
 
 static int cmpc_accel_remove_v4(struct acpi_device *acpi)
 {
-       struct input_dev *inputdev;
-       struct cmpc_accel *accel;
-
-       inputdev = dev_get_drvdata(&acpi->dev);
-       accel = dev_get_drvdata(&inputdev->dev);
-
        device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr_v4);
        device_remove_file(&acpi->dev, &cmpc_accel_g_select_attr_v4);
        return cmpc_remove_acpi_notify_device(acpi);
@@ -656,12 +650,6 @@ failed_file:
 
 static int cmpc_accel_remove(struct acpi_device *acpi)
 {
-       struct input_dev *inputdev;
-       struct cmpc_accel *accel;
-
-       inputdev = dev_get_drvdata(&acpi->dev);
-       accel = dev_get_drvdata(&inputdev->dev);
-
        device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr);
        return cmpc_remove_acpi_notify_device(acpi);
 }
index 61fe341..ffb8d5d 100644 (file)
@@ -90,7 +90,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
        for (i = 0; i < multi->num_clients && inst_data[i].type; i++) {
                memset(&board_info, 0, sizeof(board_info));
                strlcpy(board_info.type, inst_data[i].type, I2C_NAME_SIZE);
-               snprintf(name, sizeof(name), "%s-%s.%d", match->id,
+               snprintf(name, sizeof(name), "%s-%s.%d", dev_name(dev),
                         inst_data[i].type, i);
                board_info.dev_name = name;
                switch (inst_data[i].flags & IRQ_RESOURCE_TYPE) {
@@ -108,6 +108,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
                        if (ret < 0) {
                                dev_dbg(dev, "Error requesting irq at index %d: %d\n",
                                        inst_data[i].irq_idx, ret);
+                               goto error;
                        }
                        board_info.irq = ret;
                        break;
index ab7ae19..fa97834 100644 (file)
@@ -293,9 +293,8 @@ static int intel_punit_ipc_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, punit_ipcdev);
 
-       irq = platform_get_irq(pdev, 0);
+       irq = platform_get_irq_optional(pdev, 0);
        if (irq < 0) {
-               punit_ipcdev->irq = 0;
                dev_warn(&pdev->dev, "Invalid IRQ, using polling mode\n");
        } else {
                ret = devm_request_irq(&pdev->dev, irq, intel_punit_ioc,
index 9aca5e7..07d1b91 100644 (file)
@@ -422,6 +422,13 @@ static const struct dmi_system_id critclk_systems[] = {
                        DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"),
                },
        },
+       {
+               .ident = "SIMATIC IPC277E",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "6AV7882-0"),
+               },
+       },
        { /*sentinel*/ }
 };
 
index 960961f..0517272 100644 (file)
@@ -97,8 +97,8 @@ config PTP_1588_CLOCK_PCH
        help
          This driver adds support for using the PCH EG20T as a PTP
          clock. The hardware supports time stamping of PTP packets
-         when using the end-to-end delay (E2E) mechansim. The peer
-         delay mechansim (P2P) is not supported.
+         when using the end-to-end delay (E2E) mechanism. The peer
+         delay mechanism (P2P) is not supported.
 
          This clock is only useful if your PTP programs are getting
          hardware time stamps on the PTP Ethernet packets using the
index 9c18476..67d0199 100644 (file)
@@ -155,7 +155,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
                        err = -EINVAL;
                        break;
                } else if (cmd == PTP_EXTTS_REQUEST) {
-                       req.extts.flags &= ~PTP_EXTTS_VALID_FLAGS;
+                       req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS;
                        req.extts.rsv[0] = 0;
                        req.extts.rsv[1] = 0;
                }
@@ -184,7 +184,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
                        err = -EINVAL;
                        break;
                } else if (cmd == PTP_PEROUT_REQUEST) {
-                       req.perout.flags &= ~PTP_PEROUT_VALID_FLAGS;
+                       req.perout.flags &= PTP_PEROUT_V1_VALID_FLAGS;
                        req.perout.rsv[0] = 0;
                        req.perout.rsv[1] = 0;
                        req.perout.rsv[2] = 0;
index c61f00b..a577218 100644 (file)
@@ -507,6 +507,8 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
                ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET;
        }
 
+       spin_lock_init(&ptp_qoriq->lock);
+
        ktime_get_real_ts64(&now);
        ptp_qoriq_settime(&ptp_qoriq->caps, &now);
 
@@ -514,7 +516,6 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
          (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
          (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT;
 
-       spin_lock_init(&ptp_qoriq->lock);
        spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
        regs = &ptp_qoriq->regs;
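[The qoriq hunk is purely an ordering fix: the settime path ends up taking ptp_qoriq->lock internally, so spin_lock_init() has to run before the first settime call rather than after it. Schematically, with example_settime() as a hypothetical stand-in for ptp_qoriq_settime():]

/* Sketch: the lock must be initialized before any user of it runs. */
static void example_settime(struct ptp_qoriq *ptp_qoriq)
{
	unsigned long flags;

	spin_lock_irqsave(&ptp_qoriq->lock, flags);
	/* ... program the timestamp counter ... */
	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
}

static void example_init(struct ptp_qoriq *ptp_qoriq)
{
	spin_lock_init(&ptp_qoriq->lock);	/* must precede... */
	example_settime(ptp_qoriq);		/* ...the first lock user */
}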
index b0e632b..e3a2518 100644 (file)
@@ -44,7 +44,7 @@ config PWM_AB8500
 
 config PWM_ATMEL
        tristate "Atmel PWM support"
-       depends on ARCH_AT91
+       depends on ARCH_AT91 && OF
        help
          Generic PWM framework driver for Atmel SoC.
 
@@ -423,6 +423,17 @@ config PWM_SPEAR
          To compile this driver as a module, choose M here: the module
          will be called pwm-spear.
 
+config PWM_SPRD
+       tristate "Spreadtrum PWM support"
+       depends on ARCH_SPRD || COMPILE_TEST
+       depends on HAS_IOMEM
+       help
+         Generic PWM framework driver for the PWM controller on
+         Spreadtrum SoCs.
+
+         To compile this driver as a module, choose M here: the module
+         will be called pwm-sprd.
+
 config PWM_STI
        tristate "STiH4xx PWM support"
        depends on ARCH_STI
index 76b555b..26326ad 100644 (file)
@@ -41,6 +41,7 @@ obj-$(CONFIG_PWM_ROCKCHIP)    += pwm-rockchip.o
 obj-$(CONFIG_PWM_SAMSUNG)      += pwm-samsung.o
 obj-$(CONFIG_PWM_SIFIVE)       += pwm-sifive.o
 obj-$(CONFIG_PWM_SPEAR)                += pwm-spear.o
+obj-$(CONFIG_PWM_SPRD)         += pwm-sprd.o
 obj-$(CONFIG_PWM_STI)          += pwm-sti.o
 obj-$(CONFIG_PWM_STM32)                += pwm-stm32.o
 obj-$(CONFIG_PWM_STM32_LP)     += pwm-stm32-lp.o
index 8edfac1..f877e77 100644 (file)
@@ -448,26 +448,27 @@ EXPORT_SYMBOL_GPL(pwm_free);
 /**
  * pwm_apply_state() - atomically apply a new state to a PWM device
  * @pwm: PWM device
- * @state: new state to apply. This can be adjusted by the PWM driver
- *        if the requested config is not achievable, for example,
- *        ->duty_cycle and ->period might be approximated.
+ * @state: new state to apply
  */
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
+int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
 {
+       struct pwm_chip *chip;
        int err;
 
        if (!pwm || !state || !state->period ||
            state->duty_cycle > state->period)
                return -EINVAL;
 
+       chip = pwm->chip;
+
        if (state->period == pwm->state.period &&
            state->duty_cycle == pwm->state.duty_cycle &&
            state->polarity == pwm->state.polarity &&
            state->enabled == pwm->state.enabled)
                return 0;
 
-       if (pwm->chip->ops->apply) {
-               err = pwm->chip->ops->apply(pwm->chip, pwm, state);
+       if (chip->ops->apply) {
+               err = chip->ops->apply(chip, pwm, state);
                if (err)
                        return err;
 
@@ -477,7 +478,7 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
                 * FIXME: restore the initial state in case of error.
                 */
                if (state->polarity != pwm->state.polarity) {
-                       if (!pwm->chip->ops->set_polarity)
+                       if (!chip->ops->set_polarity)
                                return -ENOTSUPP;
 
                        /*
@@ -486,12 +487,12 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
                         * ->apply().
                         */
                        if (pwm->state.enabled) {
-                               pwm->chip->ops->disable(pwm->chip, pwm);
+                               chip->ops->disable(chip, pwm);
                                pwm->state.enabled = false;
                        }
 
-                       err = pwm->chip->ops->set_polarity(pwm->chip, pwm,
-                                                          state->polarity);
+                       err = chip->ops->set_polarity(chip, pwm,
+                                                     state->polarity);
                        if (err)
                                return err;
 
@@ -500,9 +501,9 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
 
                if (state->period != pwm->state.period ||
                    state->duty_cycle != pwm->state.duty_cycle) {
-                       err = pwm->chip->ops->config(pwm->chip, pwm,
-                                                    state->duty_cycle,
-                                                    state->period);
+                       err = chip->ops->config(pwm->chip, pwm,
+                                               state->duty_cycle,
+                                               state->period);
                        if (err)
                                return err;
 
@@ -512,11 +513,11 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
 
                if (state->enabled != pwm->state.enabled) {
                        if (state->enabled) {
-                               err = pwm->chip->ops->enable(pwm->chip, pwm);
+                               err = chip->ops->enable(chip, pwm);
                                if (err)
                                        return err;
                        } else {
-                               pwm->chip->ops->disable(pwm->chip, pwm);
+                               chip->ops->disable(chip, pwm);
                        }
 
                        pwm->state.enabled = state->enabled;
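[With pwm_apply_state() now taking a const state, a typical caller builds the state locally and hands it over read-only. A minimal consumer sketch; the function name is hypothetical:]

/* Hypothetical consumer: 1 kHz, 50% duty, applied atomically. */
static int example_apply(struct pwm_device *pwm)
{
	struct pwm_state state;

	pwm_get_state(pwm, &state);	/* start from the current state */
	state.period = 1000000;		/* 1 ms, in nanoseconds */
	state.duty_cycle = 500000;	/* 500 us -> 50% duty */
	state.enabled = true;

	return pwm_apply_state(pwm, &state);
}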
index d13a83f..dcbc048 100644 (file)
@@ -39,7 +39,7 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip)
 }
 
 static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
-                                struct pwm_state *state)
+                                const struct pwm_state *state)
 {
        struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
        struct atmel_hlcdc *hlcdc = chip->hlcdc;
index e5e1eaf..9ba7334 100644 (file)
@@ -209,7 +209,7 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
        struct pwm_state cstate;
@@ -318,19 +318,6 @@ static const struct atmel_pwm_data mchp_sam9x60_pwm_data = {
        },
 };
 
-static const struct platform_device_id atmel_pwm_devtypes[] = {
-       {
-               .name = "at91sam9rl-pwm",
-               .driver_data = (kernel_ulong_t)&atmel_sam9rl_pwm_data,
-       }, {
-               .name = "sama5d3-pwm",
-               .driver_data = (kernel_ulong_t)&atmel_sama5_pwm_data,
-       }, {
-               /* sentinel */
-       },
-};
-MODULE_DEVICE_TABLE(platform, atmel_pwm_devtypes);
-
 static const struct of_device_id atmel_pwm_dt_ids[] = {
        {
                .compatible = "atmel,at91sam9rl-pwm",
@@ -350,34 +337,20 @@ static const struct of_device_id atmel_pwm_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids);
 
-static inline const struct atmel_pwm_data *
-atmel_pwm_get_driver_data(struct platform_device *pdev)
-{
-       const struct platform_device_id *id;
-
-       if (pdev->dev.of_node)
-               return of_device_get_match_data(&pdev->dev);
-
-       id = platform_get_device_id(pdev);
-
-       return (struct atmel_pwm_data *)id->driver_data;
-}
-
 static int atmel_pwm_probe(struct platform_device *pdev)
 {
-       const struct atmel_pwm_data *data;
        struct atmel_pwm_chip *atmel_pwm;
        struct resource *res;
        int ret;
 
-       data = atmel_pwm_get_driver_data(pdev);
-       if (!data)
-               return -ENODEV;
-
        atmel_pwm = devm_kzalloc(&pdev->dev, sizeof(*atmel_pwm), GFP_KERNEL);
        if (!atmel_pwm)
                return -ENOMEM;
 
+       mutex_init(&atmel_pwm->isr_lock);
+       atmel_pwm->data = of_device_get_match_data(&pdev->dev);
+       atmel_pwm->updated_pwms = 0;
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        atmel_pwm->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(atmel_pwm->base))
@@ -395,17 +368,10 @@ static int atmel_pwm_probe(struct platform_device *pdev)
 
        atmel_pwm->chip.dev = &pdev->dev;
        atmel_pwm->chip.ops = &atmel_pwm_ops;
-
-       if (pdev->dev.of_node) {
-               atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags;
-               atmel_pwm->chip.of_pwm_n_cells = 3;
-       }
-
+       atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags;
+       atmel_pwm->chip.of_pwm_n_cells = 3;
        atmel_pwm->chip.base = -1;
        atmel_pwm->chip.npwm = 4;
-       atmel_pwm->data = data;
-       atmel_pwm->updated_pwms = 0;
-       mutex_init(&atmel_pwm->isr_lock);
 
        ret = pwmchip_add(&atmel_pwm->chip);
        if (ret < 0) {
@@ -437,7 +403,6 @@ static struct platform_driver atmel_pwm_driver = {
                .name = "atmel-pwm",
                .of_match_table = of_match_ptr(atmel_pwm_dt_ids),
        },
-       .id_table = atmel_pwm_devtypes,
        .probe = atmel_pwm_probe,
        .remove = atmel_pwm_remove,
 };
index d961a82..56c38cf 100644 (file)
@@ -115,7 +115,7 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                           struct pwm_state *state)
+                           const struct pwm_state *state)
 {
        unsigned long prescale = IPROC_PWM_PRESCALE_MIN;
        struct iproc_pwmc *ip = to_iproc_pwmc(chip);
index f6fe0b9..91e24f0 100644 (file)
@@ -21,7 +21,7 @@
 #define PERIOD(x)              (((x) * 0x10) + 0x10)
 #define DUTY(x)                        (((x) * 0x10) + 0x14)
 
-#define MIN_PERIOD             108             /* 9.2 MHz max. PWM clock */
+#define PERIOD_MIN             0x2
 
 struct bcm2835_pwm {
        struct pwm_chip chip;
@@ -64,6 +64,7 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
        unsigned long rate = clk_get_rate(pc->clk);
        unsigned long scaler;
+       u32 period;
 
        if (!rate) {
                dev_err(pc->dev, "failed to get clock rate\n");
@@ -71,17 +72,14 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate);
+       period = DIV_ROUND_CLOSEST(period_ns, scaler);
 
-       if (period_ns <= MIN_PERIOD) {
-               dev_err(pc->dev, "period %d not supported, minimum %d\n",
-                       period_ns, MIN_PERIOD);
+       if (period < PERIOD_MIN)
                return -EINVAL;
-       }
 
        writel(DIV_ROUND_CLOSEST(duty_ns, scaler),
               pc->base + DUTY(pwm->hwpwm));
-       writel(DIV_ROUND_CLOSEST(period_ns, scaler),
-              pc->base + PERIOD(pwm->hwpwm));
+       writel(period, pc->base + PERIOD(pwm->hwpwm));
 
        return 0;
 }
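
The period is now validated in clock ticks rather than nanoseconds. A standalone sketch of the same math, with a made-up 100 MHz PWM clock (the real rate comes from clk_get_rate()):

/* Userspace sketch of the bcm2835 period math; the clock rate is made up. */
#include <stdio.h>

#define NSEC_PER_SEC	1000000000UL
#define PERIOD_MIN	0x2
/* simplified unsigned variant of the kernel's DIV_ROUND_CLOSEST() */
#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

int main(void)
{
	unsigned long rate = 100000000UL;	/* hypothetical clk_get_rate() */
	unsigned long scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate);	/* 10 ns/tick */
	unsigned long period = DIV_ROUND_CLOSEST(50UL, scaler);		/* 50 ns -> 5 */

	printf("period register = %lu -> %s\n", period,
	       period < PERIOD_MIN ? "-EINVAL" : "accepted");
	return 0;
}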
@@ -155,8 +153,11 @@ static int bcm2835_pwm_probe(struct platform_device *pdev)
 
        pc->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pc->clk)) {
-               dev_err(&pdev->dev, "clock not found: %ld\n", PTR_ERR(pc->clk));
-               return PTR_ERR(pc->clk);
+               ret = PTR_ERR(pc->clk);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(&pdev->dev, "clock not found: %d\n", ret);
+
+               return ret;
        }
 
        ret = clk_prepare_enable(pc->clk);
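
The lookup failure path is now quiet on probe deferral, which was the point of the change. The pattern, reduced to a sketch (foo_* is hypothetical):

/* Sketch: log clk lookup failures, but stay silent on -EPROBE_DEFER. */
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int foo_get_clk(struct platform_device *pdev, struct clk **clk)
{
	*clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(*clk)) {
		int ret = PTR_ERR(*clk);

		if (ret != -EPROBE_DEFER)
			dev_err(&pdev->dev, "clock not found: %d\n", ret);

		return ret;
	}

	return 0;
}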
index 85bea2d..8949744 100644 (file)
@@ -93,7 +93,7 @@ static int cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index)
 }
 
 static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                            struct pwm_state *state)
+                            const struct pwm_state *state)
 {
        struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip);
        int duty_cycle;
index 9d31a21..59272a9 100644 (file)
@@ -227,7 +227,7 @@ static bool fsl_pwm_is_other_pwm_enabled(struct fsl_pwm_chip *fpc,
 
 static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc,
                                struct pwm_device *pwm,
-                               struct pwm_state *newstate)
+                               const struct pwm_state *newstate)
 {
        unsigned int duty;
        u32 reg_polarity;
@@ -292,17 +292,13 @@ static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc,
 
        regmap_update_bits(fpc->regmap, FTM_POL, BIT(pwm->hwpwm), reg_polarity);
 
-       newstate->period = fsl_pwm_ticks_to_ns(fpc,
-                                              fpc->period.mod_period + 1);
-       newstate->duty_cycle = fsl_pwm_ticks_to_ns(fpc, duty);
-
        ftm_set_write_protection(fpc);
 
        return 0;
 }
 
 static int fsl_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                        struct pwm_state *newstate)
+                        const struct pwm_state *newstate)
 {
        struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
        struct pwm_state *oldstate = &pwm->state;
index 753bd58..ad205fd 100644 (file)
@@ -149,7 +149,7 @@ static void hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                               struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct hibvt_pwm_chip *hi_pwm_chip = to_hibvt_pwm_chip(chip);
 
index e8385c1..9145f61 100644 (file)
@@ -89,7 +89,7 @@ to_imx_tpm_pwm_chip(struct pwm_chip *chip)
 static int pwm_imx_tpm_round_state(struct pwm_chip *chip,
                                   struct imx_tpm_pwm_param *p,
                                   struct pwm_state *real_state,
-                                  struct pwm_state *state)
+                                  const struct pwm_state *state)
 {
        struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip);
        u32 rate, prescale, period_count, clock_unit;
@@ -289,7 +289,7 @@ static int pwm_imx_tpm_apply_hw(struct pwm_chip *chip,
 
 static int pwm_imx_tpm_apply(struct pwm_chip *chip,
                             struct pwm_device *pwm,
-                            struct pwm_state *state)
+                            const struct pwm_state *state)
 {
        struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip);
        struct imx_tpm_pwm_param param;
index 434a351..ae11d85 100644 (file)
@@ -3,6 +3,10 @@
  * simple driver for PWM (Pulse Width Modulator) controller
  *
  * Derived from pxa PWM driver by eric miao <eric.miao@marvell.com>
+ *
+ * Limitations:
+ * - When disabled, the output is driven to 0 independent of the configured
+ *   polarity.
  */
 
 #include <linux/bitfield.h>
@@ -205,7 +209,7 @@ static void pwm_imx27_wait_fifo_slot(struct pwm_chip *chip,
 }
 
 static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        unsigned long period_cycles, duty_cycles, prescale;
        struct pwm_imx27_chip *imx = to_pwm_imx27_chip(chip);
index f901e8a..9d78cc2 100644 (file)
@@ -2,6 +2,11 @@
 /*
  *  Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
  *  JZ4740 platform PWM support
+ *
+ * Limitations:
+ * - The .apply callback doesn't complete the currently running period before
+ *   reconfiguring the hardware.
+ * - Each period starts with the inactive part.
  */
 
 #include <linux/clk.h>
@@ -83,7 +88,7 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 }
 
 static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                           struct pwm_state *state)
+                           const struct pwm_state *state)
 {
        struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip);
        unsigned long long tmp;
index 4098a46..75bbfe5 100644 (file)
@@ -122,7 +122,7 @@ static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
 }
 
 static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                         struct pwm_state *state)
+                         const struct pwm_state *state)
 {
        struct pwm_lpss_chip *lpwm = to_lpwm(chip);
        int ret;
index eb6674c..b94e0d0 100644 (file)
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Mediatek Pulse Width Modulator driver
+ * MediaTek Pulse Width Modulator driver
  *
  * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
  * Copyright (C) 2017 Zhi Mao <zhi.mao@mediatek.com>
  *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #include <linux/err.h>
 
 #define PWM_CLK_DIV_MAX                7
 
-enum {
-       MTK_CLK_MAIN = 0,
-       MTK_CLK_TOP,
-       MTK_CLK_PWM1,
-       MTK_CLK_PWM2,
-       MTK_CLK_PWM3,
-       MTK_CLK_PWM4,
-       MTK_CLK_PWM5,
-       MTK_CLK_PWM6,
-       MTK_CLK_PWM7,
-       MTK_CLK_PWM8,
-       MTK_CLK_MAX,
-};
-
-static const char * const mtk_pwm_clk_name[MTK_CLK_MAX] = {
-       "main", "top", "pwm1", "pwm2", "pwm3", "pwm4", "pwm5", "pwm6", "pwm7",
-       "pwm8"
-};
-
-struct mtk_pwm_platform_data {
+struct pwm_mediatek_of_data {
        unsigned int num_pwms;
        bool pwm45_fixup;
-       bool has_clks;
 };
 
 /**
- * struct mtk_pwm_chip - struct representing PWM chip
+ * struct pwm_mediatek_chip - struct representing PWM chip
  * @chip: linux PWM chip representation
  * @regs: base address of PWM chip
- * @clks: list of clocks
+ * @clk_top: the top clock generator
+ * @clk_main: the clock used by the PWM core
+ * @clk_pwms: the clocks used by the individual PWM channels
+ * @clk_freq: the fixed clock frequency of legacy MIPS SoCs
  */
-struct mtk_pwm_chip {
+struct pwm_mediatek_chip {
        struct pwm_chip chip;
        void __iomem *regs;
-       struct clk *clks[MTK_CLK_MAX];
-       const struct mtk_pwm_platform_data *soc;
+       struct clk *clk_top;
+       struct clk *clk_main;
+       struct clk **clk_pwms;
+       const struct pwm_mediatek_of_data *soc;
 };
 
-static const unsigned int mtk_pwm_reg_offset[] = {
+static const unsigned int pwm_mediatek_reg_offset[] = {
        0x0010, 0x0050, 0x0090, 0x00d0, 0x0110, 0x0150, 0x0190, 0x0220
 };
 
-static inline struct mtk_pwm_chip *to_mtk_pwm_chip(struct pwm_chip *chip)
+static inline struct pwm_mediatek_chip *
+to_pwm_mediatek_chip(struct pwm_chip *chip)
 {
-       return container_of(chip, struct mtk_pwm_chip, chip);
+       return container_of(chip, struct pwm_mediatek_chip, chip);
 }
 
-static int mtk_pwm_clk_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int pwm_mediatek_clk_enable(struct pwm_chip *chip,
+                                  struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        int ret;
 
-       if (!pc->soc->has_clks)
-               return 0;
-
-       ret = clk_prepare_enable(pc->clks[MTK_CLK_TOP]);
+       ret = clk_prepare_enable(pc->clk_top);
        if (ret < 0)
                return ret;
 
-       ret = clk_prepare_enable(pc->clks[MTK_CLK_MAIN]);
+       ret = clk_prepare_enable(pc->clk_main);
        if (ret < 0)
                goto disable_clk_top;
 
-       ret = clk_prepare_enable(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]);
+       ret = clk_prepare_enable(pc->clk_pwms[pwm->hwpwm]);
        if (ret < 0)
                goto disable_clk_main;
 
        return 0;
 
 disable_clk_main:
-       clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]);
+       clk_disable_unprepare(pc->clk_main);
 disable_clk_top:
-       clk_disable_unprepare(pc->clks[MTK_CLK_TOP]);
+       clk_disable_unprepare(pc->clk_top);
 
        return ret;
 }
 
-static void mtk_pwm_clk_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void pwm_mediatek_clk_disable(struct pwm_chip *chip,
+                                    struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
-
-       if (!pc->soc->has_clks)
-               return;
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
 
-       clk_disable_unprepare(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]);
-       clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]);
-       clk_disable_unprepare(pc->clks[MTK_CLK_TOP]);
+       clk_disable_unprepare(pc->clk_pwms[pwm->hwpwm]);
+       clk_disable_unprepare(pc->clk_main);
+       clk_disable_unprepare(pc->clk_top);
 }
 
-static inline u32 mtk_pwm_readl(struct mtk_pwm_chip *chip, unsigned int num,
-                               unsigned int offset)
+static inline u32 pwm_mediatek_readl(struct pwm_mediatek_chip *chip,
+                                    unsigned int num, unsigned int offset)
 {
-       return readl(chip->regs + mtk_pwm_reg_offset[num] + offset);
+       return readl(chip->regs + pwm_mediatek_reg_offset[num] + offset);
 }
 
-static inline void mtk_pwm_writel(struct mtk_pwm_chip *chip,
-                                 unsigned int num, unsigned int offset,
-                                 u32 value)
+static inline void pwm_mediatek_writel(struct pwm_mediatek_chip *chip,
+                                      unsigned int num, unsigned int offset,
+                                      u32 value)
 {
-       writel(value, chip->regs + mtk_pwm_reg_offset[num] + offset);
+       writel(value, chip->regs + pwm_mediatek_reg_offset[num] + offset);
 }
 
-static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                         int duty_ns, int period_ns)
+static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm,
+                              int duty_ns, int period_ns)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
-       struct clk *clk = pc->clks[MTK_CLK_PWM1 + pwm->hwpwm];
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH,
            reg_thres = PWMTHRES;
        u64 resolution;
        int ret;
 
-       ret = mtk_pwm_clk_enable(chip, pwm);
+       ret = pwm_mediatek_clk_enable(chip, pwm);
+
        if (ret < 0)
                return ret;
 
        /* Using picosecond resolution gives higher accuracy */
        resolution = (u64)NSEC_PER_SEC * 1000;
-       do_div(resolution, clk_get_rate(clk));
+       do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm]));
 
        cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000, resolution);
        while (cnt_period > 8191) {
@@ -164,7 +144,7 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        if (clkdiv > PWM_CLK_DIV_MAX) {
-               mtk_pwm_clk_disable(chip, pwm);
+               pwm_mediatek_clk_disable(chip, pwm);
                dev_err(chip->dev, "period %d not supported\n", period_ns);
                return -EINVAL;
        }
@@ -179,22 +159,22 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        cnt_duty = DIV_ROUND_CLOSEST_ULL((u64)duty_ns * 1000, resolution);
-       mtk_pwm_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv);
-       mtk_pwm_writel(pc, pwm->hwpwm, reg_width, cnt_period);
-       mtk_pwm_writel(pc, pwm->hwpwm, reg_thres, cnt_duty);
+       pwm_mediatek_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv);
+       pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period);
+       pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty);
 
-       mtk_pwm_clk_disable(chip, pwm);
+       pwm_mediatek_clk_disable(chip, pwm);
 
        return 0;
 }
 
-static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        u32 value;
        int ret;
 
-       ret = mtk_pwm_clk_enable(chip, pwm);
+       ret = pwm_mediatek_clk_enable(chip, pwm);
        if (ret < 0)
                return ret;
 
@@ -205,29 +185,28 @@ static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
        return 0;
 }
 
-static void mtk_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void pwm_mediatek_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        u32 value;
 
        value = readl(pc->regs);
        value &= ~BIT(pwm->hwpwm);
        writel(value, pc->regs);
 
-       mtk_pwm_clk_disable(chip, pwm);
+       pwm_mediatek_clk_disable(chip, pwm);
 }
 
-static const struct pwm_ops mtk_pwm_ops = {
-       .config = mtk_pwm_config,
-       .enable = mtk_pwm_enable,
-       .disable = mtk_pwm_disable,
+static const struct pwm_ops pwm_mediatek_ops = {
+       .config = pwm_mediatek_config,
+       .enable = pwm_mediatek_enable,
+       .disable = pwm_mediatek_disable,
        .owner = THIS_MODULE,
 };
 
-static int mtk_pwm_probe(struct platform_device *pdev)
+static int pwm_mediatek_probe(struct platform_device *pdev)
 {
-       const struct mtk_pwm_platform_data *data;
-       struct mtk_pwm_chip *pc;
+       struct pwm_mediatek_chip *pc;
        struct resource *res;
        unsigned int i;
        int ret;
@@ -236,31 +215,51 @@ static int mtk_pwm_probe(struct platform_device *pdev)
        if (!pc)
                return -ENOMEM;
 
-       data = of_device_get_match_data(&pdev->dev);
-       if (data == NULL)
-               return -EINVAL;
-       pc->soc = data;
+       pc->soc = of_device_get_match_data(&pdev->dev);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        pc->regs = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(pc->regs))
                return PTR_ERR(pc->regs);
 
-       for (i = 0; i < data->num_pwms + 2 && pc->soc->has_clks; i++) {
-               pc->clks[i] = devm_clk_get(&pdev->dev, mtk_pwm_clk_name[i]);
-               if (IS_ERR(pc->clks[i])) {
+       pc->clk_pwms = devm_kcalloc(&pdev->dev, pc->soc->num_pwms,
+                                   sizeof(*pc->clk_pwms), GFP_KERNEL);
+       if (!pc->clk_pwms)
+               return -ENOMEM;
+
+       pc->clk_top = devm_clk_get(&pdev->dev, "top");
+       if (IS_ERR(pc->clk_top)) {
+               dev_err(&pdev->dev, "clock: top fail: %ld\n",
+                       PTR_ERR(pc->clk_top));
+               return PTR_ERR(pc->clk_top);
+       }
+
+       pc->clk_main = devm_clk_get(&pdev->dev, "main");
+       if (IS_ERR(pc->clk_main)) {
+               dev_err(&pdev->dev, "clock: main fail: %ld\n",
+                       PTR_ERR(pc->clk_main));
+               return PTR_ERR(pc->clk_main);
+       }
+
+       for (i = 0; i < pc->soc->num_pwms; i++) {
+               char name[8];
+
+               snprintf(name, sizeof(name), "pwm%d", i + 1);
+
+               pc->clk_pwms[i] = devm_clk_get(&pdev->dev, name);
+               if (IS_ERR(pc->clk_pwms[i])) {
                        dev_err(&pdev->dev, "clock: %s fail: %ld\n",
-                               mtk_pwm_clk_name[i], PTR_ERR(pc->clks[i]));
-                       return PTR_ERR(pc->clks[i]);
+                               name, PTR_ERR(pc->clk_pwms[i]));
+                       return PTR_ERR(pc->clk_pwms[i]);
                }
        }
 
        platform_set_drvdata(pdev, pc);
 
        pc->chip.dev = &pdev->dev;
-       pc->chip.ops = &mtk_pwm_ops;
+       pc->chip.ops = &pwm_mediatek_ops;
        pc->chip.base = -1;
-       pc->chip.npwm = data->num_pwms;
+       pc->chip.npwm = pc->soc->num_pwms;
 
        ret = pwmchip_add(&pc->chip);
        if (ret < 0) {
@@ -271,55 +270,63 @@ static int mtk_pwm_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int mtk_pwm_remove(struct platform_device *pdev)
+static int pwm_mediatek_remove(struct platform_device *pdev)
 {
-       struct mtk_pwm_chip *pc = platform_get_drvdata(pdev);
+       struct pwm_mediatek_chip *pc = platform_get_drvdata(pdev);
 
        return pwmchip_remove(&pc->chip);
 }
 
-static const struct mtk_pwm_platform_data mt2712_pwm_data = {
+static const struct pwm_mediatek_of_data mt2712_pwm_data = {
        .num_pwms = 8,
        .pwm45_fixup = false,
-       .has_clks = true,
 };
 
-static const struct mtk_pwm_platform_data mt7622_pwm_data = {
+static const struct pwm_mediatek_of_data mt7622_pwm_data = {
        .num_pwms = 6,
        .pwm45_fixup = false,
-       .has_clks = true,
 };
 
-static const struct mtk_pwm_platform_data mt7623_pwm_data = {
+static const struct pwm_mediatek_of_data mt7623_pwm_data = {
        .num_pwms = 5,
        .pwm45_fixup = true,
-       .has_clks = true,
 };
 
-static const struct mtk_pwm_platform_data mt7628_pwm_data = {
+static const struct pwm_mediatek_of_data mt7628_pwm_data = {
        .num_pwms = 4,
        .pwm45_fixup = true,
-       .has_clks = false,
 };
 
-static const struct of_device_id mtk_pwm_of_match[] = {
+static const struct pwm_mediatek_of_data mt7629_pwm_data = {
+       .num_pwms = 1,
+       .pwm45_fixup = false,
+};
+
+static const struct pwm_mediatek_of_data mt8516_pwm_data = {
+       .num_pwms = 5,
+       .pwm45_fixup = false,
+};
+
+static const struct of_device_id pwm_mediatek_of_match[] = {
        { .compatible = "mediatek,mt2712-pwm", .data = &mt2712_pwm_data },
        { .compatible = "mediatek,mt7622-pwm", .data = &mt7622_pwm_data },
        { .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data },
        { .compatible = "mediatek,mt7628-pwm", .data = &mt7628_pwm_data },
+       { .compatible = "mediatek,mt7629-pwm", .data = &mt7629_pwm_data },
+       { .compatible = "mediatek,mt8516-pwm", .data = &mt8516_pwm_data },
        { },
 };
-MODULE_DEVICE_TABLE(of, mtk_pwm_of_match);
+MODULE_DEVICE_TABLE(of, pwm_mediatek_of_match);
 
-static struct platform_driver mtk_pwm_driver = {
+static struct platform_driver pwm_mediatek_driver = {
        .driver = {
-               .name = "mtk-pwm",
-               .of_match_table = mtk_pwm_of_match,
+               .name = "pwm-mediatek",
+               .of_match_table = pwm_mediatek_of_match,
        },
-       .probe = mtk_pwm_probe,
-       .remove = mtk_pwm_remove,
+       .probe = pwm_mediatek_probe,
+       .remove = pwm_mediatek_remove,
 };
-module_platform_driver(mtk_pwm_driver);
+module_platform_driver(pwm_mediatek_driver);
 
 MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 3cbff5c..6245bbd 100644 (file)
@@ -159,7 +159,7 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 }
 
 static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
-                         struct pwm_state *state)
+                         const struct pwm_state *state)
 {
        struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
        unsigned int duty, period, pre_div, cnt, duty_cnt;
@@ -265,7 +265,7 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm)
 }
 
 static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
        struct meson_pwm *meson = to_meson_pwm(chip);
index 04c0f6b..b14376b 100644 (file)
@@ -126,15 +126,13 @@ static int mxs_pwm_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct mxs_pwm_chip *mxs;
-       struct resource *res;
        int ret;
 
        mxs = devm_kzalloc(&pdev->dev, sizeof(*mxs), GFP_KERNEL);
        if (!mxs)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       mxs->base = devm_ioremap_resource(&pdev->dev, res);
+       mxs->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(mxs->base))
                return PTR_ERR(mxs->base);
 
index 5b2b8ec..852eb23 100644 (file)
@@ -158,7 +158,7 @@ static void rcar_pwm_disable(struct rcar_pwm_chip *rp)
 }
 
 static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                         struct pwm_state *state)
+                         const struct pwm_state *state)
 {
        struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip);
        struct pwm_state cur_state;
@@ -187,7 +187,7 @@ static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        /* The SYNC should be set to 0 even if rcar_pwm_set_counter failed */
        rcar_pwm_update(rp, RCAR_PWMCR_SYNC, 0, RCAR_PWMCR);
 
-       if (!ret && state->enabled)
+       if (!ret)
                ret = rcar_pwm_enable(rp);
 
        return ret;
index 51b96cb..73352e6 100644 (file)
@@ -90,16 +90,16 @@ static void rockchip_pwm_get_state(struct pwm_chip *chip,
                state->enabled = ((val & enable_conf) == enable_conf) ?
                                 true : false;
 
-       if (pc->data->supports_polarity) {
-               if (!(val & PWM_DUTY_POSITIVE))
-                       state->polarity = PWM_POLARITY_INVERSED;
-       }
+       if (pc->data->supports_polarity && !(val & PWM_DUTY_POSITIVE))
+               state->polarity = PWM_POLARITY_INVERSED;
+       else
+               state->polarity = PWM_POLARITY_NORMAL;
 
        clk_disable(pc->pclk);
 }
 
 static void rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                              struct pwm_state *state)
+                              const struct pwm_state *state)
 {
        struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
        unsigned long period, duty;
@@ -183,7 +183,7 @@ static int rockchip_pwm_enable(struct pwm_chip *chip,
 }
 
 static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                             struct pwm_state *state)
+                             const struct pwm_state *state)
 {
        struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
        struct pwm_state curstate;
@@ -212,12 +212,6 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                        goto out;
        }
 
-       /*
-        * Update the state with the real hardware, which can differ a bit
-        * because of period/duty_cycle approximation.
-        */
-       rockchip_pwm_get_state(chip, pwm, state);
-
 out:
        clk_disable(pc->pclk);
 
index a7c107f..cc63f9b 100644 (file)
@@ -147,7 +147,7 @@ static int pwm_sifive_enable(struct pwm_chip *chip, bool enable)
 }
 
 static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                           struct pwm_state *state)
+                           const struct pwm_state *state)
 {
        struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
        struct pwm_state cur_state;
@@ -250,10 +250,8 @@ static int pwm_sifive_probe(struct platform_device *pdev)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        ddata->regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(ddata->regs)) {
-               dev_err(dev, "Unable to map IO resources\n");
+       if (IS_ERR(ddata->regs))
                return PTR_ERR(ddata->regs);
-       }
 
        ddata->clk = devm_clk_get(dev, NULL);
        if (IS_ERR(ddata->clk)) {
diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
new file mode 100644 (file)
index 0000000..be23942
--- /dev/null
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Spreadtrum Communications Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+
+#define SPRD_PWM_PRESCALE      0x0
+#define SPRD_PWM_MOD           0x4
+#define SPRD_PWM_DUTY          0x8
+#define SPRD_PWM_ENABLE                0x18
+
+#define SPRD_PWM_MOD_MAX       GENMASK(7, 0)
+#define SPRD_PWM_DUTY_MSK      GENMASK(15, 0)
+#define SPRD_PWM_PRESCALE_MSK  GENMASK(7, 0)
+#define SPRD_PWM_ENABLE_BIT    BIT(0)
+
+#define SPRD_PWM_CHN_NUM       4
+#define SPRD_PWM_REGS_SHIFT    5
+#define SPRD_PWM_CHN_CLKS_NUM  2
+#define SPRD_PWM_CHN_OUTPUT_CLK        1
+
+struct sprd_pwm_chn {
+       struct clk_bulk_data clks[SPRD_PWM_CHN_CLKS_NUM];
+       u32 clk_rate;
+};
+
+struct sprd_pwm_chip {
+       void __iomem *base;
+       struct device *dev;
+       struct pwm_chip chip;
+       int num_pwms;
+       struct sprd_pwm_chn chn[SPRD_PWM_CHN_NUM];
+};
+
+/*
+ * The list of clocks required by the PWM channels; each channel has 2 clocks:
+ * an enable clock and a pwm clock.
+ */
+static const char * const sprd_pwm_clks[] = {
+       "enable0", "pwm0",
+       "enable1", "pwm1",
+       "enable2", "pwm2",
+       "enable3", "pwm3",
+};
+
+static u32 sprd_pwm_read(struct sprd_pwm_chip *spc, u32 hwid, u32 reg)
+{
+       u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT);
+
+       return readl_relaxed(spc->base + offset);
+}
+
+static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 hwid,
+                          u32 reg, u32 val)
+{
+       u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT);
+
+       writel_relaxed(val, spc->base + offset);
+}
+
+static void sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+                              struct pwm_state *state)
+{
+       struct sprd_pwm_chip *spc =
+               container_of(chip, struct sprd_pwm_chip, chip);
+       struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
+       u32 val, duty, prescale;
+       u64 tmp;
+       int ret;
+
+       /*
+        * The clocks of the PWM channel have to be enabled first before
+        * reading the registers.
+        */
+       ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM, chn->clks);
+       if (ret) {
+               dev_err(spc->dev, "failed to enable pwm%u clocks\n",
+                       pwm->hwpwm);
+               return;
+       }
+
+       val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_ENABLE);
+       if (val & SPRD_PWM_ENABLE_BIT)
+               state->enabled = true;
+       else
+               state->enabled = false;
+
+       /*
+        * The hardware provides a counter that is fed by the source clock.
+        * The period length is (PRESCALE + 1) * MOD counter steps.
+        * The duty cycle length is (PRESCALE + 1) * DUTY counter steps.
+        * Thus period_ns and duty_ns are calculated as:
+        * period_ns = NSEC_PER_SEC * (prescale + 1) * mod / clk_rate
+        * duty_ns = NSEC_PER_SEC * (prescale + 1) * duty / clk_rate
+        */
+       val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_PRESCALE);
+       prescale = val & SPRD_PWM_PRESCALE_MSK;
+       tmp = (prescale + 1) * NSEC_PER_SEC * SPRD_PWM_MOD_MAX;
+       state->period = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate);
+
+       val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_DUTY);
+       duty = val & SPRD_PWM_DUTY_MSK;
+       tmp = (prescale + 1) * NSEC_PER_SEC * duty;
+       state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate);
+
+       /* Disable the PWM clocks if the PWM channel is not enabled. */
+       if (!state->enabled)
+               clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks);
+}
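
Plugging hypothetical numbers into the read-back formula: with a 26 MHz channel clock, PRESCALE = 0 and MOD = 255, the period comes out at roughly 9808 ns. A standalone check:

/* Userspace sketch of the sprd period read-back; the 26 MHz rate is made up. */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC		1000000000ULL
#define SPRD_PWM_MOD_MAX	255ULL

int main(void)
{
	uint64_t clk_rate = 26000000;	/* hypothetical channel clock */
	uint64_t prescale = 0;
	uint64_t tmp = (prescale + 1) * NSEC_PER_SEC * SPRD_PWM_MOD_MAX;
	uint64_t period_ns = (tmp + clk_rate / 2) / clk_rate;	/* round-closest */

	printf("period = %llu ns\n", (unsigned long long)period_ns);	/* 9808 */
	return 0;
}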
+
+static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm,
+                          int duty_ns, int period_ns)
+{
+       struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
+       u32 prescale, duty;
+       u64 tmp;
+
+       /*
+        * The hardware provides a counter that is fed by the source clock.
+        * The period length is (PRESCALE + 1) * MOD counter steps.
+        * The duty cycle length is (PRESCALE + 1) * DUTY counter steps.
+        *
+        * To keep the maths simple we're always using MOD = SPRD_PWM_MOD_MAX.
+        * The value for PRESCALE is selected such that the resulting period
+        * is as long as possible without exceeding the requested one, given
+        * these settings (MOD = SPRD_PWM_MOD_MAX and the input clock).
+        */
+       duty = duty_ns * SPRD_PWM_MOD_MAX / period_ns;
+
+       tmp = (u64)chn->clk_rate * period_ns;
+       do_div(tmp, NSEC_PER_SEC);
+       prescale = DIV_ROUND_CLOSEST_ULL(tmp, SPRD_PWM_MOD_MAX) - 1;
+       if (prescale > SPRD_PWM_PRESCALE_MSK)
+               prescale = SPRD_PWM_PRESCALE_MSK;
+
+       /*
+        * Note: Writing DUTY triggers the hardware to actually apply the
+        * values written to MOD and DUTY to the output, so DUTY must be
+        * written last.
+        *
+        * The hardware ensures that the currently running period is completed
+        * before a new configuration takes effect, to avoid mixed settings.
+        */
+       sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_PRESCALE, prescale);
+       sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_MOD, SPRD_PWM_MOD_MAX);
+       sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_DUTY, duty);
+
+       return 0;
+}
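
Running that selection for the same hypothetical 26 MHz clock and a 1 ms, 50 % request: the period spans 26000 ticks, so PRESCALE becomes round(26000 / 255) - 1 = 101 and DUTY becomes 127:

/* Userspace sketch of the sprd prescale/duty math; all inputs are made up. */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC		1000000000ULL
#define SPRD_PWM_MOD_MAX	255ULL

int main(void)
{
	uint64_t clk_rate = 26000000, period_ns = 1000000, duty_ns = 500000;
	uint64_t ticks = clk_rate * period_ns / NSEC_PER_SEC;	/* 26000 */
	uint64_t prescale = (ticks + SPRD_PWM_MOD_MAX / 2) / SPRD_PWM_MOD_MAX - 1;
	uint64_t duty = duty_ns * SPRD_PWM_MOD_MAX / period_ns;

	printf("prescale = %llu, duty = %llu\n",	/* 101, 127 */
	       (unsigned long long)prescale, (unsigned long long)duty);
	return 0;
}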
+
+static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                         const struct pwm_state *state)
+{
+       struct sprd_pwm_chip *spc =
+               container_of(chip, struct sprd_pwm_chip, chip);
+       struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
+       struct pwm_state *cstate = &pwm->state;
+       int ret;
+
+       if (state->enabled) {
+               if (!cstate->enabled) {
+                       /*
+                        * The clocks of the PWM channel have to be enabled
+                        * first before writing to the registers.
+                        */
+                       ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM,
+                                                     chn->clks);
+                       if (ret) {
+                               dev_err(spc->dev,
+                                       "failed to enable pwm%u clocks\n",
+                                       pwm->hwpwm);
+                               return ret;
+                       }
+               }
+
+               if (state->period != cstate->period ||
+                   state->duty_cycle != cstate->duty_cycle) {
+                       ret = sprd_pwm_config(spc, pwm, state->duty_cycle,
+                                             state->period);
+                       if (ret)
+                               return ret;
+               }
+
+               sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 1);
+       } else if (cstate->enabled) {
+               /*
+                * Note: After setting SPRD_PWM_ENABLE to zero, the controller
+                * will not wait for current period to be completed, instead it
+                * will stop the PWM channel immediately.
+                */
+               sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 0);
+
+               clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks);
+       }
+
+       return 0;
+}
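
From the consumer side this driver is exercised entirely through the atomic API, since only .apply and .get_state are implemented. A hedged sketch of a client; the lookup and the values are illustrative only:

/* Sketch: a PWM consumer programming a channel via the atomic API. */
#include <linux/device.h>
#include <linux/err.h>
#include <linux/pwm.h>

static int foo_start_pwm(struct device *dev)
{
	struct pwm_device *pwm = devm_pwm_get(dev, NULL);
	struct pwm_state state;

	if (IS_ERR(pwm))
		return PTR_ERR(pwm);

	pwm_init_state(pwm, &state);	/* seed from DT/board config */
	state.period = 1000000;		/* 1 ms */
	state.duty_cycle = 500000;	/* 50 % */
	state.enabled = true;

	return pwm_apply_state(pwm, &state);
}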
+
+static const struct pwm_ops sprd_pwm_ops = {
+       .apply = sprd_pwm_apply,
+       .get_state = sprd_pwm_get_state,
+       .owner = THIS_MODULE,
+};
+
+static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc)
+{
+       struct clk *clk_pwm;
+       int ret, i;
+
+       for (i = 0; i < SPRD_PWM_CHN_NUM; i++) {
+               struct sprd_pwm_chn *chn = &spc->chn[i];
+               int j;
+
+               for (j = 0; j < SPRD_PWM_CHN_CLKS_NUM; ++j)
+                       chn->clks[j].id =
+                               sprd_pwm_clks[i * SPRD_PWM_CHN_CLKS_NUM + j];
+
+               ret = devm_clk_bulk_get(spc->dev, SPRD_PWM_CHN_CLKS_NUM,
+                                       chn->clks);
+               if (ret) {
+                       if (ret == -ENOENT)
+                               break;
+
+                       if (ret != -EPROBE_DEFER)
+                               dev_err(spc->dev,
+                                       "failed to get channel clocks\n");
+
+                       return ret;
+               }
+
+               clk_pwm = chn->clks[SPRD_PWM_CHN_OUTPUT_CLK].clk;
+               chn->clk_rate = clk_get_rate(clk_pwm);
+       }
+
+       if (!i) {
+               dev_err(spc->dev, "no available PWM channels\n");
+               return -ENODEV;
+       }
+
+       spc->num_pwms = i;
+
+       return 0;
+}
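
The bulk-clock helpers take an array of clk_bulk_data with the .id fields filled in beforehand, which is what the loop above does; the probe then stops counting channels at the first -ENOENT. One channel's setup, as a sketch (foo_* is hypothetical):

/* Sketch: acquire the "enableN"/"pwmN" clock pair of one channel. */
#include <linux/clk.h>
#include <linux/device.h>

static int foo_get_chn_clks(struct device *dev, struct clk_bulk_data *clks,
			    unsigned int hwid)
{
	static const char * const names[] = {
		"enable0", "pwm0",
		"enable1", "pwm1",
	};

	clks[0].id = names[hwid * 2];
	clks[1].id = names[hwid * 2 + 1];

	return devm_clk_bulk_get(dev, 2, clks);
}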
+
+static int sprd_pwm_probe(struct platform_device *pdev)
+{
+       struct sprd_pwm_chip *spc;
+       int ret;
+
+       spc = devm_kzalloc(&pdev->dev, sizeof(*spc), GFP_KERNEL);
+       if (!spc)
+               return -ENOMEM;
+
+       spc->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(spc->base))
+               return PTR_ERR(spc->base);
+
+       spc->dev = &pdev->dev;
+       platform_set_drvdata(pdev, spc);
+
+       ret = sprd_pwm_clk_init(spc);
+       if (ret)
+               return ret;
+
+       spc->chip.dev = &pdev->dev;
+       spc->chip.ops = &sprd_pwm_ops;
+       spc->chip.base = -1;
+       spc->chip.npwm = spc->num_pwms;
+
+       ret = pwmchip_add(&spc->chip);
+       if (ret)
+               dev_err(&pdev->dev, "failed to add PWM chip\n");
+
+       return ret;
+}
+
+static int sprd_pwm_remove(struct platform_device *pdev)
+{
+       struct sprd_pwm_chip *spc = platform_get_drvdata(pdev);
+
+       return pwmchip_remove(&spc->chip);
+}
+
+static const struct of_device_id sprd_pwm_of_match[] = {
+       { .compatible = "sprd,ums512-pwm", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, sprd_pwm_of_match);
+
+static struct platform_driver sprd_pwm_driver = {
+       .driver = {
+               .name = "sprd-pwm",
+               .of_match_table = sprd_pwm_of_match,
+       },
+       .probe = sprd_pwm_probe,
+       .remove = sprd_pwm_remove,
+};
+
+module_platform_driver(sprd_pwm_driver);
+
+MODULE_DESCRIPTION("Spreadtrum PWM Driver");
+MODULE_LICENSE("GPL v2");
index 20450e3..1508616 100644 (file)
@@ -564,10 +564,8 @@ static int sti_pwm_probe(struct platform_device *pdev)
                return PTR_ERR(pc->regmap);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(&pdev->dev, "Failed to obtain IRQ\n");
+       if (irq < 0)
                return irq;
-       }
 
        ret = devm_request_irq(&pdev->dev, irq, sti_pwm_interrupt, 0,
                               pdev->name, pc);
index 2211a64..67fca62 100644 (file)
@@ -32,7 +32,7 @@ static inline struct stm32_pwm_lp *to_stm32_pwm_lp(struct pwm_chip *chip)
 #define STM32_LPTIM_MAX_PRESCALER      128
 
 static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                             struct pwm_state *state)
+                             const struct pwm_state *state)
 {
        struct stm32_pwm_lp *priv = to_stm32_pwm_lp(chip);
        unsigned long long prd, div, dty;
@@ -59,6 +59,12 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        /* Calculate the period and prescaler value */
        div = (unsigned long long)clk_get_rate(priv->clk) * state->period;
        do_div(div, NSEC_PER_SEC);
+       if (!div) {
+               /* Clock is too slow to achieve requested period. */
+               dev_dbg(priv->chip.dev, "Can't reach %u ns\n", state->period);
+               return -EINVAL;
+       }
+
        prd = div;
        while (div > STM32_LPTIM_MAX_ARR) {
                presc++;
index 740e2de..359b085 100644 (file)
@@ -440,7 +440,7 @@ static void stm32_pwm_disable(struct stm32_pwm *priv, int ch)
 }
 
 static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        bool enabled;
        struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
@@ -468,7 +468,7 @@ static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
-                                 struct pwm_state *state)
+                                 const struct pwm_state *state)
 {
        struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
        int ret;
index de78c82..6f5840a 100644 (file)
@@ -145,7 +145,7 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
 }
 
 static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
-                              struct pwm_state *state,
+                              const struct pwm_state *state,
                               u32 *dty, u32 *prd, unsigned int *prsclr)
 {
        u64 clk_rate, div = 0;
@@ -192,17 +192,11 @@ static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
        *dty = div;
        *prsclr = prescaler;
 
-       div = (u64)pval * NSEC_PER_SEC * *prd;
-       state->period = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
-
-       div = (u64)pval * NSEC_PER_SEC * *dty;
-       state->duty_cycle = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
-
        return 0;
 }
 
 static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
        struct pwm_state cstate;
index e24f4be..e2c21cc 100644 (file)
@@ -148,7 +148,7 @@ static int zx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int zx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                       struct pwm_state *state)
+                       const struct pwm_state *state)
 {
        struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip);
        struct pwm_state cstate;
index afe9447..a46be22 100644 (file)
@@ -5053,6 +5053,19 @@ regulator_register(const struct regulator_desc *regulator_desc,
 
        init_data = regulator_of_get_init_data(dev, regulator_desc, config,
                                               &rdev->dev.of_node);
+
+       /*
+        * Sometimes not all resources have been probed yet, so we need to take
+        * that into account. This happens most of the time when the ena_gpiod
+        * comes from a GPIO expander or similar.
+        */
+       if (PTR_ERR(init_data) == -EPROBE_DEFER) {
+               kfree(config);
+               kfree(rdev);
+               ret = -EPROBE_DEFER;
+               goto rinse;
+       }
+
        /*
         * We need to keep track of any GPIO descriptor coming from the
         * device tree until we have handled it over to the core. If the
index 56f3f72..710e670 100644 (file)
@@ -136,7 +136,6 @@ static int da9062_buck_set_mode(struct regulator_dev *rdev, unsigned mode)
 static unsigned da9062_buck_get_mode(struct regulator_dev *rdev)
 {
        struct da9062_regulator *regl = rdev_get_drvdata(rdev);
-       struct regmap_field *field;
        unsigned int val, mode = 0;
        int ret;
 
@@ -158,18 +157,7 @@ static unsigned da9062_buck_get_mode(struct regulator_dev *rdev)
                return REGULATOR_MODE_NORMAL;
        }
 
-       /* Detect current regulator state */
-       ret = regmap_field_read(regl->suspend, &val);
-       if (ret < 0)
-               return 0;
-
-       /* Read regulator mode from proper register, depending on state */
-       if (val)
-               field = regl->suspend_sleep;
-       else
-               field = regl->sleep;
-
-       ret = regmap_field_read(field, &val);
+       ret = regmap_field_read(regl->sleep, &val);
        if (ret < 0)
                return 0;
 
@@ -208,21 +196,9 @@ static int da9062_ldo_set_mode(struct regulator_dev *rdev, unsigned mode)
 static unsigned da9062_ldo_get_mode(struct regulator_dev *rdev)
 {
        struct da9062_regulator *regl = rdev_get_drvdata(rdev);
-       struct regmap_field *field;
        int ret, val;
 
-       /* Detect current regulator state */
-       ret = regmap_field_read(regl->suspend, &val);
-       if (ret < 0)
-               return 0;
-
-       /* Read regulator mode from proper register, depending on state */
-       if (val)
-               field = regl->suspend_sleep;
-       else
-               field = regl->sleep;
-
-       ret = regmap_field_read(field, &val);
+       ret = regmap_field_read(regl->sleep, &val);
        if (ret < 0)
                return 0;
 
@@ -408,10 +384,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
                        __builtin_ffs((int)DA9062AA_BUCK1_MODE_MASK) - 1,
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_BUCK1_MODE_MASK)) - 1),
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VBUCK1_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_BUCK1_CONT,
+                       __builtin_ffs((int)DA9062AA_BUCK1_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VBUCK1_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_BUCK1_CONF_MASK) - 1),
        },
        {
                .desc.id = DA9061_ID_BUCK2,
@@ -444,10 +420,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
                        __builtin_ffs((int)DA9062AA_BUCK3_MODE_MASK) - 1,
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_BUCK3_MODE_MASK)) - 1),
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VBUCK3_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_BUCK3_CONT,
+                       __builtin_ffs((int)DA9062AA_BUCK3_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VBUCK3_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_BUCK3_CONF_MASK) - 1),
        },
        {
                .desc.id = DA9061_ID_BUCK3,
@@ -480,10 +456,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
                        __builtin_ffs((int)DA9062AA_BUCK4_MODE_MASK) - 1,
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_BUCK4_MODE_MASK)) - 1),
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VBUCK4_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_BUCK4_CONT,
+                       __builtin_ffs((int)DA9062AA_BUCK4_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VBUCK4_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_BUCK4_CONF_MASK) - 1),
        },
        {
                .desc.id = DA9061_ID_LDO1,
@@ -509,10 +485,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO1_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO1_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO1_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO1_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO1_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO1_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO1_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO1_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
@@ -542,10 +518,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO2_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO2_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO2_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO2_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO2_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO2_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO2_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO2_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
@@ -575,10 +551,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO3_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO3_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO3_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO3_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO3_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO3_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO3_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO3_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
@@ -608,10 +584,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO4_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO4_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO4_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO4_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO4_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO4_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO4_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO4_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
@@ -652,10 +628,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        __builtin_ffs((int)DA9062AA_BUCK1_MODE_MASK) - 1,
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_BUCK1_MODE_MASK)) - 1),
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VBUCK1_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_BUCK1_CONT,
+                       __builtin_ffs((int)DA9062AA_BUCK1_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VBUCK1_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_BUCK1_CONF_MASK) - 1),
        },
        {
                .desc.id = DA9062_ID_BUCK2,
@@ -688,10 +664,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        __builtin_ffs((int)DA9062AA_BUCK2_MODE_MASK) - 1,
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_BUCK2_MODE_MASK)) - 1),
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VBUCK2_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_BUCK2_CONT,
+                       __builtin_ffs((int)DA9062AA_BUCK2_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VBUCK2_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_BUCK2_CONF_MASK) - 1),
        },
        {
                .desc.id = DA9062_ID_BUCK3,
@@ -724,10 +700,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        __builtin_ffs((int)DA9062AA_BUCK3_MODE_MASK) - 1,
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_BUCK3_MODE_MASK)) - 1),
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VBUCK3_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_BUCK3_CONT,
+                       __builtin_ffs((int)DA9062AA_BUCK3_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VBUCK3_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_BUCK3_CONF_MASK) - 1),
        },
        {
                .desc.id = DA9062_ID_BUCK4,
@@ -760,10 +736,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        __builtin_ffs((int)DA9062AA_BUCK4_MODE_MASK) - 1,
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_BUCK4_MODE_MASK)) - 1),
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VBUCK4_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_BUCK4_CONT,
+                       __builtin_ffs((int)DA9062AA_BUCK4_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VBUCK4_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_BUCK4_CONF_MASK) - 1),
        },
        {
                .desc.id = DA9062_ID_LDO1,
@@ -789,10 +765,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO1_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO1_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO1_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO1_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO1_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO1_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO1_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO1_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
@@ -822,10 +798,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO2_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO2_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO2_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO2_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO2_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO2_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO2_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO2_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
@@ -855,10 +831,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO3_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO3_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO3_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO3_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO3_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO3_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO3_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO3_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
@@ -888,10 +864,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
                        sizeof(unsigned int) * 8 -
                        __builtin_clz((DA9062AA_LDO4_SL_B_MASK)) - 1),
                .suspend_vsel_reg = DA9062AA_VLDO4_B,
-               .suspend = REG_FIELD(DA9062AA_DVC_1,
-                       __builtin_ffs((int)DA9062AA_VLDO4_SEL_MASK) - 1,
+               .suspend = REG_FIELD(DA9062AA_LDO4_CONT,
+                       __builtin_ffs((int)DA9062AA_LDO4_CONF_MASK) - 1,
                        sizeof(unsigned int) * 8 -
-                       __builtin_clz((DA9062AA_VLDO4_SEL_MASK)) - 1),
+                       __builtin_clz(DA9062AA_LDO4_CONF_MASK) - 1),
                .oc_event = REG_FIELD(DA9062AA_STATUS_D,
                        __builtin_ffs((int)DA9062AA_LDO4_ILIM_MASK) - 1,
                        sizeof(unsigned int) * 8 -
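
Every .suspend (and .sleep/.mode) field above is built the same way: REG_FIELD(reg, lsb, msb) with lsb = __builtin_ffs(mask) - 1 and msb = 8 * sizeof(unsigned int) - __builtin_clz(mask) - 1. A standalone check with a hypothetical two-bit mask of 0x60:

/* Userspace sketch of the ffs/clz bit-range math; the mask is made up. */
#include <stdio.h>

int main(void)
{
	unsigned int mask = 0x60;	/* hypothetical field in bits 5..6 */
	int lsb = __builtin_ffs((int)mask) - 1;				/* 5 */
	int msb = sizeof(unsigned int) * 8 - __builtin_clz(mask) - 1;	/* 6 */

	printf("REG_FIELD(reg, %d, %d)\n", lsb, msb);
	return 0;
}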
index d90a6fd..f815330 100644 (file)
@@ -144,8 +144,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct fixed_voltage_config *config;
        struct fixed_voltage_data *drvdata;
-       const struct fixed_dev_type *drvtype =
-               of_match_device(dev->driver->of_match_table, dev)->data;
+       const struct fixed_dev_type *drvtype = of_device_get_match_data(dev);
        struct regulator_config cfg = { };
        enum gpiod_flags gflags;
        int ret;
@@ -177,7 +176,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
        drvdata->desc.type = REGULATOR_VOLTAGE;
        drvdata->desc.owner = THIS_MODULE;
 
-       if (drvtype->has_enable_clock) {
+       if (drvtype && drvtype->has_enable_clock) {
                drvdata->desc.ops = &fixed_voltage_clkenabled_ops;
 
                drvdata->enable_clock = devm_clk_get(dev, NULL);
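
The hunk above swaps an open-coded of_match_device() lookup for of_device_get_match_data(), and the added drvtype check matters because the helper returns NULL when the device was not probed through an OF match table. A hedged kernel-style sketch of the pattern; the struct and function names are illustrative:

#include <linux/of_device.h>
#include <linux/platform_device.h>

struct example_dev_type {
	bool has_enable_clock;
};

static int example_probe(struct platform_device *pdev)
{
	/* NULL when there was no OF match (e.g. a legacy platform probe),
	 * so every dereference must be guarded. */
	const struct example_dev_type *drvtype =
		of_device_get_match_data(&pdev->dev);

	if (drvtype && drvtype->has_enable_clock) {
		/* acquire and prepare the enable clock here */
	}
	return 0;
}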
index ff97cc5..9b05e03 100644 (file)
@@ -210,6 +210,7 @@ static const struct regulator_desc lochnagar_regulators[] = {
 
                .enable_time = 3000,
                .ramp_delay = 1000,
+               .off_on_delay = 15000,
 
                .owner = THIS_MODULE,
        },
index afefb29..87637eb 100644 (file)
@@ -231,12 +231,12 @@ static int of_get_regulation_constraints(struct device *dev,
                                        "regulator-off-in-suspend"))
                        suspend_state->enabled = DISABLE_IN_SUSPEND;
 
-               if (!of_property_read_u32(np, "regulator-suspend-min-microvolt",
-                                         &pval))
+               if (!of_property_read_u32(suspend_np,
+                               "regulator-suspend-min-microvolt", &pval))
                        suspend_state->min_uV = pval;
 
-               if (!of_property_read_u32(np, "regulator-suspend-max-microvolt",
-                                         &pval))
+               if (!of_property_read_u32(suspend_np,
+                               "regulator-suspend-max-microvolt", &pval))
                        suspend_state->max_uV = pval;
 
                if (!of_property_read_u32(suspend_np,
@@ -445,11 +445,20 @@ struct regulator_init_data *regulator_of_get_init_data(struct device *dev,
                goto error;
        }
 
-       if (desc->of_parse_cb && desc->of_parse_cb(child, desc, config)) {
-               dev_err(dev,
-                       "driver callback failed to parse DT for regulator %pOFn\n",
-                       child);
-               goto error;
+       if (desc->of_parse_cb) {
+               int ret;
+
+               ret = desc->of_parse_cb(child, desc, config);
+               if (ret) {
+                       if (ret == -EPROBE_DEFER) {
+                               of_node_put(child);
+                               return ERR_PTR(-EPROBE_DEFER);
+                       }
+                       dev_err(dev,
+                               "driver callback failed to parse DT for regulator %pOFn\n",
+                               child);
+                       goto error;
+               }
        }
 
        *node = child;
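
The of_parse_cb hunk above separates -EPROBE_DEFER from genuine parse failures so a defer request propagates to the caller instead of being logged as an error. A minimal sketch of that pattern, with hypothetical names:

#include <linux/device.h>
#include <linux/of.h>

/* Hypothetical helper: pass -EPROBE_DEFER through untouched, treat any
 * other non-zero return as a hard parse failure. */
static int example_run_parse_cb(struct device *dev, struct device_node *child,
				int (*cb)(struct device_node *np))
{
	int ret = cb(child);

	if (ret == -EPROBE_DEFER)
		return ret;	/* caller retries the whole probe later */
	if (ret)
		dev_err(dev, "failed to parse %pOFn: %d\n", child, ret);
	return ret;
}

Note that the real hunk also drops its of_node reference before returning the defer; the sketch leaves reference handling to the caller.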
index df5df1c..6895379 100644 (file)
@@ -788,7 +788,13 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
 
                /* SW2~SW4 high bit check and modify the voltage value table */
                if (i >= sw_check_start && i <= sw_check_end) {
-                       regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val);
+                       ret = regmap_read(pfuze_chip->regmap,
+                                               desc->vsel_reg, &val);
+                       if (ret) {
+                               dev_err(&client->dev, "Failed to read from the register\n");
+                               return ret;
+                       }
+
                        if (val & sw_hi) {
                                if (pfuze_chip->chip_id == PFUZE3000 ||
                                        pfuze_chip->chip_id == PFUZE3001) {
index db6c085..0246b6f 100644 (file)
@@ -735,8 +735,8 @@ static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = {
 static const struct rpmh_vreg_hw_data pmic5_bob = {
        .regulator_type = VRM,
        .ops = &rpmh_regulator_vrm_bypass_ops,
-       .voltage_range = REGULATOR_LINEAR_RANGE(300000, 0, 135, 32000),
-       .n_voltages = 136,
+       .voltage_range = REGULATOR_LINEAR_RANGE(3000000, 0, 31, 32000),
+       .n_voltages = 32,
        .pmic_mode_map = pmic_mode_map_pmic5_bob,
        .of_map_mode = rpmh_regulator_pmic4_bob_of_map_mode,
 };
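
The corrected BOB entry encodes a linear range starting at 3000000 uV with selectors 0..31 in 32000 uV steps, so n_voltages is 31 - 0 + 1 = 32. A runnable sketch of the linear-range arithmetic, mirroring those values:

#include <stdio.h>

struct linear_range {
	unsigned int min_uV, min_sel, max_sel, step_uV;
};

int main(void)
{
	/* Values mirror the corrected pmic5_bob entry above. */
	struct linear_range r = { 3000000, 0, 31, 32000 };
	unsigned int n_voltages = r.max_sel - r.min_sel + 1;

	printf("n_voltages = %u\n", n_voltages);		/* 32 */
	printf("sel 31 -> %u uV\n",
	       r.min_uV + (31 - r.min_sel) * r.step_uV);	/* 3992000 */
	return 0;
}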
index cced1ff..89b9314 100644 (file)
@@ -173,19 +173,14 @@ static int ti_abb_wait_txdone(struct device *dev, struct ti_abb *abb)
        while (timeout++ <= abb->settling_time) {
                status = ti_abb_check_txdone(abb);
                if (status)
-                       break;
+                       return 0;
 
                udelay(1);
        }
 
-       if (timeout > abb->settling_time) {
-               dev_warn_ratelimited(dev,
-                                    "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
-                                    __func__, timeout, readl(abb->int_base));
-               return -ETIMEDOUT;
-       }
-
-       return 0;
+       dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
+                            __func__, timeout, readl(abb->int_base));
+       return -ETIMEDOUT;
 }
 
 /**
@@ -205,19 +200,14 @@ static int ti_abb_clear_all_txdone(struct device *dev, const struct ti_abb *abb)
 
                status = ti_abb_check_txdone(abb);
                if (!status)
-                       break;
+                       return 0;
 
                udelay(1);
        }
 
-       if (timeout > abb->settling_time) {
-               dev_warn_ratelimited(dev,
-                                    "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
-                                    __func__, timeout, readl(abb->int_base));
-               return -ETIMEDOUT;
-       }
-
-       return 0;
+       dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
+                            __func__, timeout, readl(abb->int_base));
+       return -ETIMEDOUT;
 }
 
 /**
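
Both ti_abb hunks make the same simplification: success returns from inside the polling loop, so everything after the loop is unambiguously the timeout path and the old timeout re-check disappears. A runnable user-space sketch of that shape; the completion test is a stand-in:

#include <stdbool.h>
#include <stdio.h>

static bool txdone(int tick)
{
	return tick >= 3;	/* pretend the hardware completes at tick 3 */
}

/* Poll up to 'budget' ticks; returning on success means falling out of
 * the loop can only mean a timeout. */
static int wait_txdone(int budget)
{
	for (int tick = 0; tick <= budget; tick++)
		if (txdone(tick))
			return 0;

	fprintf(stderr, "TRANXDONE timeout (%d ticks)\n", budget);
	return -1;
}

int main(void)
{
	printf("budget 10 -> %d\n", wait_txdone(10));	/* 0, completes */
	printf("budget 1  -> %d\n", wait_txdone(1));	/* -1, times out */
	return 0;
}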
index c6d3c84..b46df80 100644 (file)
@@ -102,6 +102,7 @@ static int scmi_reset_probe(struct scmi_device *sdev)
        data->rcdev.owner = THIS_MODULE;
        data->rcdev.of_node = np;
        data->rcdev.nr_resets = handle->reset_ops->num_domains_get(handle);
+       data->handle = handle;
 
        return devm_reset_controller_register(dev, &data->rcdev);
 }
index fc53e1e..c94184d 100644 (file)
@@ -1553,8 +1553,8 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device)
        if (rc == 0) {
                memcpy(&private->vsq, vsq, sizeof(*vsq));
        } else {
-               dev_warn(&device->cdev->dev,
-                        "Reading the volume storage information failed with rc=%d\n", rc);
+               DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
+                               "Reading the volume storage information failed with rc=%d", rc);
        }
 
        if (useglobal)
@@ -1737,8 +1737,8 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device)
        if (rc == 0) {
                dasd_eckd_cpy_ext_pool_data(device, lcq);
        } else {
-               dev_warn(&device->cdev->dev,
-                        "Reading the logical configuration failed with rc=%d\n", rc);
+               DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
+                               "Reading the logical configuration failed with rc=%d", rc);
        }
 
        dasd_sfree_request(cqr, cqr->memdev);
@@ -2020,14 +2020,10 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
        dasd_eckd_read_features(device);
 
        /* Read Volume Information */
-       rc = dasd_eckd_read_vol_info(device);
-       if (rc)
-               goto out_err3;
+       dasd_eckd_read_vol_info(device);
 
        /* Read Extent Pool Information */
-       rc = dasd_eckd_read_ext_pool_info(device);
-       if (rc)
-               goto out_err3;
+       dasd_eckd_read_ext_pool_info(device);
 
        /* Read Device Characteristics */
        rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
@@ -2059,9 +2055,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
        if (readonly)
                set_bit(DASD_FLAG_DEVICE_RO, &device->flags);
 
-       if (dasd_eckd_is_ese(device))
-               dasd_set_feature(device->cdev, DASD_FEATURE_DISCARD, 1);
-
        dev_info(&device->cdev->dev, "New DASD %04X/%02X (CU %04X/%02X) "
                 "with %d cylinders, %d heads, %d sectors%s\n",
                 private->rdc_data.dev_type,
@@ -3695,14 +3688,6 @@ static int dasd_eckd_release_space(struct dasd_device *device,
                return -EINVAL;
 }
 
-static struct dasd_ccw_req *
-dasd_eckd_build_cp_discard(struct dasd_device *device, struct dasd_block *block,
-                          struct request *req, sector_t first_trk,
-                          sector_t last_trk)
-{
-       return dasd_eckd_dso_ras(device, block, req, first_trk, last_trk, 1);
-}
-
 static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
                                               struct dasd_device *startdev,
                                               struct dasd_block *block,
@@ -4447,10 +4432,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev,
        cmdwtd = private->features.feature[12] & 0x40;
        use_prefix = private->features.feature[8] & 0x01;
 
-       if (req_op(req) == REQ_OP_DISCARD)
-               return dasd_eckd_build_cp_discard(startdev, block, req,
-                                                 first_trk, last_trk);
-
        cqr = NULL;
        if (cdlspecial || dasd_page_cache) {
                /* do nothing, just fall through to the cmd mode single case */
@@ -4729,14 +4710,12 @@ static struct dasd_ccw_req *dasd_eckd_build_alias_cp(struct dasd_device *base,
                                                     struct dasd_block *block,
                                                     struct request *req)
 {
-       struct dasd_device *startdev = NULL;
        struct dasd_eckd_private *private;
-       struct dasd_ccw_req *cqr;
+       struct dasd_device *startdev;
        unsigned long flags;
+       struct dasd_ccw_req *cqr;
 
-       /* Discard requests can only be processed on base devices */
-       if (req_op(req) != REQ_OP_DISCARD)
-               startdev = dasd_alias_get_start_dev(base);
+       startdev = dasd_alias_get_start_dev(base);
        if (!startdev)
                startdev = base;
        private = startdev->private;
@@ -5663,14 +5642,10 @@ static int dasd_eckd_restore_device(struct dasd_device *device)
        dasd_eckd_read_features(device);
 
        /* Read Volume Information */
-       rc = dasd_eckd_read_vol_info(device);
-       if (rc)
-               goto out_err2;
+       dasd_eckd_read_vol_info(device);
 
        /* Read Extent Pool Information */
-       rc = dasd_eckd_read_ext_pool_info(device);
-       if (rc)
-               goto out_err2;
+       dasd_eckd_read_ext_pool_info(device);
 
        /* Read Device Characteristics */
        rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
@@ -6521,20 +6496,8 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block)
        unsigned int logical_block_size = block->bp_block;
        struct request_queue *q = block->request_queue;
        struct dasd_device *device = block->base;
-       struct dasd_eckd_private *private;
-       unsigned int max_discard_sectors;
-       unsigned int max_bytes;
-       unsigned int ext_bytes; /* Extent Size in Bytes */
-       int recs_per_trk;
-       int trks_per_cyl;
-       int ext_limit;
-       int ext_size; /* Extent Size in Cylinders */
        int max;
 
-       private = device->private;
-       trks_per_cyl = private->rdc_data.trk_per_cyl;
-       recs_per_trk = recs_per_track(&private->rdc_data, 0, logical_block_size);
-
        if (device->features & DASD_FEATURE_USERAW) {
                /*
                 * the max_blocks value for raw_track access is 256
@@ -6555,28 +6518,6 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block)
        /* With page sized segments each segment can be translated into one idaw/tidaw */
        blk_queue_max_segment_size(q, PAGE_SIZE);
        blk_queue_segment_boundary(q, PAGE_SIZE - 1);
-
-       if (dasd_eckd_is_ese(device)) {
-               /*
-                * Depending on the extent size, up to UINT_MAX bytes can be
-                * accepted. However, neither DASD_ECKD_RAS_EXTS_MAX nor the
-                * device limits should be exceeded.
-                */
-               ext_size = dasd_eckd_ext_size(device);
-               ext_limit = min(private->real_cyl / ext_size, DASD_ECKD_RAS_EXTS_MAX);
-               ext_bytes = ext_size * trks_per_cyl * recs_per_trk *
-                       logical_block_size;
-               max_bytes = UINT_MAX - (UINT_MAX % ext_bytes);
-               if (max_bytes / ext_bytes > ext_limit)
-                       max_bytes = ext_bytes * ext_limit;
-
-               max_discard_sectors = max_bytes / 512;
-
-               blk_queue_max_discard_sectors(q, max_discard_sectors);
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
-               q->limits.discard_granularity = ext_bytes;
-               q->limits.discard_alignment = ext_bytes;
-       }
 }
 
 static struct ccw_driver dasd_eckd_driver = {
index 0005ec9..b42a937 100644 (file)
@@ -372,7 +372,7 @@ int ccwgroup_create_dev(struct device *parent, struct ccwgroup_driver *gdrv,
                goto error;
        }
        /* Check for trailing stuff. */
-       if (i == num_devices && strlen(buf) > 0) {
+       if (i == num_devices && buf && strlen(buf) > 0) {
                rc = -EINVAL;
                goto error;
        }
index ba7d248..dcdaba6 100644 (file)
@@ -113,6 +113,7 @@ struct subchannel {
        enum sch_todo todo;
        struct work_struct todo_work;
        struct schib_config config;
+       u64 dma_mask;
        char *driver_override; /* Driver name to force a match */
 } __attribute__ ((aligned(8)));
 
index 22c5581..8318504 100644 (file)
@@ -232,7 +232,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid,
         * belong to a subchannel need to fit 31 bit width (e.g. ccw).
         */
        sch->dev.coherent_dma_mask = DMA_BIT_MASK(31);
-       sch->dev.dma_mask = &sch->dev.coherent_dma_mask;
+       /*
+        * But no such restriction applies to mappings that are handled
+        * through the streaming DMA API.
+        */
+       sch->dma_mask = DMA_BIT_MASK(64);
+       sch->dev.dma_mask = &sch->dma_mask;
        return sch;
 
 err:
@@ -1388,6 +1393,8 @@ device_initcall(cio_settle_init);
 
 int sch_is_pseudo_sch(struct subchannel *sch)
 {
+       if (!sch->dev.parent)
+               return 0;
        return sch == to_css(sch->dev.parent)->pseudo_subchannel;
 }
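
The css hunk gives the subchannel a dedicated 64-bit streaming mask instead of aliasing dev.dma_mask to the 31-bit coherent mask, so only coherent allocations stay below 2G. A hedged sketch of the two-mask setup; the function name is illustrative:

#include <linux/device.h>
#include <linux/dma-mapping.h>

static void example_set_dma_masks(struct device *dev, u64 *streaming_mask)
{
	/* Coherent buffers must fit 31-bit CCW addressing... */
	dev->coherent_dma_mask = DMA_BIT_MASK(31);

	/* ...but streaming mappings have no such limit, so point
	 * dev->dma_mask at separate 64-bit storage rather than at
	 * coherent_dma_mask. */
	*streaming_mask = DMA_BIT_MASK(64);
	dev->dma_mask = streaming_mask;
}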
 
index 131430b..0c6245f 100644 (file)
@@ -710,7 +710,7 @@ static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
        if (!cdev->private)
                goto err_priv;
        cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
-       cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask;
+       cdev->dev.dma_mask = sch->dev.dma_mask;
        dma_pool = cio_gp_dma_create(&cdev->dev, 1);
        if (!dma_pool)
                goto err_dma_pool;
index d722458..65841af 100644 (file)
@@ -124,9 +124,7 @@ EXPORT_SYMBOL(ccw_device_is_multipath);
 /**
  * ccw_device_clear() - terminate I/O request processing
  * @cdev: target ccw device
- * @intparm: interruption parameter; value is only used if no I/O is
- *          outstanding, otherwise the intparm associated with the I/O request
- *          is returned
+ * @intparm: interruption parameter to be returned upon conclusion of csch
  *
  * ccw_device_clear() calls csch on @cdev's subchannel.
  * Returns:
@@ -179,6 +177,9 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
  * completed during the time specified by @expires. If a timeout occurs, the
  * channel program is terminated via xsch, hsch or csch, and the device's
  * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -256,6 +257,9 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -287,6 +291,9 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -322,6 +329,9 @@ int ccw_device_start(struct ccw_device *cdev, struct ccw1 *cpa,
  * completed during the time specified by @expires. If a timeout occurs, the
  * channel program is terminated via xsch, hsch or csch, and the device's
  * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -343,11 +353,12 @@ int ccw_device_start_timeout(struct ccw_device *cdev, struct ccw1 *cpa,
 /**
  * ccw_device_halt() - halt I/O request processing
  * @cdev: target ccw device
- * @intparm: interruption parameter; value is only used if no I/O is
- *          outstanding, otherwise the intparm associated with the I/O request
- *          is returned
+ * @intparm: interruption parameter to be returned upon conclusion of hsch
  *
  * ccw_device_halt() calls hsch on @cdev's subchannel.
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_clear().
  * Returns:
  *  %0 on success,
  *  -%ENODEV on device not operational,
index f4ca1d2..cd16488 100644 (file)
@@ -113,7 +113,7 @@ static void set_impl_params(struct qdio_irq *irq_ptr,
        irq_ptr->qib.pfmt = qib_param_field_format;
        if (qib_param_field)
                memcpy(irq_ptr->qib.parm, qib_param_field,
-                      QDIO_MAX_BUFFERS_PER_Q);
+                      sizeof(irq_ptr->qib.parm));
 
        if (!input_slib_elements)
                goto output;
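
The qdio fix bounds the memcpy by the destination field itself rather than by QDIO_MAX_BUFFERS_PER_Q, so the copy can never outgrow qib.parm even if the two constants diverge. A runnable illustration of the sizeof(dest) idiom, with hypothetical sizes:

#include <stdio.h>
#include <string.h>

struct qib_like {
	unsigned char parm[64];		/* hypothetical field size */
};

int main(void)
{
	unsigned char src[128] = { 0 };	/* larger than the destination */
	struct qib_like q;

	/* Copy at most what the destination can hold, independent of
	 * how large the source buffer happens to be. */
	memcpy(q.parm, src, sizeof(q.parm));
	printf("copied %zu bytes\n", sizeof(q.parm));
	return 0;
}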
index a76b8a8..a191506 100644 (file)
@@ -1322,24 +1322,24 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
        /* < CEX2A is not supported */
        if (rawtype < AP_DEVICE_TYPE_CEX2A)
                return 0;
-       /* up to CEX6 known and fully supported */
-       if (rawtype <= AP_DEVICE_TYPE_CEX6)
+       /* up to CEX7 known and fully supported */
+       if (rawtype <= AP_DEVICE_TYPE_CEX7)
                return rawtype;
        /*
-        * unknown new type > CEX6, check for compatibility
+        * unknown new type > CEX7, check for compatibility
         * to the highest known and supported type which is
-        * currently CEX6 with the help of the QACT function.
+        * currently CEX7 with the help of the QACT function.
         */
        if (ap_qact_available()) {
                struct ap_queue_status status;
                union ap_qact_ap_info apinfo = {0};
 
                apinfo.mode = (func >> 26) & 0x07;
-               apinfo.cat = AP_DEVICE_TYPE_CEX6;
+               apinfo.cat = AP_DEVICE_TYPE_CEX7;
                status = ap_qact(qid, 0, &apinfo);
                if (status.response_code == AP_RESPONSE_NORMAL
                    && apinfo.cat >= AP_DEVICE_TYPE_CEX2A
-                   && apinfo.cat <= AP_DEVICE_TYPE_CEX6)
+                   && apinfo.cat <= AP_DEVICE_TYPE_CEX7)
                        comp_type = apinfo.cat;
        }
        if (!comp_type)
index 6f3cf37..433b7b6 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * Copyright IBM Corp. 2006, 2012
+ * Copyright IBM Corp. 2006, 2019
  * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  *           Martin Schwidefsky <schwidefsky@de.ibm.com>
  *           Ralph Wuerthner <rwuerthn@de.ibm.com>
@@ -63,6 +63,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_DEVICE_TYPE_CEX4    10
 #define AP_DEVICE_TYPE_CEX5    11
 #define AP_DEVICE_TYPE_CEX6    12
+#define AP_DEVICE_TYPE_CEX7    13
 
 /*
  * Known function facilities
index f76a1d0..9de3d46 100644 (file)
@@ -1363,9 +1363,122 @@ static struct attribute_group ccadata_attr_group = {
        .bin_attrs = ccadata_attrs,
 };
 
+#define CCACIPHERTOKENSIZE     (sizeof(struct cipherkeytoken) + 80)
+
+/*
+ * Sysfs attribute read function for all secure key ccacipher binary attributes.
+ * The implementation cannot deal with partial reads, because a new random
+ * secure key blob is generated with each read. In case of partial reads
+ * (i.e. off != 0 or count < key blob size), -EINVAL is returned.
+ */
+static ssize_t pkey_ccacipher_aes_attr_read(enum pkey_key_size keybits,
+                                           bool is_xts, char *buf, loff_t off,
+                                           size_t count)
+{
+       size_t keysize;
+       int rc;
+
+       if (off != 0 || count < CCACIPHERTOKENSIZE)
+               return -EINVAL;
+       if (is_xts)
+               if (count < 2 * CCACIPHERTOKENSIZE)
+                       return -EINVAL;
+
+       keysize = CCACIPHERTOKENSIZE;
+       rc = cca_gencipherkey(-1, -1, keybits, 0, buf, &keysize);
+       if (rc)
+               return rc;
+       memset(buf + keysize, 0, CCACIPHERTOKENSIZE - keysize);
+
+       if (is_xts) {
+               keysize = CCACIPHERTOKENSIZE;
+               rc = cca_gencipherkey(-1, -1, keybits, 0,
+                                     buf + CCACIPHERTOKENSIZE, &keysize);
+               if (rc)
+                       return rc;
+               memset(buf + CCACIPHERTOKENSIZE + keysize, 0,
+                      CCACIPHERTOKENSIZE - keysize);
+
+               return 2 * CCACIPHERTOKENSIZE;
+       }
+
+       return CCACIPHERTOKENSIZE;
+}
+
+static ssize_t ccacipher_aes_128_read(struct file *filp,
+                                     struct kobject *kobj,
+                                     struct bin_attribute *attr,
+                                     char *buf, loff_t off,
+                                     size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_128, false, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_192_read(struct file *filp,
+                                     struct kobject *kobj,
+                                     struct bin_attribute *attr,
+                                     char *buf, loff_t off,
+                                     size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_192, false, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_256_read(struct file *filp,
+                                     struct kobject *kobj,
+                                     struct bin_attribute *attr,
+                                     char *buf, loff_t off,
+                                     size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_256, false, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_128_xts_read(struct file *filp,
+                                         struct kobject *kobj,
+                                         struct bin_attribute *attr,
+                                         char *buf, loff_t off,
+                                         size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_128, true, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_256_xts_read(struct file *filp,
+                                         struct kobject *kobj,
+                                         struct bin_attribute *attr,
+                                         char *buf, loff_t off,
+                                         size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_256, true, buf,
+                                           off, count);
+}
+
+static BIN_ATTR_RO(ccacipher_aes_128, CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_192, CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_256, CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_128_xts, 2 * CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_256_xts, 2 * CCACIPHERTOKENSIZE);
+
+static struct bin_attribute *ccacipher_attrs[] = {
+       &bin_attr_ccacipher_aes_128,
+       &bin_attr_ccacipher_aes_192,
+       &bin_attr_ccacipher_aes_256,
+       &bin_attr_ccacipher_aes_128_xts,
+       &bin_attr_ccacipher_aes_256_xts,
+       NULL
+};
+
+static struct attribute_group ccacipher_attr_group = {
+       .name      = "ccacipher",
+       .bin_attrs = ccacipher_attrs,
+};
+
 static const struct attribute_group *pkey_attr_groups[] = {
        &protkey_attr_group,
        &ccadata_attr_group,
+       &ccacipher_attr_group,
        NULL,
 };
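
The read handler above refuses partial reads because every read generates a fresh random key blob, and an XTS attribute needs room for two blobs. A runnable sketch of just that guard; the token size is a stand-in for CCACIPHERTOKENSIZE:

#include <stdio.h>
#include <sys/types.h>

#define TOKENSIZE 256	/* stand-in for CCACIPHERTOKENSIZE */

/* Whole-blob reads only: offset 0 and a buffer covering one blob, or
 * two blobs for an XTS (double-key) attribute. */
static int check_read(int is_xts, off_t off, size_t count)
{
	size_t need = is_xts ? 2 * TOKENSIZE : TOKENSIZE;

	return (off == 0 && count >= need) ? 0 : -1;
}

int main(void)
{
	printf("plain, full buffer: %d\n", check_read(0, 0, TOKENSIZE));
	printf("xts, half buffer:   %d\n", check_read(1, 0, TOKENSIZE));
	return 0;
}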
 
index 003662a..be2520c 100644 (file)
@@ -36,6 +36,8 @@ static struct ap_device_id ap_queue_ids[] = {
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { .dev_type = AP_DEVICE_TYPE_CEX6,
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+       { .dev_type = AP_DEVICE_TYPE_CEX7,
+         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { /* end of sibling */ },
 };
 
index 45bdb47..9157e72 100644 (file)
@@ -522,8 +522,7 @@ static int zcrypt_release(struct inode *inode, struct file *filp)
        if (filp->f_inode->i_cdev == &zcrypt_cdev) {
                struct zcdn_device *zcdndev;
 
-               if (mutex_lock_interruptible(&ap_perms_mutex))
-                       return -ERESTARTSYS;
+               mutex_lock(&ap_perms_mutex);
                zcdndev = find_zcdndev_by_devt(filp->f_inode->i_rdev);
                mutex_unlock(&ap_perms_mutex);
                if (zcdndev) {
index 2d3f273..d464618 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- *  Copyright IBM Corp. 2001, 2018
+ *  Copyright IBM Corp. 2001, 2019
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *            Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -29,6 +29,7 @@
 #define ZCRYPT_CEX4           10
 #define ZCRYPT_CEX5           11
 #define ZCRYPT_CEX6           12
+#define ZCRYPT_CEX7           13
 
 /**
  * Large random numbers are pulled in 4096 byte chunks from the crypto cards
index f58d8de..442e3d6 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *  Copyright IBM Corp. 2012
+ *  Copyright IBM Corp. 2012, 2019
  *  Author(s): Holger Dengler <hd@linux.vnet.ibm.com>
  */
 
@@ -38,8 +38,8 @@
 #define CEX4_CLEANUP_TIME      (900*HZ)
 
 MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("CEX4/CEX5/CEX6 Cryptographic Card device driver, " \
-                  "Copyright IBM Corp. 2018");
+MODULE_DESCRIPTION("CEX4/CEX5/CEX6/CEX7 Cryptographic Card device driver, " \
+                  "Copyright IBM Corp. 2019");
 MODULE_LICENSE("GPL");
 
 static struct ap_device_id zcrypt_cex4_card_ids[] = {
@@ -49,6 +49,8 @@ static struct ap_device_id zcrypt_cex4_card_ids[] = {
          .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
        { .dev_type = AP_DEVICE_TYPE_CEX6,
          .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+       { .dev_type = AP_DEVICE_TYPE_CEX7,
+         .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
        { /* end of list */ },
 };
 
@@ -61,6 +63,8 @@ static struct ap_device_id zcrypt_cex4_queue_ids[] = {
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { .dev_type = AP_DEVICE_TYPE_CEX6,
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+       { .dev_type = AP_DEVICE_TYPE_CEX7,
+         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { /* end of list */ },
 };
 
@@ -146,7 +150,7 @@ static const struct attribute_group cca_queue_attr_group = {
 };
 
 /**
- * Probe function for CEX4/CEX5/CEX6 card device. It always
+ * Probe function for CEX4/CEX5/CEX6/CEX7 card device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
  * @ap_dev: pointer to the AP device.
@@ -158,25 +162,31 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
         * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
         */
        static const int CEX4A_SPEED_IDX[] = {
-                14, 19, 249, 42, 228, 1458, 0, 0};
+                14,  19, 249, 42, 228, 1458, 0, 0};
        static const int CEX5A_SPEED_IDX[] = {
-                 8,  9,  20, 18,  66,  458, 0, 0};
+                 8,   9,  20, 18,  66,  458, 0, 0};
        static const int CEX6A_SPEED_IDX[] = {
-                 6,  9,  20, 17,  65,  438, 0, 0};
+                 6,   9,  20, 17,  65,  438, 0, 0};
+       static const int CEX7A_SPEED_IDX[] = {
+                 6,   8,  17, 15,  54,  362, 0, 0};
 
        static const int CEX4C_SPEED_IDX[] = {
                 59,  69, 308, 83, 278, 2204, 209, 40};
        static const int CEX5C_SPEED_IDX[] = {
-                24,  31,  50, 37,  90,  479,  27, 10};
+                24,  31,  50, 37,  90,  479,  27, 10};
        static const int CEX6C_SPEED_IDX[] = {
-                16,  20,  32, 27,  77,  455,  23,  9};
+                16,  20,  32, 27,  77,  455,  24,  9};
+       static const int CEX7C_SPEED_IDX[] = {
+                14,  16,  26, 23,  64,  376,  23,  8};
 
        static const int CEX4P_SPEED_IDX[] = {
-               224, 313, 3560, 359, 605, 2827, 0, 50};
+                 0,   0,   0,   0,   0,   0,   0,  50};
        static const int CEX5P_SPEED_IDX[] = {
-                63,  84,  156,  83, 142,  533, 0, 10};
+                 0,   0,   0,   0,   0,   0,   0,  10};
        static const int CEX6P_SPEED_IDX[] = {
-                55,  70,  121,  73, 129,  522, 0,  9};
+                 0,   0,   0,   0,   0,   0,   0,   9};
+       static const int CEX7P_SPEED_IDX[] = {
+                 0,   0,   0,   0,   0,   0,   0,   8};
 
        struct ap_card *ac = to_ap_card(&ap_dev->device);
        struct zcrypt_card *zc;
@@ -198,11 +208,19 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX5;
                        memcpy(zc->speed_rating, CEX5A_SPEED_IDX,
                               sizeof(CEX5A_SPEED_IDX));
-               } else {
+               } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
                        zc->type_string = "CEX6A";
                        zc->user_space_type = ZCRYPT_CEX6;
                        memcpy(zc->speed_rating, CEX6A_SPEED_IDX,
                               sizeof(CEX6A_SPEED_IDX));
+               } else {
+                       zc->type_string = "CEX7A";
+                       /* wrong user space type, just for compatibility
+                        * with the ZCRYPT_STATUS_MASK ioctl.
+                        */
+                       zc->user_space_type = ZCRYPT_CEX6;
+                       memcpy(zc->speed_rating, CEX7A_SPEED_IDX,
+                              sizeof(CEX7A_SPEED_IDX));
                }
                zc->min_mod_size = CEX4A_MIN_MOD_SIZE;
                if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) &&
@@ -232,7 +250,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX3C;
                        memcpy(zc->speed_rating, CEX5C_SPEED_IDX,
                               sizeof(CEX5C_SPEED_IDX));
-               } else {
+               } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
                        zc->type_string = "CEX6C";
                        /* wrong user space type, must be CEX6
                         * just keep it for cca compatibility
@@ -240,6 +258,14 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX3C;
                        memcpy(zc->speed_rating, CEX6C_SPEED_IDX,
                               sizeof(CEX6C_SPEED_IDX));
+               } else {
+                       zc->type_string = "CEX7C";
+                       /* wrong user space type, must be CEX7
+                        * just keep it for cca compatibility
+                        */
+                       zc->user_space_type = ZCRYPT_CEX3C;
+                       memcpy(zc->speed_rating, CEX7C_SPEED_IDX,
+                              sizeof(CEX7C_SPEED_IDX));
                }
                zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
                zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
@@ -255,11 +281,19 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX5;
                        memcpy(zc->speed_rating, CEX5P_SPEED_IDX,
                               sizeof(CEX5P_SPEED_IDX));
-               } else {
+               } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
                        zc->type_string = "CEX6P";
                        zc->user_space_type = ZCRYPT_CEX6;
                        memcpy(zc->speed_rating, CEX6P_SPEED_IDX,
                               sizeof(CEX6P_SPEED_IDX));
+               } else {
+                       zc->type_string = "CEX7P";
+                       /* wrong user space type, just for compatibility
+                        * with the ZCRYPT_STATUS_MASK ioctl.
+                        */
+                       zc->user_space_type = ZCRYPT_CEX6;
+                       memcpy(zc->speed_rating, CEX7P_SPEED_IDX,
+                              sizeof(CEX7P_SPEED_IDX));
                }
                zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
                zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
@@ -289,8 +323,8 @@ out:
 }
 
 /**
- * This is called to remove the CEX4/CEX5/CEX6 card driver information
- * if an AP card device is removed.
+ * This is called to remove the CEX4/CEX5/CEX6/CEX7 card driver
+ * information if an AP card device is removed.
  */
 static void zcrypt_cex4_card_remove(struct ap_device *ap_dev)
 {
@@ -311,7 +345,7 @@ static struct ap_driver zcrypt_cex4_card_driver = {
 };
 
 /**
- * Probe function for CEX4/CEX5/CEX6 queue device. It always
+ * Probe function for CEX4/CEX5/CEX6/CEX7 queue device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
  * @ap_dev: pointer to the AP device.
@@ -369,7 +403,7 @@ out:
 }
 
 /**
- * This is called to remove the CEX4/CEX5/CEX6 queue driver
+ * This is called to remove the CEX4/CEX5/CEX6/CEX7 queue driver
  * information if an AP queue device is removed.
  */
 static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev)
index a7868c8..dda2743 100644 (file)
@@ -4715,8 +4715,7 @@ static int qeth_qdio_establish(struct qeth_card *card)
 
        QETH_CARD_TEXT(card, 2, "qdioest");
 
-       qib_param_field = kzalloc(QDIO_MAX_BUFFERS_PER_Q,
-                                 GFP_KERNEL);
+       qib_param_field = kzalloc(FIELD_SIZEOF(struct qib, parm), GFP_KERNEL);
        if (!qib_param_field) {
                rc =  -ENOMEM;
                goto out_free_nothing;
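
The allocation side now uses FIELD_SIZEOF() so the buffer matches the qib.parm field it is later copied into. The kernel macro is essentially a sizeof over a null-pointer member access, which a user-space build can reproduce:

#include <stdio.h>

/* Equivalent of the kernel's FIELD_SIZEOF(): size of a struct member
 * without needing an instance of the struct (sizeof is unevaluated). */
#define FIELD_SIZEOF(t, f) (sizeof(((t *)0)->f))

struct qib_like {
	unsigned char parm[64];	/* hypothetical field size */
};

int main(void)
{
	printf("parm is %zu bytes\n", FIELD_SIZEOF(struct qib_like, parm));
	return 0;
}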
index b8799cd..bd8143e 100644 (file)
@@ -2021,10 +2021,10 @@ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc,
 static void qeth_l2_vnicc_init(struct qeth_card *card)
 {
        u32 *timeout = &card->options.vnicc.learning_timeout;
+       bool enable, error = false;
        unsigned int chars_len, i;
        unsigned long chars_tmp;
        u32 sup_cmds, vnicc;
-       bool enable, error;
 
        QETH_CARD_TEXT(card, 2, "vniccini");
        /* reset rx_bcast */
@@ -2045,17 +2045,24 @@ static void qeth_l2_vnicc_init(struct qeth_card *card)
        chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE;
        for_each_set_bit(i, &chars_tmp, chars_len) {
                vnicc = BIT(i);
-               qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds);
-               if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) ||
-                   !(sup_cmds & IPA_VNICC_GET_TIMEOUT))
+               if (qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds)) {
+                       sup_cmds = 0;
+                       error = true;
+               }
+               if ((sup_cmds & IPA_VNICC_SET_TIMEOUT) &&
+                   (sup_cmds & IPA_VNICC_GET_TIMEOUT))
+                       card->options.vnicc.getset_timeout_sup |= vnicc;
+               else
                        card->options.vnicc.getset_timeout_sup &= ~vnicc;
-               if (!(sup_cmds & IPA_VNICC_ENABLE) ||
-                   !(sup_cmds & IPA_VNICC_DISABLE))
+               if ((sup_cmds & IPA_VNICC_ENABLE) &&
+                   (sup_cmds & IPA_VNICC_DISABLE))
+                       card->options.vnicc.set_char_sup |= vnicc;
+               else
                        card->options.vnicc.set_char_sup &= ~vnicc;
        }
        /* enforce assumed default values and recover settings, if changed  */
-       error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
-                                             timeout);
+       error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
+                                              timeout);
        chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT;
        chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE;
        chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE;
index 296bbc3..cf63916 100644 (file)
 
 struct kmem_cache *zfcp_fsf_qtcb_cache;
 
+static bool ber_stop = true;
+module_param(ber_stop, bool, 0600);
+MODULE_PARM_DESC(ber_stop,
+                "Shuts down FCP devices for FCP channels that report a bit-error count in excess of their threshold (default on)");
+
 static void zfcp_fsf_request_timeout_handler(struct timer_list *t)
 {
        struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer);
@@ -236,10 +241,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req)
        case FSF_STATUS_READ_SENSE_DATA_AVAIL:
                break;
        case FSF_STATUS_READ_BIT_ERROR_THRESHOLD:
-               dev_warn(&adapter->ccw_device->dev,
-                        "The error threshold for checksum statistics "
-                        "has been exceeded\n");
                zfcp_dbf_hba_bit_err("fssrh_3", req);
+               if (ber_stop) {
+                       dev_warn(&adapter->ccw_device->dev,
+                                "All paths over this FCP device are disused because of excessive bit errors\n");
+                       zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b");
+               } else {
+                       dev_warn(&adapter->ccw_device->dev,
+                                "The error threshold for checksum statistics has been exceeded\n");
+               }
                break;
        case FSF_STATUS_READ_LINK_DOWN:
                zfcp_fsf_status_read_link_down(req);
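
The new ber_stop knob above is an ordinary boolean module parameter: default value, sysfs permissions and description are all declared at the definition site, and 0600 keeps it root-writable at runtime under /sys/module/<module>/parameters/. A hedged sketch of the same declaration pattern for a hypothetical driver:

#include <linux/module.h>

/* Root can flip this at runtime via sysfs; the driver reads the
 * variable directly whenever it needs the current policy. */
static bool example_strict = true;
module_param(example_strict, bool, 0600);
MODULE_PARM_DESC(example_strict,
		 "Shut the device down on excessive errors (default on)");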
index 1b92f3c..90cf469 100644 (file)
@@ -898,7 +898,7 @@ config SCSI_SNI_53C710
 
 config 53C700_LE_ON_BE
        bool
-       depends on SCSI_LASI700
+       depends on SCSI_LASI700 || SCSI_SNI_53C710
        default y
 
 config SCSI_STEX
index 644f7f5..4a85878 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
index da00ca5..401743e 100644 (file)
@@ -1923,6 +1923,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req,
        struct fcoe_fcp_rsp_payload *fcp_rsp;
        struct bnx2fc_rport *tgt = io_req->tgt;
        struct scsi_cmnd *sc_cmd;
+       u16 scope = 0, qualifier = 0;
 
        /* scsi_cmd_cmpl is called with tgt lock held */
 
@@ -1990,12 +1991,30 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req,
 
                        if (io_req->cdb_status == SAM_STAT_TASK_SET_FULL ||
                            io_req->cdb_status == SAM_STAT_BUSY) {
-                               /* Set the jiffies + retry_delay_timer * 100ms
-                                  for the rport/tgt */
-                               tgt->retry_delay_timestamp = jiffies +
-                                       fcp_rsp->retry_delay_timer * HZ / 10;
+                               /* Newer array firmware with BUSY or
+                                * TASK_SET_FULL may return a
+                                * retry_delay_timer whose scope bits
+                                * must be masked off; otherwise a huge
+                                * delay of up to 27 minutes can result.
+                                */
+                               if (fcp_rsp->retry_delay_timer) {
+                                       /* Upper 2 bits */
+                                       scope = fcp_rsp->retry_delay_timer
+                                               & 0xC000;
+                                       /* Lower 14 bits */
+                                       qualifier = fcp_rsp->retry_delay_timer
+                                               & 0x3FFF;
+                               }
+                               if (scope > 0 && qualifier > 0 &&
+                                       qualifier <= 0x3FEF) {
+                                       /* Set the jiffies +
+                                        * retry_delay_timer * 100ms
+                                        * for the rport/tgt
+                                        */
+                                       tgt->retry_delay_timestamp = jiffies +
+                                               (qualifier * HZ / 10);
+                               }
                        }
-
                }
                if (io_req->fcp_resid)
                        scsi_set_resid(sc_cmd, io_req->fcp_resid);
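
The retry-delay hunk above splits the FCP RSP retry_delay_timer into a 2-bit scope (top bits) and a 14-bit qualifier, and only applies a delay for qualifiers in the valid 1..0x3FEF range. A runnable sketch of the decode, using the same masks and bounds:

#include <stdint.h>
#include <stdio.h>

/* Returns 1 and sets *delay_units (100ms each) when the retry delay
 * should be honored; scope 0 or an out-of-range qualifier means no delay. */
static int decode_retry_delay(uint16_t timer, unsigned int *delay_units)
{
	uint16_t scope = timer & 0xC000;	/* upper 2 bits */
	uint16_t qualifier = timer & 0x3FFF;	/* lower 14 bits */

	if (scope > 0 && qualifier > 0 && qualifier <= 0x3FEF) {
		*delay_units = qualifier;
		return 1;
	}
	return 0;
}

int main(void)
{
	unsigned int units;

	if (decode_retry_delay(0x4005, &units))
		printf("delay %u x 100ms\n", units);	/* 5 units */
	if (!decode_retry_delay(0x0005, &units))
		printf("scope 0: no delay applied\n");
	return 0;
}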
index 5f8153c..76751d6 100644 (file)
@@ -579,7 +579,6 @@ ch_release(struct inode *inode, struct file *file)
        scsi_changer *ch = file->private_data;
 
        scsi_device_put(ch->device);
-       ch->device = NULL;
        file->private_data = NULL;
        kref_put(&ch->ref, ch_destroy);
        return 0;
index 4971104..f32da0c 100644 (file)
@@ -512,6 +512,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
        unsigned int tpg_desc_tbl_off;
        unsigned char orig_transition_tmo;
        unsigned long flags;
+       bool transitioning_sense = false;
 
        if (!pg->expiry) {
                unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
@@ -572,13 +573,19 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
                        goto retry;
                }
                /*
-                * Retry on ALUA state transition or if any
-                * UNIT ATTENTION occurred.
+                * If the array returns the 'ALUA state transition' sense
+                * code here, it cannot provide RTPG data during the
+                * transition, so set the state to 'transitioning' directly.
                 */
                if (sense_hdr.sense_key == NOT_READY &&
-                   sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
-                       err = SCSI_DH_RETRY;
-               else if (sense_hdr.sense_key == UNIT_ATTENTION)
+                   sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
+                       transitioning_sense = true;
+                       goto skip_rtpg;
+               }
+               /*
+                * Retry if any other UNIT ATTENTION occurred.
+                */
+               if (sense_hdr.sense_key == UNIT_ATTENTION)
                        err = SCSI_DH_RETRY;
                if (err == SCSI_DH_RETRY &&
                    pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
@@ -666,7 +673,11 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
                off = 8 + (desc[7] * 4);
        }
 
+ skip_rtpg:
        spin_lock_irqsave(&pg->lock, flags);
+       if (transitioning_sense)
+               pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
+
        sdev_printk(KERN_INFO, sdev,
                    "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
                    ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
index d1513fd..0847e68 100644 (file)
@@ -3683,7 +3683,7 @@ void hisi_sas_debugfs_work_handler(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(hisi_sas_debugfs_work_handler);
 
-void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba)
+static void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba)
 {
        struct device *dev = hisi_hba->dev;
        int i;
@@ -3705,7 +3705,7 @@ void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba)
                devm_kfree(dev, hisi_hba->debugfs_port_reg[i]);
 }
 
-int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba)
+static int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba)
 {
        const struct hisi_sas_hw *hw = hisi_hba->hw;
        struct device *dev = hisi_hba->dev;
@@ -3796,7 +3796,7 @@ fail:
        return -ENOMEM;
 }
 
-void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba)
+static void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba)
 {
        hisi_hba->debugfs_bist_dentry =
                        debugfs_create_dir("bist", hisi_hba->debugfs_dir);
index 1bb6aad..216e557 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/interrupt.h>
 #include <linux/types.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
@@ -5478,6 +5477,8 @@ static int hpsa_ciss_submit(struct ctlr_info *h,
                return SCSI_MLQUEUE_HOST_BUSY;
        }
 
+       c->device = dev;
+
        enqueue_cmd_and_start_io(h, c);
        /* the cmd'll come back via intr handler in complete_scsi_command()  */
        return 0;
@@ -5549,6 +5550,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h,
                hpsa_cmd_init(h, c->cmdindex, c);
                c->cmd_type = CMD_SCSI;
                c->scsi_cmd = cmd;
+               c->device = dev;
                rc = hpsa_scsi_ioaccel_raid_map(h, c);
                if (rc < 0)     /* scsi_dma_map failed. */
                        rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -5556,6 +5558,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h,
                hpsa_cmd_init(h, c->cmdindex, c);
                c->cmd_type = CMD_SCSI;
                c->scsi_cmd = cmd;
+               c->device = dev;
                rc = hpsa_scsi_ioaccel_direct_map(h, c);
                if (rc < 0)     /* scsi_dma_map failed. */
                        rc = SCSI_MLQUEUE_HOST_BUSY;
index e91377a..e8813d2 100644 (file)
@@ -9055,7 +9055,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                }
        }
 
-#if defined(BUILD_NVME)
        /* Clear NVME stats */
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
@@ -9063,7 +9062,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                               sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat));
                }
        }
-#endif
 
        /* Clear SCSI stats */
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
index f4b879d..fc6e454 100644 (file)
@@ -851,9 +851,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 
        if (!(vport->fc_flag & FC_PT2PT)) {
                /* Check config parameter use-adisc or FCP-2 */
-               if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) ||
+               if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) ||
                    ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) &&
-                    (ndlp->nlp_type & NLP_FCP_TARGET))) {
+                    (ndlp->nlp_type & NLP_FCP_TARGET)))) {
                        spin_lock_irq(shost->host_lock);
                        ndlp->nlp_flag |= NLP_NPR_ADISC;
                        spin_unlock_irq(shost->host_lock);
index fe10976..6822cd9 100644 (file)
@@ -528,7 +528,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
                        list_del_init(&psb->list);
                        psb->exch_busy = 0;
                        psb->status = IOSTAT_SUCCESS;
-#ifdef BUILD_NVME
                        if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) {
                                qp->abts_nvme_io_bufs--;
                                spin_unlock(&qp->abts_io_buf_list_lock);
@@ -536,7 +535,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
                                lpfc_sli4_nvme_xri_aborted(phba, axri, psb);
                                return;
                        }
-#endif
                        qp->abts_scsi_io_bufs--;
                        spin_unlock(&qp->abts_io_buf_list_lock);
 
index a0c6945..614f78d 100644 (file)
@@ -7866,7 +7866,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
        if (sli4_hba->hdwq) {
                for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) {
                        eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq;
-                       if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) {
+                       if (eq && eq->queue_id == sli4_hba->mbx_cq->assoc_qid) {
                                fpeq = eq;
                                break;
                        }
index 45a6604..ff6d4aa 100644 (file)
@@ -4183,11 +4183,11 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
                 */
                if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ &&
                    pdev->subsystem_device == 0xC000)
-                       return -ENODEV;
+                       goto out_disable_device;
                /* Now check the magic signature byte */
                pci_read_config_word(pdev, PCI_CONF_AMISIG, &magic);
                if (magic != HBA_SIGNATURE_471 && magic != HBA_SIGNATURE)
-                       return -ENODEV;
+                       goto out_disable_device;
                /* Ok it is probably a megaraid */
        }
 
index d0c2f8d..c8e512b 100644 (file)
@@ -51,7 +51,6 @@
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/interrupt.h>
 #include <linux/aer.h>
 #include <linux/raid_class.h>
index 1659d35..59ca98f 100644 (file)
@@ -596,7 +596,7 @@ static void qedf_dcbx_handler(void *dev, struct qed_dcbx_get *get, u32 mib_type)
                tmp_prio = get->operational.app_prio.fcoe;
                if (qedf_default_prio > -1)
                        qedf->prio = qedf_default_prio;
-               else if (tmp_prio < 0 || tmp_prio > 7) {
+               else if (tmp_prio > 7) {
                        QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
                            "FIP/FCoE prio %d out of range, setting to %d.\n",
                            tmp_prio, QEDF_DEFAULT_PRIO);
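
The dropped tmp_prio < 0 test was dead code: the priority is held in an unsigned type here, so the comparison could never be true and the range check reduces to the upper bound alone. A tiny illustration:

#include <stdio.h>

int main(void)
{
	unsigned char tmp_prio = 200;	/* hypothetical out-of-range value */

	/* tmp_prio < 0 is always false for an unsigned type (and draws
	 * a -Wtype-limits warning), so only the upper bound is tested. */
	if (tmp_prio > 7)
		printf("prio %u out of range\n", tmp_prio);
	return 0;
}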
index 8190c2a..7259bce 100644 (file)
@@ -440,9 +440,6 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
                valid = 0;
                if (ha->optrom_size == OPTROM_SIZE_2300 && start == 0)
                        valid = 1;
-               else if (start == (ha->flt_region_boot * 4) ||
-                   start == (ha->flt_region_fw * 4))
-                       valid = 1;
                else if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha))
                        valid = 1;
                if (!valid) {
@@ -489,8 +486,10 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
                    "Writing flash region -- 0x%x/0x%x.\n",
                    ha->optrom_region_start, ha->optrom_region_size);
 
-               ha->isp_ops->write_optrom(vha, ha->optrom_buffer,
+               rval = ha->isp_ops->write_optrom(vha, ha->optrom_buffer,
                    ha->optrom_region_start, ha->optrom_region_size);
+               if (rval)
+                       rval = -EIO;
                break;
        default:
                rval = -EINVAL;
@@ -2920,6 +2919,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
        struct qla_hw_data *ha = vha->hw;
        uint16_t id = vha->vp_idx;
 
+       set_bit(VPORT_DELETE, &vha->dpc_flags);
+
        while (test_bit(LOOP_RESYNC_ACTIVE, &vha->dpc_flags) ||
            test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags))
                msleep(1000);
index 28d587a..99f0a1a 100644 (file)
@@ -253,7 +253,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
        srb_t *sp;
        const char *type;
        int req_sg_cnt, rsp_sg_cnt;
-       int rval =  (DRIVER_ERROR << 16);
+       int rval =  (DID_ERROR << 16);
        uint16_t nextlid = 0;
 
        if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
@@ -432,7 +432,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job)
        struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
        scsi_qla_host_t *vha = shost_priv(host);
        struct qla_hw_data *ha = vha->hw;
-       int rval = (DRIVER_ERROR << 16);
+       int rval = (DID_ERROR << 16);
        int req_sg_cnt, rsp_sg_cnt;
        uint16_t loop_id;
        struct fc_port *fcport;
@@ -1950,7 +1950,7 @@ qlafx00_mgmt_cmd(struct bsg_job *bsg_job)
        struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
        scsi_qla_host_t *vha = shost_priv(host);
        struct qla_hw_data *ha = vha->hw;
-       int rval = (DRIVER_ERROR << 16);
+       int rval = (DID_ERROR << 16);
        struct qla_mt_iocb_rqst_fx00 *piocb_rqst;
        srb_t *sp;
        int req_sg_cnt = 0, rsp_sg_cnt = 0;
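
The three bsg hunks replace DRIVER_ERROR with DID_ERROR; in the SCSI result word the host byte lives at bits 16..23, hence the << 16 in both the old and new code. A runnable sketch of the packing, using the kernel's DID_ERROR value:

#include <stdio.h>

#define DID_ERROR 0x07	/* host byte: internal driver/host error */

int main(void)
{
	/* SCSI result layout: status byte | msg << 8 | host << 16 |
	 * driver << 24; bsg failures are reported via the host byte. */
	unsigned int result = DID_ERROR << 16;

	printf("result=0x%08x host_byte=0x%02x\n",
	       result, (result >> 16) & 0xff);
	return 0;
}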
index 873a6ae..6ffa987 100644 (file)
@@ -2396,6 +2396,7 @@ typedef struct fc_port {
        unsigned int query:1;
        unsigned int id_changed:1;
        unsigned int scan_needed:1;
+       unsigned int n2n_flag:1;
 
        struct completion nvme_del_done;
        uint32_t nvme_prli_service_param;
@@ -2446,7 +2447,6 @@ typedef struct fc_port {
        uint8_t fc4_type;
        uint8_t fc4f_nvme;
        uint8_t scan_state;
-       uint8_t n2n_flag;
 
        unsigned long last_queue_full;
        unsigned long last_ramp_up;
@@ -3036,6 +3036,7 @@ enum scan_flags_t {
 enum fc4type_t {
        FS_FC4TYPE_FCP  = BIT_0,
        FS_FC4TYPE_NVME = BIT_1,
+       FS_FCP_IS_N2N = BIT_7,
 };
 
 struct fab_scan_rp {
@@ -4394,6 +4395,7 @@ typedef struct scsi_qla_host {
 #define IOCB_WORK_ACTIVE       31
 #define SET_ZIO_THRESHOLD_NEEDED 32
 #define ISP_ABORT_TO_ROM       33
+#define VPORT_DELETE           34
 
        unsigned long   pci_flags;
 #define PFLG_DISCONNECTED      0       /* PCI device removed */
index dc0e366..5298ed1 100644 (file)
@@ -3102,7 +3102,8 @@ int qla24xx_post_gpnid_work(struct scsi_qla_host *vha, port_id_t *id)
 {
        struct qla_work_evt *e;
 
-       if (test_bit(UNLOADING, &vha->dpc_flags))
+       if (test_bit(UNLOADING, &vha->dpc_flags) ||
+           (vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags)))
                return 0;
 
        e = qla2x00_alloc_work(vha, QLA_EVT_GPNID);
index 643d232..1d04131 100644 (file)
@@ -746,12 +746,15 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
                        break;
                default:
                        if ((id.b24 != fcport->d_id.b24 &&
-                           fcport->d_id.b24) ||
+                           fcport->d_id.b24 &&
+                           fcport->loop_id != FC_NO_LOOP_ID) ||
                            (fcport->loop_id != FC_NO_LOOP_ID &&
                                fcport->loop_id != loop_id)) {
                                ql_dbg(ql_dbg_disc, vha, 0x20e3,
                                    "%s %d %8phC post del sess\n",
                                    __func__, __LINE__, fcport->port_name);
+                               if (fcport->n2n_flag)
+                                       fcport->d_id.b24 = 0;
                                qlt_schedule_sess_for_deletion(fcport);
                                return;
                        }
@@ -759,6 +762,8 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
                }
 
                fcport->loop_id = loop_id;
+               if (fcport->n2n_flag)
+                       fcport->d_id.b24 = id.b24;
 
                wwn = wwn_to_u64(fcport->port_name);
                qlt_find_sess_invalidate_other(vha, wwn,
@@ -972,7 +977,7 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res)
                wwn = wwn_to_u64(e->port_name);
 
                ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x20e8,
-                   "%s %8phC %02x:%02x:%02x state %d/%d lid %x \n",
+                   "%s %8phC %02x:%02x:%02x CLS %x/%x lid %x \n",
                    __func__, (void *)&wwn, e->port_id[2], e->port_id[1],
                    e->port_id[0], e->current_login_state, e->last_login_state,
                    (loop_id & 0x7fff));
@@ -1499,7 +1504,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
             (fcport->fw_login_state == DSC_LS_PRLI_PEND)))
                return 0;
 
-       if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+       if (fcport->fw_login_state == DSC_LS_PLOGI_COMP &&
+           !N2N_TOPO(vha->hw)) {
                if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) {
                        set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
                        return 0;
@@ -1570,8 +1576,9 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
                                qla24xx_post_gpdb_work(vha, fcport, 0);
                        }  else {
                                ql_dbg(ql_dbg_disc, vha, 0x2118,
-                                   "%s %d %8phC post NVMe PRLI\n",
-                                   __func__, __LINE__, fcport->port_name);
+                                   "%s %d %8phC post %s PRLI\n",
+                                   __func__, __LINE__, fcport->port_name,
+                                   fcport->fc4f_nvme ? "NVME" : "FC");
                                qla24xx_post_prli_work(vha, fcport);
                        }
                        break;
@@ -1853,17 +1860,38 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
                        break;
                }
 
-               if (ea->fcport->n2n_flag) {
+               if (ea->fcport->fc4f_nvme) {
                        ql_dbg(ql_dbg_disc, vha, 0x2118,
                                "%s %d %8phC post fc4 prli\n",
                                __func__, __LINE__, ea->fcport->port_name);
                        ea->fcport->fc4f_nvme = 0;
-                       ea->fcport->n2n_flag = 0;
                        qla24xx_post_prli_work(vha, ea->fcport);
+                       return;
+               }
+
+               /* at this point both PRLI NVME & PRLI FCP failed */
+               if (N2N_TOPO(vha->hw)) {
+                       if (ea->fcport->n2n_link_reset_cnt < 3) {
+                               ea->fcport->n2n_link_reset_cnt++;
+                               /*
+                                * The remote port is not sending PLOGI.
+                                * Reset the link to kick-start its state
+                                * machine.
+                                */
+                               set_bit(N2N_LINK_RESET, &vha->dpc_flags);
+                       } else {
+                               ql_log(ql_log_warn, vha, 0x2119,
+                                   "%s %d %8phC Unable to reconnect\n",
+                                   __func__, __LINE__, ea->fcport->port_name);
+                       }
+               } else {
+                       /*
+                        * Switch-connected and login failed: take the
+                        * connection down and allow relogin to retrigger.
+                        */
+                       ea->fcport->flags &= ~FCF_ASYNC_SENT;
+                       ea->fcport->keep_nport_handle = 0;
+                       qlt_schedule_sess_for_deletion(ea->fcport);
                }
-               ql_dbg(ql_dbg_disc, vha, 0x2119,
-                   "%s %d %8phC unhandle event of %x\n",
-                   __func__, __LINE__, ea->fcport->port_name, ea->data[0]);
                break;
        }
 }
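
The recovery logic added above is a capped retry: once both the NVME and FCP PRLI have failed in N2N topology, the link is reset at most three times (tracked in n2n_link_reset_cnt) before the driver gives up with a warning. A runnable userspace sketch of that pattern, names illustrative:

#include <stdio.h>

struct fc_port_sketch { int n2n_link_reset_cnt; };

static void handle_prli_failure(struct fc_port_sketch *p)
{
        if (p->n2n_link_reset_cnt < 3) {
                p->n2n_link_reset_cnt++;
                /* set_bit(N2N_LINK_RESET, ...) analogue would go here */
                printf("resetting link (attempt %d)\n", p->n2n_link_reset_cnt);
        } else {
                fprintf(stderr, "unable to reconnect, giving up\n");
        }
}

int main(void)
{
        struct fc_port_sketch p = { 0 };

        for (int i = 0; i < 5; i++)
                handle_prli_failure(&p);        /* resets 3x, then warns */
        return 0;
}
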
@@ -3190,7 +3218,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
 
                for (j = 0; j < 2; j++, fwdt++) {
                        if (!fwdt->template) {
-                               ql_log(ql_log_warn, vha, 0x00ba,
+                               ql_dbg(ql_dbg_init, vha, 0x00ba,
                                    "-> fwdt%u no template\n", j);
                                continue;
                        }
@@ -4986,28 +5014,47 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
        unsigned long flags;
 
        /* Initiate N2N login. */
-       if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) {
-               /* borrowing */
-               u32 *bp, i, sz;
-
-               memset(ha->init_cb, 0, ha->init_cb_size);
-               sz = min_t(int, sizeof(struct els_plogi_payload),
-                   ha->init_cb_size);
-               rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma,
-                   (void *)ha->init_cb, sz);
-               if (rval == QLA_SUCCESS) {
-                       bp = (uint32_t *)ha->init_cb;
-                       for (i = 0; i < sz/4 ; i++, bp++)
-                               *bp = cpu_to_be32(*bp);
+       if (N2N_TOPO(ha)) {
+               if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) {
+                       /* borrowing */
+                       u32 *bp, i, sz;
+
+                       memset(ha->init_cb, 0, ha->init_cb_size);
+                       sz = min_t(int, sizeof(struct els_plogi_payload),
+                           ha->init_cb_size);
+                       rval = qla24xx_get_port_login_templ(vha,
+                           ha->init_cb_dma, (void *)ha->init_cb, sz);
+                       if (rval == QLA_SUCCESS) {
+                               bp = (uint32_t *)ha->init_cb;
+                               for (i = 0; i < sz/4 ; i++, bp++)
+                                       *bp = cpu_to_be32(*bp);
 
-                       memcpy(&ha->plogi_els_payld.data, (void *)ha->init_cb,
-                           sizeof(ha->plogi_els_payld.data));
-                       set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
-               } else {
-                       ql_dbg(ql_dbg_init, vha, 0x00d1,
-                           "PLOGI ELS param read fail.\n");
+                               memcpy(&ha->plogi_els_payld.data,
+                                   (void *)ha->init_cb,
+                                   sizeof(ha->plogi_els_payld.data));
+                               set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+                       } else {
+                               ql_dbg(ql_dbg_init, vha, 0x00d1,
+                                   "PLOGI ELS param read fail.\n");
+                               goto skip_login;
+                       }
+               }
+
+               list_for_each_entry(fcport, &vha->vp_fcports, list) {
+                       if (fcport->n2n_flag) {
+                               qla24xx_fcport_handle_login(vha, fcport);
+                               return QLA_SUCCESS;
+                       }
+               }
+skip_login:
+               spin_lock_irqsave(&vha->work_lock, flags);
+               vha->scan.scan_retry++;
+               spin_unlock_irqrestore(&vha->work_lock, flags);
+
+               if (vha->scan.scan_retry < MAX_SCAN_RETRIES) {
+                       set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+                       set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
                }
-               return QLA_SUCCESS;
        }
 
        found_devs = 0;
index e92e52a..518eb95 100644 (file)
@@ -2656,9 +2656,10 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
        els_iocb->port_id[0] = sp->fcport->d_id.b.al_pa;
        els_iocb->port_id[1] = sp->fcport->d_id.b.area;
        els_iocb->port_id[2] = sp->fcport->d_id.b.domain;
-       els_iocb->s_id[0] = vha->d_id.b.al_pa;
-       els_iocb->s_id[1] = vha->d_id.b.area;
-       els_iocb->s_id[2] = vha->d_id.b.domain;
+       /* For SID the byte order is different from DID */
+       els_iocb->s_id[1] = vha->d_id.b.al_pa;
+       els_iocb->s_id[2] = vha->d_id.b.area;
+       els_iocb->s_id[0] = vha->d_id.b.domain;
 
        if (elsio->u.els_logo.els_cmd == ELS_DCMD_PLOGI) {
                els_iocb->control_flags = 0;
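
A 24-bit FC port ID is domain:area:al_pa, but this IOCB packs the destination and source IDs differently: the DID field carries al_pa/area/domain in bytes 0/1/2, while the fixed SID carries domain/al_pa/area in bytes 0/1/2, exactly as the hunk encodes. A runnable sketch of the two layouts, names illustrative:

#include <stdint.h>
#include <stdio.h>

struct fc_id { uint8_t domain, area, al_pa; };

static void pack_did(uint8_t out[3], struct fc_id id)
{
        out[0] = id.al_pa;  out[1] = id.area;   out[2] = id.domain;
}

static void pack_sid(uint8_t out[3], struct fc_id id)
{
        /* note the rotated order relative to pack_did() */
        out[0] = id.domain; out[1] = id.al_pa;  out[2] = id.area;
}

int main(void)
{
        struct fc_id id = { .domain = 0x01, .area = 0x02, .al_pa = 0x03 };
        uint8_t did[3], sid[3];

        pack_did(did, id);
        pack_sid(sid, id);
        printf("DID %02x %02x %02x / SID %02x %02x %02x\n",
               did[0], did[1], did[2], sid[0], sid[1], sid[2]);
        return 0;
}
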
index 4c26630..009fd5a 100644 (file)
@@ -2837,8 +2837,6 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
        if (sense_len == 0) {
                rsp->status_srb = NULL;
                sp->done(sp, cp->result);
-       } else {
-               WARN_ON_ONCE(true);
        }
 }
 
index 4c858e2..4a1f21c 100644 (file)
@@ -702,6 +702,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr)
                mcp->mb[2] = LSW(risc_addr);
                mcp->mb[3] = 0;
                mcp->mb[4] = 0;
+               mcp->mb[11] = 0;
                ha->flags.using_lr_setting = 0;
                if (IS_QLA25XX(ha) || IS_QLA81XX(ha) || IS_QLA83XX(ha) ||
                    IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
@@ -746,7 +747,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr)
                if (ha->flags.exchoffld_enabled)
                        mcp->mb[4] |= ENABLE_EXCHANGE_OFFLD;
 
-               mcp->out_mb |= MBX_4|MBX_3|MBX_2|MBX_1;
+               mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11;
                mcp->in_mb |= MBX_3 | MBX_2 | MBX_1;
        } else {
                mcp->mb[1] = LSW(risc_addr);
@@ -2249,7 +2250,7 @@ qla2x00_lip_reset(scsi_qla_host_t *vha)
        mbx_cmd_t mc;
        mbx_cmd_t *mcp = &mc;
 
-       ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x105a,
+       ql_dbg(ql_dbg_disc, vha, 0x105a,
            "Entered %s.\n", __func__);
 
        if (IS_CNA_CAPABLE(vha->hw)) {
@@ -3883,14 +3884,24 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                case TOPO_N2N:
                        ha->current_topology = ISP_CFG_N;
                        spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+                       list_for_each_entry(fcport, &vha->vp_fcports, list) {
+                               fcport->scan_state = QLA_FCPORT_SCAN;
+                               fcport->n2n_flag = 0;
+                       }
+
                        fcport = qla2x00_find_fcport_by_wwpn(vha,
                            rptid_entry->u.f1.port_name, 1);
                        spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
                        if (fcport) {
                                fcport->plogi_nack_done_deadline = jiffies + HZ;
-                               fcport->dm_login_expire = jiffies + 3*HZ;
+                               fcport->dm_login_expire = jiffies + 2*HZ;
                                fcport->scan_state = QLA_FCPORT_FOUND;
+                               fcport->n2n_flag = 1;
+                               fcport->keep_nport_handle = 1;
+                               if (vha->flags.nvme_enabled)
+                                       fcport->fc4f_nvme = 1;
+
                                switch (fcport->disc_state) {
                                case DSC_DELETED:
                                        set_bit(RELOGIN_NEEDED,
@@ -3924,7 +3935,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                                    rptid_entry->u.f1.port_name,
                                    rptid_entry->u.f1.node_name,
                                    NULL,
-                                   FC4_TYPE_UNKNOWN);
+                                   FS_FCP_IS_N2N);
                        }
 
                        /* if our portname is higher then initiate N2N login */
@@ -4023,6 +4034,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 
                list_for_each_entry(fcport, &vha->vp_fcports, list) {
                        fcport->scan_state = QLA_FCPORT_SCAN;
+                       fcport->n2n_flag = 0;
                }
 
                fcport = qla2x00_find_fcport_by_wwpn(vha,
@@ -4032,6 +4044,14 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                        fcport->login_retry = vha->hw->login_retry_count;
                        fcport->plogi_nack_done_deadline = jiffies + HZ;
                        fcport->scan_state = QLA_FCPORT_FOUND;
+                       fcport->keep_nport_handle = 1;
+                       fcport->n2n_flag = 1;
+                       fcport->d_id.b.domain =
+                               rptid_entry->u.f2.remote_nport_id[2];
+                       fcport->d_id.b.area =
+                               rptid_entry->u.f2.remote_nport_id[1];
+                       fcport->d_id.b.al_pa =
+                               rptid_entry->u.f2.remote_nport_id[0];
                }
        }
 }
index 1a9a11a..6afad68 100644 (file)
@@ -66,6 +66,7 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
        uint16_t vp_id;
        struct qla_hw_data *ha = vha->hw;
        unsigned long flags = 0;
+       u8 i;
 
        mutex_lock(&ha->vport_lock);
        /*
@@ -75,8 +76,9 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
         * ensures no active vp_list traversal while the vport is removed
         * from the queue)
         */
-       wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count),
-           10*HZ);
+       for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++)
+               wait_event_timeout(vha->vref_waitq,
+                   atomic_read(&vha->vref_count), HZ);
 
        spin_lock_irqsave(&ha->vport_slock, flags);
        if (atomic_read(&vha->vref_count)) {
@@ -262,6 +264,9 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
        spin_lock_irqsave(&ha->vport_slock, flags);
        list_for_each_entry(vha, &ha->vp_list, list) {
                if (vha->vp_idx) {
+                       if (test_bit(VPORT_DELETE, &vha->dpc_flags))
+                               continue;
+
                        atomic_inc(&vha->vref_count);
                        spin_unlock_irqrestore(&ha->vport_slock, flags);
 
@@ -300,6 +305,20 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
 int
 qla2x00_vp_abort_isp(scsi_qla_host_t *vha)
 {
+       fc_port_t *fcport;
+
+       /*
+        * To exclusively reset vport, we need to log it out first.
+        * Note: This control_vp can fail if an ISP reset has already
+        * been issued; this is expected, as the vp would already be
+        * logged out due to the ISP reset.
+        */
+       if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) {
+               qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL);
+               list_for_each_entry(fcport, &vha->vp_fcports, list)
+                       fcport->logout_on_delete = 0;
+       }
+
        /*
         * Physical port will do most of the abort and recovery work. We can
         * just treat it as a loop down
@@ -312,16 +331,9 @@ qla2x00_vp_abort_isp(scsi_qla_host_t *vha)
                        atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
        }
 
-       /*
-        * To exclusively reset vport, we need to log it out first.  Note: this
-        * control_vp can fail if ISP reset is already issued, this is
-        * expected, as the vp would be already logged out due to ISP reset.
-        */
-       if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags))
-               qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL);
-
        ql_dbg(ql_dbg_taskm, vha, 0x801d,
            "Scheduling enable of Vport %d.\n", vha->vp_idx);
+
        return qla24xx_enable_vp(vha);
 }
 
index 73db01e..337162a 100644 (file)
@@ -1115,9 +1115,15 @@ static inline int test_fcport_count(scsi_qla_host_t *vha)
 void
 qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha)
 {
+       u8 i;
+
        qla2x00_mark_all_devices_lost(vha, 0);
 
-       wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), 10*HZ);
+       for (i = 0; i < 10; i++)
+               wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha),
+                   HZ);
+
+       flush_workqueue(vha->hw->wq);
 }
 
 /*
@@ -3218,6 +3224,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
            req->req_q_in, req->req_q_out, rsp->rsp_q_in, rsp->rsp_q_out);
 
        ha->wq = alloc_workqueue("qla2xxx_wq", 0, 0);
+       if (unlikely(!ha->wq)) {
+               ret = -ENOMEM;
+               goto probe_failed;
+       }
 
        if (ha->isp_ops->initialize_adapter(base_vha)) {
                ql_log(ql_log_fatal, base_vha, 0x00d6,
@@ -3525,6 +3535,10 @@ qla2x00_shutdown(struct pci_dev *pdev)
                qla2x00_try_to_stop_firmware(vha);
        }
 
+       /* Disable timer */
+       if (vha->timer_active)
+               qla2x00_stop_timer(vha);
+
        /* Turn adapter off line */
        vha->flags.online = 0;
 
@@ -5036,6 +5050,10 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
 
                        memcpy(fcport->port_name, e->u.new_sess.port_name,
                            WWN_SIZE);
+
+                       if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N)
+                               fcport->n2n_flag = 1;
+
                } else {
                        ql_dbg(ql_dbg_disc, vha, 0xffff,
                                   "%s %8phC mem alloc fail.\n",
@@ -5134,11 +5152,9 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
                        if (dfcp)
                                qlt_schedule_sess_for_deletion(tfcp);
 
-
-                       if (N2N_TOPO(vha->hw))
-                               fcport->flags &= ~FCF_FABRIC_DEVICE;
-
                        if (N2N_TOPO(vha->hw)) {
+                               fcport->flags &= ~FCF_FABRIC_DEVICE;
+                               fcport->keep_nport_handle = 1;
                                if (vha->flags.nvme_enabled) {
                                        fcport->fc4f_nvme = 1;
                                        fcport->n2n_flag = 1;
index 0ffda61..a06e562 100644 (file)
@@ -953,7 +953,7 @@ void qlt_free_session_done(struct work_struct *work)
        struct qla_hw_data *ha = vha->hw;
        unsigned long flags;
        bool logout_started = false;
-       scsi_qla_host_t *base_vha;
+       scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
        struct qlt_plogi_ack_t *own =
                sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN];
 
@@ -1020,6 +1020,7 @@ void qlt_free_session_done(struct work_struct *work)
 
        if (logout_started) {
                bool traced = false;
+               u16 cnt = 0;
 
                while (!READ_ONCE(sess->logout_completed)) {
                        if (!traced) {
@@ -1029,6 +1030,9 @@ void qlt_free_session_done(struct work_struct *work)
                                traced = true;
                        }
                        msleep(100);
+                       cnt++;
+                       if (cnt > 200)
+                               break;
                }
 
                ql_dbg(ql_dbg_disc, vha, 0xf087,
@@ -1101,6 +1105,7 @@ void qlt_free_session_done(struct work_struct *work)
        }
 
        spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+       sess->free_pending = 0;
 
        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001,
            "Unregistration of sess %p %8phC finished fcp_cnt %d\n",
@@ -1109,17 +1114,9 @@ void qlt_free_session_done(struct work_struct *work)
        if (tgt && (tgt->sess_count == 0))
                wake_up_all(&tgt->waitQ);
 
-       if (vha->fcport_count == 0)
-               wake_up_all(&vha->fcport_waitQ);
-
-       base_vha = pci_get_drvdata(ha->pdev);
-
-       sess->free_pending = 0;
-
-       if (test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags))
-               return;
-
-       if ((!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) {
+       if (!test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags) &&
+           !(vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags)) &&
+           (!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) {
                switch (vha->host->active_mode) {
                case MODE_INITIATOR:
                case MODE_DUAL:
@@ -1132,6 +1129,9 @@ void qlt_free_session_done(struct work_struct *work)
                        break;
                }
        }
+
+       if (vha->fcport_count == 0)
+               wake_up_all(&vha->fcport_waitQ);
 }
 
 /* ha->tgt.sess_lock supposed to be held on entry */
@@ -1161,7 +1161,7 @@ void qlt_unreg_sess(struct fc_port *sess)
        sess->last_login_gen = sess->login_gen;
 
        INIT_WORK(&sess->free_work, qlt_free_session_done);
-       schedule_work(&sess->free_work);
+       queue_work(sess->vha->hw->wq, &sess->free_work);
 }
 EXPORT_SYMBOL(qlt_unreg_sess);
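
Routing free_work through the driver's own ha->wq (rather than schedule_work()'s system workqueue) is what makes the flush_workqueue() added to qla2x00_wait_for_sess_deletion() above an effective barrier: flushing a workqueue only waits for work queued on that same queue. A hedged kernel-style sketch of the pairing (real workqueue API, illustrative context):

#include <linux/workqueue.h>

static struct workqueue_struct *wq;     /* ha->wq, allocated at probe time */

static void free_session(struct work_struct *work)
{
        /* ... tear the session down ... */
}

static void unreg_session(struct work_struct *work)
{
        INIT_WORK(work, free_session);
        queue_work(wq, work);   /* not schedule_work(): the system
                                 * workqueue is not covered by a later
                                 * flush_workqueue(wq) */
}
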
 
index 1c470e3..ae2fa17 100644 (file)
@@ -967,6 +967,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
        ses->data_direction = scmd->sc_data_direction;
        ses->sdb = scmd->sdb;
        ses->result = scmd->result;
+       ses->resid_len = scmd->req.resid_len;
        ses->underflow = scmd->underflow;
        ses->prot_op = scmd->prot_op;
        ses->eh_eflags = scmd->eh_eflags;
@@ -977,6 +978,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
        memset(scmd->cmnd, 0, BLK_MAX_CDB);
        memset(&scmd->sdb, 0, sizeof(scmd->sdb));
        scmd->result = 0;
+       scmd->req.resid_len = 0;
 
        if (sense_bytes) {
                scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE,
@@ -1029,6 +1031,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)
        scmd->sc_data_direction = ses->data_direction;
        scmd->sdb = ses->sdb;
        scmd->result = ses->result;
+       scmd->req.resid_len = ses->resid_len;
        scmd->underflow = ses->underflow;
        scmd->prot_op = ses->prot_op;
        scmd->eh_eflags = ses->eh_eflags;
index dc210b9..5447738 100644 (file)
@@ -1834,6 +1834,7 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = {
        .init_request   = scsi_mq_init_request,
        .exit_request   = scsi_mq_exit_request,
        .initialize_rq_fn = scsi_initialize_rq,
+       .cleanup_rq     = scsi_cleanup_rq,
        .busy           = scsi_mq_lld_busy,
        .map_queues     = scsi_map_queues,
 };
@@ -1921,7 +1922,8 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q)
 {
        struct scsi_device *sdev = NULL;
 
-       if (q->mq_ops == &scsi_mq_ops)
+       if (q->mq_ops == &scsi_mq_ops_no_commit ||
+           q->mq_ops == &scsi_mq_ops)
                sdev = q->queuedata;
        if (!sdev || !get_device(&sdev->sdev_gendev))
                sdev = NULL;
index 64c96c7..6d7362e 100644 (file)
@@ -730,6 +730,14 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr,
                  const char *buf, size_t count)
 {
        struct kernfs_node *kn;
+       struct scsi_device *sdev = to_scsi_device(dev);
+
+       /*
+        * Try to get a reference to the module, to keep it from being
+        * removed while the delete is in progress.
+        */
+       if (scsi_device_get(sdev))
+               return -ENODEV;
 
        kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
        WARN_ON_ONCE(!kn);
@@ -744,9 +752,10 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr,
         * state into SDEV_DEL.
         */
        device_remove_file(dev, attr);
-       scsi_remove_device(to_scsi_device(dev));
+       scsi_remove_device(sdev);
        if (kn)
                sysfs_unbreak_active_protection(kn);
+       scsi_device_put(sdev);
        return count;
 };
 static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete);
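
The guard above pins the LLDD module for the duration of a self-initiated delete: scsi_device_get() takes a device and module reference (failing if the device is already going away), and scsi_device_put() drops it only after scsi_remove_device() returns. A runnable userspace analogue of that try-get/put bracketing, names illustrative:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic int refs = 1;            /* > 0: alive; <= 0: being removed */

static bool device_try_get(void)        /* scsi_device_get() analogue */
{
        int old = atomic_load(&refs);

        do {
                if (old <= 0)
                        return false;   /* removal in progress: -ENODEV */
        } while (!atomic_compare_exchange_weak(&refs, &old, old + 1));
        return true;
}

static void device_put(void)            /* scsi_device_put() analogue */
{
        atomic_fetch_sub(&refs, 1);
}

static int store_delete(void)
{
        if (!device_try_get())
                return -1;
        puts("removing device");        /* scsi_remove_device() analogue */
        device_put();
        return 0;
}

int main(void) { return store_delete(); }
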
index 7623196..ebb4016 100644 (file)
@@ -1166,11 +1166,12 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
        sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq));
        sector_t threshold;
        unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
-       bool dif, dix;
        unsigned int mask = logical_to_sectors(sdp, 1) - 1;
        bool write = rq_data_dir(rq) == WRITE;
        unsigned char protect, fua;
        blk_status_t ret;
+       unsigned int dif;
+       bool dix;
 
        ret = scsi_init_io(cmd);
        if (ret != BLK_STS_OK)
@@ -1211,9 +1212,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
        dix = scsi_prot_sg_count(cmd);
        dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
 
-       if (write && dix)
-               t10_pi_prepare(cmd->request, sdkp->protection_type);
-
        if (dif || dix)
                protect = sd_setup_protect_cmnd(cmd, dix, dif);
        else
@@ -1657,7 +1655,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
                /* we need to evaluate the error return  */
                if (scsi_sense_valid(sshdr) &&
                        (sshdr->asc == 0x3a ||  /* medium not present */
-                        sshdr->asc == 0x20))   /* invalid command */
+                        sshdr->asc == 0x20 ||  /* invalid command */
+                        (sshdr->asc == 0x74 && sshdr->ascq == 0x71)))  /* drive is password locked */
                                /* this is no error here */
                                return 0;
 
@@ -2055,11 +2054,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
                                           "sd_done: completed %d of %d bytes\n",
                                           good_bytes, scsi_bufflen(SCpnt)));
 
-       if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
-           good_bytes)
-               t10_pi_complete(SCpnt->request, sdkp->protection_type,
-                               good_bytes / scsi_prot_interval(SCpnt));
-
        return good_bytes;
 }
 
index aef4881..a85d52b 100644 (file)
@@ -66,10 +66,8 @@ static int snirm710_probe(struct platform_device *dev)
 
        base = res->start;
        hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL);
-       if (!hostdata) {
-               dev_printk(KERN_ERR, dev, "Failed to allocate host data\n");
+       if (!hostdata)
                return -ENOMEM;
-       }
 
        hostdata->dev = &dev->dev;
        dma_set_mask(&dev->dev, DMA_BIT_MASK(32));
index ed8b9ac..542d2ba 100644 (file)
@@ -1837,8 +1837,7 @@ static int storvsc_probe(struct hv_device *device,
        /*
         * Set the number of HW queues we are supporting.
         */
-       if (stor_device->num_sc != 0)
-               host->nr_hw_queues = stor_device->num_sc + 1;
+       host->nr_hw_queues = num_present_cpus();
 
        /*
         * Set the error handler work queue.
index a9344eb..dc2f6d2 100644 (file)
@@ -98,6 +98,8 @@ static int ufs_bsg_request(struct bsg_job *job)
 
        bsg_reply->reply_payload_rcv_len = 0;
 
+       pm_runtime_get_sync(hba->dev);
+
        msgcode = bsg_request->msgcode;
        switch (msgcode) {
        case UPIU_TRANSACTION_QUERY_REQ:
@@ -135,6 +137,8 @@ static int ufs_bsg_request(struct bsg_job *job)
                break;
        }
 
+       pm_runtime_put_sync(hba->dev);
+
        if (!desc_buff)
                goto out;
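
The get/put pair added above keeps the UFS controller powered while a bsg request is processed: pm_runtime_get_sync() resumes a runtime-suspended device and raises its usage count, and pm_runtime_put_sync() drops the count so autosuspend can kick back in. A hedged kernel-style sketch (real pm_runtime_* API; the handler body is illustrative):

#include <linux/pm_runtime.h>

static int handle_bsg_request(struct device *dev)
{
        int ret;

        pm_runtime_get_sync(dev);       /* device guaranteed active here */
        ret = 0;                        /* ... process the bsg request ... */
        pm_runtime_put_sync(dev);       /* usage count back down */

        return ret;
}
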
 
index 034dd9c..11a87f5 100644 (file)
@@ -8143,6 +8143,9 @@ int ufshcd_shutdown(struct ufs_hba *hba)
 {
        int ret = 0;
 
+       if (!hba->is_powered)
+               goto out;
+
        if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba))
                goto out;
 
index 50831eb..c68882e 100644 (file)
@@ -46,7 +46,7 @@ static ssize_t soc_uid_show(struct device *dev,
        hdr->func = IMX_SC_MISC_FUNC_UNIQUE_ID;
        hdr->size = 1;
 
-       ret = imx_scu_call_rpc(soc_ipc_handle, &msg, false);
+       ret = imx_scu_call_rpc(soc_ipc_handle, &msg, true);
        if (ret) {
                pr_err("%s: get soc uid failed, ret %d\n", __func__, ret);
                return ret;
index aa8d842..b83a1d1 100644 (file)
@@ -120,7 +120,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
                if (!page)
                        goto free_pages;
                list_add_tail(&page->lru, &pages);
-               size_remaining -= PAGE_SIZE << compound_order(page);
+               size_remaining -= page_size(page);
                max_order = compound_order(page);
                i++;
        }
@@ -133,7 +133,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
 
        sg = table->sgl;
        list_for_each_entry_safe(page, tmp_page, &pages, lru) {
-               sg_set_page(sg, page, PAGE_SIZE << compound_order(page), 0);
+               sg_set_page(sg, page, page_size(page), 0);
                sg = sg_next(sg);
                list_del(&page->lru);
        }
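
Both hunks substitute the page_size() helper for the open-coded PAGE_SIZE << compound_order(page); the helper computes the same value. A runnable illustration, assuming 4 KiB base pages:

#include <assert.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL

static size_t page_size_of_order(unsigned int order)
{
        return PAGE_SIZE << order;      /* page_size(page) equivalent */
}

int main(void)
{
        assert(page_size_of_order(0) == 4096);  /* base page */
        assert(page_size_of_order(4) == 65536); /* order-4 compound page */
        return 0;
}
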
index 290dbfc..ce32dfe 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 config EXFAT_FS
        tristate "exFAT fs support"
        depends on BLOCK
@@ -6,7 +7,7 @@ config EXFAT_FS
          This adds support for the exFAT file system.
 
 config EXFAT_DONT_MOUNT_VFAT
-       bool "Prohibit mounting of fat/vfat filesysems by exFAT"
+       bool "Prohibit mounting of fat/vfat filesystems by exFAT"
        depends on EXFAT_FS
        default y
        help
index 84944df..6c90aec 100644 (file)
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-or-later
 
 obj-$(CONFIG_EXFAT_FS) += exfat.o
 
index 6c12f2d..3abab33 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  */
index f086c75..81d20e6 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  */
index 1565ce6..e1b0017 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  */
index b3e9cf7..79174e5 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  */
index 03cb829..a5c4b68 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  */
index 5f6caee..3b2b0ce 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  */
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/time.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/seq_file.h>
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
@@ -3450,7 +3451,7 @@ static void exfat_free_super(struct exfat_sb_info *sbi)
                kfree(sbi->options.iocharset);
        /* mutex_init is in exfat_fill_super function. only for 3.7+ */
        mutex_destroy(&sbi->s_lock);
-       kfree(sbi);
+       kvfree(sbi);
 }
 
 static void exfat_put_super(struct super_block *sb)
@@ -3845,7 +3846,7 @@ static int exfat_fill_super(struct super_block *sb, void *data, int silent)
         * the filesystem, since we're only just about to mount
         * it and have no inodes etc active!
         */
-       sbi = kzalloc(sizeof(struct exfat_sb_info), GFP_KERNEL);
+       sbi = kvzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
                return -ENOMEM;
        mutex_init(&sbi->s_lock);
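
The two hunks in this file must change together: kvzalloc() tries kmalloc first and falls back to vmalloc for requests the slab cannot satisfy, so the buffer must be released with kvfree(), which handles either origin; a plain kfree() would be wrong for the vmalloc case. A hedged kernel-style sketch (real kvzalloc/kvfree API, hypothetical struct):

#include <linux/mm.h>
#include <linux/slab.h>

struct big_state { char buf[1 << 16]; };        /* hypothetical large struct */

static struct big_state *alloc_state(void)
{
        /* slab first, transparent vmalloc fallback */
        return kvzalloc(sizeof(struct big_state), GFP_KERNEL);
}

static void free_state(struct big_state *s)
{
        kvfree(s);      /* correct for both slab and vmalloc memory */
}
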
index 366082f..b91a1fa 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  */
index 8ec524a..cb61c2a 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 menuconfig FB_TFT
        tristate "Support for small TFT LCD display modules"
-       depends on FB && SPI
+       depends on FB && SPI && OF
        depends on GPIOLIB || COMPILE_TEST
        select FB_SYS_FILLRECT
        select FB_SYS_COPYAREA
@@ -199,13 +199,3 @@ config FB_TFT_WATTEROTT
        depends on FB_TFT
        help
          Generic Framebuffer support for WATTEROTT
-
-config FB_FLEX
-       tristate "Generic FB driver for TFT LCD displays"
-       depends on FB_TFT
-       help
-         Generic Framebuffer support for TFT LCD displays.
-
-config FB_TFT_FBTFT_DEVICE
-       tristate "Module to for adding FBTFT devices"
-       depends on FB_TFT
index 6bc0331..27af43f 100644 (file)
@@ -36,7 +36,3 @@ obj-$(CONFIG_FB_TFT_UC1611)      += fb_uc1611.o
 obj-$(CONFIG_FB_TFT_UC1701)      += fb_uc1701.o
 obj-$(CONFIG_FB_TFT_UPD161704)   += fb_upd161704.o
 obj-$(CONFIG_FB_TFT_WATTEROTT)   += fb_watterott.o
-obj-$(CONFIG_FB_FLEX)            += flexfb.o
-
-# Device modules
-obj-$(CONFIG_FB_TFT_FBTFT_DEVICE) += fbtft_device.o
index cf5700a..a0a67aa 100644 (file)
@@ -714,7 +714,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display,
        if (par->gamma.curves && gamma) {
                if (fbtft_gamma_parse_str(par, par->gamma.curves, gamma,
                                          strlen(gamma)))
-                       goto alloc_fail;
+                       goto release_framebuf;
        }
 
        /* Transmit buffer */
@@ -731,7 +731,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display,
        if (txbuflen > 0) {
                txbuf = devm_kzalloc(par->info->device, txbuflen, GFP_KERNEL);
                if (!txbuf)
-                       goto alloc_fail;
+                       goto release_framebuf;
                par->txbuf.buf = txbuf;
                par->txbuf.len = txbuflen;
        }
@@ -753,6 +753,9 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display,
 
        return info;
 
+release_framebuf:
+       framebuffer_release(info);
+
 alloc_fail:
        vfree(vmem);
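
The fix above adds a second unwind label: once framebuffer registration has succeeded, later failures (gamma parse, txbuf allocation) must jump to a label that releases the framebuffer before falling through to the earlier cleanup, following the usual reverse-order goto unwinding. A runnable userspace analogue, names illustrative:

#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
        void *vmem, *info;

        vmem = malloc(64);
        if (!vmem)
                goto alloc_fail;

        info = malloc(64);                      /* framebuffer_alloc() analogue */
        if (!info)
                goto alloc_fail;

        if (0 /* gamma parse / txbuf alloc fails */)
                goto release_framebuf;          /* NOT alloc_fail: info exists */

        printf("setup ok\n");
        free(info);
        free(vmem);
        return 0;

release_framebuf:
        free(info);                             /* framebuffer_release() analogue */
alloc_fail:
        free(vmem);                             /* free(NULL) is a no-op */
        return -1;
}

int main(void) { return setup() ? 1 : 0; }
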
 
diff --git a/drivers/staging/fbtft/fbtft_device.c b/drivers/staging/fbtft/fbtft_device.c
deleted file mode 100644 (file)
index 44e1410..0000000
+++ /dev/null
@@ -1,1261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- *
- * Copyright (C) 2013, Noralf Tronnes
- */
-
-#define pr_fmt(fmt) "fbtft_device: " fmt
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/gpio/consumer.h>
-#include <linux/spi/spi.h>
-#include <video/mipi_display.h>
-
-#include "fbtft.h"
-
-#define MAX_GPIOS 32
-
-static struct spi_device *spi_device;
-static struct platform_device *p_device;
-
-static char *name;
-module_param(name, charp, 0000);
-MODULE_PARM_DESC(name,
-                "Devicename (required). name=list => list all supported devices.");
-
-static unsigned int rotate;
-module_param(rotate, uint, 0000);
-MODULE_PARM_DESC(rotate,
-                "Angle to rotate display counter clockwise: 0, 90, 180, 270");
-
-static unsigned int busnum;
-module_param(busnum, uint, 0000);
-MODULE_PARM_DESC(busnum, "SPI bus number (default=0)");
-
-static unsigned int cs;
-module_param(cs, uint, 0000);
-MODULE_PARM_DESC(cs, "SPI chip select (default=0)");
-
-static unsigned int speed;
-module_param(speed, uint, 0000);
-MODULE_PARM_DESC(speed, "SPI speed (override device default)");
-
-static int mode = -1;
-module_param(mode, int, 0000);
-MODULE_PARM_DESC(mode, "SPI mode (override device default)");
-
-static unsigned int fps;
-module_param(fps, uint, 0000);
-MODULE_PARM_DESC(fps, "Frames per second (override driver default)");
-
-static char *gamma;
-module_param(gamma, charp, 0000);
-MODULE_PARM_DESC(gamma,
-                "String representation of Gamma Curve(s). Driver specific.");
-
-static int txbuflen;
-module_param(txbuflen, int, 0000);
-MODULE_PARM_DESC(txbuflen, "txbuflen (override driver default)");
-
-static int bgr = -1;
-module_param(bgr, int, 0000);
-MODULE_PARM_DESC(bgr,
-                "BGR bit (supported by some drivers).");
-
-static unsigned int startbyte;
-module_param(startbyte, uint, 0000);
-MODULE_PARM_DESC(startbyte, "Sets the Start byte used by some SPI displays.");
-
-static bool custom;
-module_param(custom, bool, 0000);
-MODULE_PARM_DESC(custom, "Add a custom display device. Use speed= argument to make it a SPI device, else platform_device");
-
-static unsigned int width;
-module_param(width, uint, 0000);
-MODULE_PARM_DESC(width, "Display width, used with the custom argument");
-
-static unsigned int height;
-module_param(height, uint, 0000);
-MODULE_PARM_DESC(height, "Display height, used with the custom argument");
-
-static unsigned int buswidth = 8;
-module_param(buswidth, uint, 0000);
-MODULE_PARM_DESC(buswidth, "Display bus width, used with the custom argument");
-
-static s16 init[FBTFT_MAX_INIT_SEQUENCE];
-static int init_num;
-module_param_array(init, short, &init_num, 0000);
-MODULE_PARM_DESC(init, "Init sequence, used with the custom argument");
-
-static unsigned long debug;
-module_param(debug, ulong, 0000);
-MODULE_PARM_DESC(debug,
-                "level: 0-7 (the remaining 29 bits is for advanced usage)");
-
-static unsigned int verbose = 3;
-module_param(verbose, uint, 0000);
-MODULE_PARM_DESC(verbose,
-                "0 silent, >1 show devices, >2 show devices before (default=3)");
-
-struct fbtft_device_display {
-       char *name;
-       struct spi_board_info *spi;
-       struct platform_device *pdev;
-};
-
-static void fbtft_device_pdev_release(struct device *dev);
-
-static int write_gpio16_wr_slow(struct fbtft_par *par, void *buf, size_t len);
-static void adafruit18_green_tab_set_addr_win(struct fbtft_par *par,
-                                             int xs, int ys, int xe, int ye);
-
-#define ADAFRUIT18_GAMMA \
-               "02 1c 07 12 37 32 29 2d 29 25 2B 39 00 01 03 10\n" \
-               "03 1d 07 06 2E 2C 29 2D 2E 2E 37 3F 00 00 02 10"
-
-#define CBERRY28_GAMMA \
-               "D0 00 14 15 13 2C 42 43 4E 09 16 14 18 21\n" \
-               "D0 00 14 15 13 0B 43 55 53 0C 17 14 23 20"
-
-static const s16 cberry28_init_sequence[] = {
-       /* turn off sleep mode */
-       -1, MIPI_DCS_EXIT_SLEEP_MODE,
-       -2, 120,
-
-       /* set pixel format to RGB-565 */
-       -1, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT,
-
-       -1, 0xB2, 0x0C, 0x0C, 0x00, 0x33, 0x33,
-
-       /*
-        * VGH = 13.26V
-        * VGL = -10.43V
-        */
-       -1, 0xB7, 0x35,
-
-       /*
-        * VDV and VRH register values come from command write
-        * (instead of NVM)
-        */
-       -1, 0xC2, 0x01, 0xFF,
-
-       /*
-        * VAP =  4.7V + (VCOM + VCOM offset + 0.5 * VDV)
-        * VAN = -4.7V + (VCOM + VCOM offset + 0.5 * VDV)
-        */
-       -1, 0xC3, 0x17,
-
-       /* VDV = 0V */
-       -1, 0xC4, 0x20,
-
-       /* VCOM = 0.675V */
-       -1, 0xBB, 0x17,
-
-       /* VCOM offset = 0V */
-       -1, 0xC5, 0x20,
-
-       /*
-        * AVDD = 6.8V
-        * AVCL = -4.8V
-        * VDS = 2.3V
-        */
-       -1, 0xD0, 0xA4, 0xA1,
-
-       -1, MIPI_DCS_SET_DISPLAY_ON,
-
-       -3,
-};
-
-static const s16 hy28b_init_sequence[] = {
-       -1, 0x00e7, 0x0010, -1, 0x0000, 0x0001,
-       -1, 0x0001, 0x0100, -1, 0x0002, 0x0700,
-       -1, 0x0003, 0x1030, -1, 0x0004, 0x0000,
-       -1, 0x0008, 0x0207, -1, 0x0009, 0x0000,
-       -1, 0x000a, 0x0000, -1, 0x000c, 0x0001,
-       -1, 0x000d, 0x0000, -1, 0x000f, 0x0000,
-       -1, 0x0010, 0x0000, -1, 0x0011, 0x0007,
-       -1, 0x0012, 0x0000, -1, 0x0013, 0x0000,
-       -2, 50, -1, 0x0010, 0x1590, -1, 0x0011,
-       0x0227, -2, 50, -1, 0x0012, 0x009c, -2, 50,
-       -1, 0x0013, 0x1900, -1, 0x0029, 0x0023,
-       -1, 0x002b, 0x000e, -2, 50,
-       -1, 0x0020, 0x0000, -1, 0x0021, 0x0000,
-       -2, 50, -1, 0x0050, 0x0000,
-       -1, 0x0051, 0x00ef, -1, 0x0052, 0x0000,
-       -1, 0x0053, 0x013f, -1, 0x0060, 0xa700,
-       -1, 0x0061, 0x0001, -1, 0x006a, 0x0000,
-       -1, 0x0080, 0x0000, -1, 0x0081, 0x0000,
-       -1, 0x0082, 0x0000, -1, 0x0083, 0x0000,
-       -1, 0x0084, 0x0000, -1, 0x0085, 0x0000,
-       -1, 0x0090, 0x0010, -1, 0x0092, 0x0000,
-       -1, 0x0093, 0x0003, -1, 0x0095, 0x0110,
-       -1, 0x0097, 0x0000, -1, 0x0098, 0x0000,
-       -1, 0x0007, 0x0133, -1, 0x0020, 0x0000,
-       -1, 0x0021, 0x0000, -2, 100, -3 };
-
-#define HY28B_GAMMA \
-       "04 1F 4 7 7 0 7 7 6 0\n" \
-       "0F 00 1 7 4 0 0 0 6 7"
-
-static const s16 pitft_init_sequence[] = {
-       -1, MIPI_DCS_SOFT_RESET,
-       -2, 5,
-       -1, MIPI_DCS_SET_DISPLAY_OFF,
-       -1, 0xEF, 0x03, 0x80, 0x02,
-       -1, 0xCF, 0x00, 0xC1, 0x30,
-       -1, 0xED, 0x64, 0x03, 0x12, 0x81,
-       -1, 0xE8, 0x85, 0x00, 0x78,
-       -1, 0xCB, 0x39, 0x2C, 0x00, 0x34, 0x02,
-       -1, 0xF7, 0x20,
-       -1, 0xEA, 0x00, 0x00,
-       -1, 0xC0, 0x23,
-       -1, 0xC1, 0x10,
-       -1, 0xC5, 0x3E, 0x28,
-       -1, 0xC7, 0x86,
-       -1, MIPI_DCS_SET_PIXEL_FORMAT, 0x55,
-       -1, 0xB1, 0x00, 0x18,
-       -1, 0xB6, 0x08, 0x82, 0x27,
-       -1, 0xF2, 0x00,
-       -1, MIPI_DCS_SET_GAMMA_CURVE, 0x01,
-       -1, 0xE0, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E,
-               0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00,
-       -1, 0xE1, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31,
-               0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F,
-       -1, MIPI_DCS_EXIT_SLEEP_MODE,
-       -2, 100,
-       -1, MIPI_DCS_SET_DISPLAY_ON,
-       -2, 20,
-       -3
-};
-
-static const s16 waveshare32b_init_sequence[] = {
-       -1, 0xCB, 0x39, 0x2C, 0x00, 0x34, 0x02,
-       -1, 0xCF, 0x00, 0xC1, 0x30,
-       -1, 0xE8, 0x85, 0x00, 0x78,
-       -1, 0xEA, 0x00, 0x00,
-       -1, 0xED, 0x64, 0x03, 0x12, 0x81,
-       -1, 0xF7, 0x20,
-       -1, 0xC0, 0x23,
-       -1, 0xC1, 0x10,
-       -1, 0xC5, 0x3E, 0x28,
-       -1, 0xC7, 0x86,
-       -1, MIPI_DCS_SET_ADDRESS_MODE, 0x28,
-       -1, MIPI_DCS_SET_PIXEL_FORMAT, 0x55,
-       -1, 0xB1, 0x00, 0x18,
-       -1, 0xB6, 0x08, 0x82, 0x27,
-       -1, 0xF2, 0x00,
-       -1, MIPI_DCS_SET_GAMMA_CURVE, 0x01,
-       -1, 0xE0, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E,
-               0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00,
-       -1, 0xE1, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31,
-               0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F,
-       -1, MIPI_DCS_EXIT_SLEEP_MODE,
-       -2, 120,
-       -1, MIPI_DCS_SET_DISPLAY_ON,
-       -1, MIPI_DCS_WRITE_MEMORY_START,
-       -3
-};
-
-#define PIOLED_GAMMA   "0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 " \
-                       "2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 " \
-                       "3 3 3 4 4 4 4 4 4 4 4 4 4 4 4"
-
-/* Supported displays in alphabetical order */
-static struct fbtft_device_display displays[] = {
-       {
-               .name = "adafruit18",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_st7735r",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .gamma = ADAFRUIT18_GAMMA,
-                       }
-               }
-       }, {
-               .name = "adafruit18_green",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_st7735r",
-                       .max_speed_hz = 4000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                                       .fbtftops.set_addr_win =
-                                           adafruit18_green_tab_set_addr_win,
-                               },
-                               .bgr = true,
-                               .gamma = ADAFRUIT18_GAMMA,
-                       }
-               }
-       }, {
-               .name = "adafruit22",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_hx8340bn",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 9,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "adafruit22a",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9340",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "adafruit28",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9341",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "adafruit13m",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ssd1306",
-                       .max_speed_hz = 16000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               .name = "admatec_c-berry28",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_st7789v",
-                       .max_speed_hz = 48000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                                       .init_sequence = cberry28_init_sequence,
-                               },
-                               .gamma = CBERRY28_GAMMA,
-                       }
-               }
-       }, {
-               .name = "agm1264k-fl",
-               .pdev = &(struct platform_device) {
-                       .name = "fb_agm1264k-fl",
-                       .id = 0,
-                       .dev = {
-                       .release = fbtft_device_pdev_release,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = FBTFT_ONBOARD_BACKLIGHT,
-                               },
-                       },
-                       }
-               }
-       }, {
-               .name = "dogs102",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_uc1701",
-                       .max_speed_hz = 8000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "er_tftm050_2",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ra8875",
-                       .max_speed_hz = 5000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                                       .width = 480,
-                                       .height = 272,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "er_tftm070_5",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ra8875",
-                       .max_speed_hz = 5000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                                       .width = 800,
-                                       .height = 480,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "ew24ha0",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_uc1611",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               .name = "ew24ha0_9bit",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_uc1611",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 9,
-                               },
-                       }
-               }
-       }, {
-               .name = "flexfb",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "flexfb",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-               }
-       }, {
-               .name = "flexpfb",
-               .pdev = &(struct platform_device) {
-                       .name = "flexpfb",
-                       .id = 0,
-                       .dev = {
-                       .release = fbtft_device_pdev_release,
-                       }
-               }
-       }, {
-               .name = "freetronicsoled128",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ssd1351",
-                       .max_speed_hz = 20000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = FBTFT_ONBOARD_BACKLIGHT,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "hx8353d",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_hx8353d",
-                       .max_speed_hz = 16000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                       }
-               }
-       }, {
-               .name = "hy28a",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9320",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .startbyte = 0x70,
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "hy28b",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9325",
-                       .max_speed_hz = 48000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                                       .init_sequence = hy28b_init_sequence,
-                               },
-                               .startbyte = 0x70,
-                               .bgr = true,
-                               .fps = 50,
-                               .gamma = HY28B_GAMMA,
-                       }
-               }
-       }, {
-               .name = "ili9481",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9481",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .regwidth = 16,
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "itdb24",
-               .pdev = &(struct platform_device) {
-                       .name = "fb_s6d1121",
-                       .id = 0,
-                       .dev = {
-                       .release = fbtft_device_pdev_release,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = false,
-                       },
-                       }
-               }
-       }, {
-               .name = "itdb28",
-               .pdev = &(struct platform_device) {
-                       .name = "fb_ili9325",
-                       .id = 0,
-                       .dev = {
-                       .release = fbtft_device_pdev_release,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       },
-                       }
-               }
-       }, {
-               .name = "itdb28_spi",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9325",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "mi0283qt-2",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_hx8347d",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .startbyte = 0x70,
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "mi0283qt-9a",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9341",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 9,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "mi0283qt-v2",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_watterott",
-                       .max_speed_hz = 4000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                       }
-               }
-       }, {
-               .name = "nokia3310",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_pcd8544",
-                       .max_speed_hz = 400000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               .name = "nokia3310a",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_tls8204",
-                       .max_speed_hz = 1000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               .name = "nokia5110",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9163",
-                       .max_speed_hz = 12000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "piscreen",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9486",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .regwidth = 16,
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "pitft",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9340",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .chip_select = 0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                                       .init_sequence = pitft_init_sequence,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "pioled",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ssd1351",
-                       .max_speed_hz = 20000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                               .bgr = true,
-                               .gamma = PIOLED_GAMMA
-                       }
-               }
-       }, {
-               .name = "rpi-display",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9341",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "s6d02a1",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_s6d02a1",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "sainsmart18",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_st7735r",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               .name = "sainsmart32",
-               .pdev = &(struct platform_device) {
-                       .name = "fb_ssd1289",
-                       .id = 0,
-                       .dev = {
-                               .release = fbtft_device_pdev_release,
-                               .platform_data = &(struct fbtft_platform_data) {
-                                       .display = {
-                                               .buswidth = 16,
-                                               .txbuflen = -2, /* disable buffer */
-                                               .backlight = 1,
-                                               .fbtftops.write = write_gpio16_wr_slow,
-                                       },
-                                       .bgr = true,
-                               },
-                       },
-               }
-       }, {
-               .name = "sainsmart32_fast",
-               .pdev = &(struct platform_device) {
-                       .name = "fb_ssd1289",
-                       .id = 0,
-                       .dev = {
-                               .release = fbtft_device_pdev_release,
-                               .platform_data = &(struct fbtft_platform_data) {
-                                       .display = {
-                                               .buswidth = 16,
-                                               .txbuflen = -2, /* disable buffer */
-                                               .backlight = 1,
-                                       },
-                                       .bgr = true,
-                               },
-                       },
-               }
-       }, {
-               .name = "sainsmart32_latched",
-               .pdev = &(struct platform_device) {
-                       .name = "fb_ssd1289",
-                       .id = 0,
-                       .dev = {
-                               .release = fbtft_device_pdev_release,
-                               .platform_data = &(struct fbtft_platform_data) {
-                                       .display = {
-                                               .buswidth = 16,
-                                               .txbuflen = -2, /* disable buffer */
-                                               .backlight = 1,
-                                               .fbtftops.write =
-                                                       fbtft_write_gpio16_wr_latched,
-                                       },
-                                       .bgr = true,
-                               },
-                       },
-               }
-       }, {
-               .name = "sainsmart32_spi",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ssd1289",
-                       .max_speed_hz = 16000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "spidev",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "spidev",
-                       .max_speed_hz = 500000,
-                       .bus_num = 0,
-                       .chip_select = 0,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                       }
-               }
-       }, {
-               .name = "ssd1331",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ssd1331",
-                       .max_speed_hz = 20000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               .name = "tinylcd35",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_tinylcd",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "tm022hdh26",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9341",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "tontec35_9481", /* boards before 02 July 2014 */
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9481",
-                       .max_speed_hz = 128000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "tontec35_9486", /* boards after 02 July 2014 */
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9486",
-                       .max_speed_hz = 128000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "upd161704",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_upd161704",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               .name = "waveshare32b",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_ili9340",
-                       .max_speed_hz = 48000000,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                                       .backlight = 1,
-                                       .init_sequence =
-                                               waveshare32b_init_sequence,
-                               },
-                               .bgr = true,
-                       }
-               }
-       }, {
-               .name = "waveshare22",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "fb_bd663474",
-                       .max_speed_hz = 32000000,
-                       .mode = SPI_MODE_3,
-                       .platform_data = &(struct fbtft_platform_data) {
-                               .display = {
-                                       .buswidth = 8,
-                               },
-                       }
-               }
-       }, {
-               /* This should be the last item.
-                * Used with the custom argument
-                */
-               .name = "",
-               .spi = &(struct spi_board_info) {
-                       .modalias = "",
-                       .max_speed_hz = 0,
-                       .mode = SPI_MODE_0,
-                       .platform_data = &(struct fbtft_platform_data) {
-                       }
-               },
-               .pdev = &(struct platform_device) {
-                       .name = "",
-                       .id = 0,
-                       .dev = {
-                               .release = fbtft_device_pdev_release,
-                               .platform_data = &(struct fbtft_platform_data) {
-                               },
-                       },
-               },
-       }
-};
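
A note on the initializer idiom used throughout the displays[] table above: &(struct spi_board_info) { ... } takes the address of a C99 compound literal. At file scope such a literal has static storage duration, so the resulting pointer remains valid for the lifetime of the module. A minimal standalone sketch (hypothetical struct, not driver code):

    struct point { int x, y; };

    /* At file scope a compound literal has static storage duration,
     * so taking its address in an initializer is well defined.
     */
    static struct point *origin = &(struct point) { .x = 0, .y = 0 };
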
-
-static int write_gpio16_wr_slow(struct fbtft_par *par, void *buf, size_t len)
-{
-       u16 data;
-       int i;
-#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO
-       static u16 prev_data;
-#endif
-
-       fbtft_par_dbg_hex(DEBUG_WRITE, par, par->info->device, u8, buf, len,
-                         "%s(len=%zu): ", __func__, len);
-
-       while (len) {
-               data = *(u16 *)buf;
-
-               /* Start writing by pulling down /WR */
-               gpiod_set_value(par->gpio.wr, 0);
-
-               /* Set data */
-#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO
-               if (data == prev_data) {
-                       gpiod_set_value(par->gpio.wr, 0); /* used as delay */
-               } else {
-                       for (i = 0; i < 16; i++) {
-                               if ((data & 1) != (prev_data & 1))
-                                       gpiod_set_value(par->gpio.db[i],
-                                                       data & 1);
-                               data >>= 1;
-                               prev_data >>= 1;
-                       }
-               }
-#else
-               for (i = 0; i < 16; i++) {
-                       gpiod_set_value(par->gpio.db[i], data & 1);
-                       data >>= 1;
-               }
-#endif
-
-               /* Pullup /WR */
-               gpiod_set_value(par->gpio.wr, 1);
-
-#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO
-               prev_data = *(u16 *)buf;
-#endif
-               buf += 2;
-               len -= 2;
-       }
-
-       return 0;
-}
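
write_gpio16_wr_slow() above caches the previously written word in a static variable and toggles only the data lines whose bit actually changed, because every gpiod_set_value() call on this bit-banged bus is comparatively expensive. A condensed, driver-independent sketch of the same idea (set_line() is a hypothetical stand-in for the GPIO accessor):

    #include <stdint.h>

    static uint16_t prev_word;

    /* Touch only the data lines whose level differs from the last write. */
    static void write_word(uint16_t data, void (*set_line)(int idx, int level))
    {
            uint16_t changed = data ^ prev_word;
            int i;

            for (i = 0; i < 16; i++)
                    if (changed & (1u << i))
                            set_line(i, (data >> i) & 1);
            prev_word = data;
    }
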
-
-static void adafruit18_green_tab_set_addr_win(struct fbtft_par *par,
-                                             int xs, int ys, int xe, int ye)
-{
-       write_reg(par, 0x2A, 0, xs + 2, 0, xe + 2);
-       write_reg(par, 0x2B, 0, ys + 1, 0, ye + 1);
-       write_reg(par, 0x2C);
-}
-
-static void fbtft_device_pdev_release(struct device *dev)
-{
-/* Needed to silence this message:
- * Device 'xxx' does not have a release() function,
- * it is broken and must be fixed
- */
-}
-
-static int spi_device_found(struct device *dev, void *data)
-{
-       struct spi_device *spi = to_spi_device(dev);
-
-       dev_info(dev, "%s %s %dkHz %d bits mode=0x%02X\n", spi->modalias,
-                dev_name(dev), spi->max_speed_hz / 1000, spi->bits_per_word,
-                spi->mode);
-
-       return 0;
-}
-
-static void pr_spi_devices(void)
-{
-       pr_debug("SPI devices registered:\n");
-       bus_for_each_dev(&spi_bus_type, NULL, NULL, spi_device_found);
-}
-
-static int p_device_found(struct device *dev, void *data)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-
-       if (strstr(pdev->name, "fb"))
-               dev_info(dev, "%s id=%d pdata? %s\n", pdev->name, pdev->id,
-                        pdev->dev.platform_data ? "yes" : "no");
-
-       return 0;
-}
-
-static void pr_p_devices(void)
-{
-       pr_debug("'fb' Platform devices registered:\n");
-       bus_for_each_dev(&platform_bus_type, NULL, NULL, p_device_found);
-}
-
-#ifdef MODULE
-static void fbtft_device_spi_delete(struct spi_master *master, unsigned int cs)
-{
-       struct device *dev;
-       char str[32];
-
-       snprintf(str, sizeof(str), "%s.%u", dev_name(&master->dev), cs);
-
-       dev = bus_find_device_by_name(&spi_bus_type, NULL, str);
-       if (dev) {
-               if (verbose)
-                       dev_info(dev, "Deleting %s\n", str);
-               device_del(dev);
-       }
-}
-
-static int fbtft_device_spi_device_register(struct spi_board_info *spi)
-{
-       struct spi_master *master;
-
-       master = spi_busnum_to_master(spi->bus_num);
-       if (!master) {
-               pr_err("spi_busnum_to_master(%d) returned NULL\n",
-                      spi->bus_num);
-               return -EINVAL;
-       }
-       /* make sure it's available */
-       fbtft_device_spi_delete(master, spi->chip_select);
-       spi_device = spi_new_device(master, spi);
-       put_device(&master->dev);
-       if (!spi_device) {
-               dev_err(&master->dev, "spi_new_device() returned NULL\n");
-               return -EPERM;
-       }
-       return 0;
-}
-#else
-static int fbtft_device_spi_device_register(struct spi_board_info *spi)
-{
-       return spi_register_board_info(spi, 1);
-}
-#endif
-
-static int __init fbtft_device_init(void)
-{
-       struct spi_board_info *spi = NULL;
-       struct fbtft_platform_data *pdata;
-       bool found = false;
-       int i = 0;
-       int ret = 0;
-
-       if (!name) {
-#ifdef MODULE
-               pr_err("missing module parameter: 'name'\n");
-               return -EINVAL;
-#else
-               return 0;
-#endif
-       }
-
-       if (init_num > FBTFT_MAX_INIT_SEQUENCE) {
-               pr_err("init parameter: exceeded max array size: %d\n",
-                      FBTFT_MAX_INIT_SEQUENCE);
-               return -EINVAL;
-       }
-
-       if (verbose > 2) {
-               pr_spi_devices(); /* print list of registered SPI devices */
-               pr_p_devices(); /* print list of 'fb' platform devices */
-       }
-
-       pr_debug("name='%s', busnum=%d, cs=%d\n", name, busnum, cs);
-
-       if (rotate > 0 && rotate < 4) {
-               rotate = (4 - rotate) * 90;
-               pr_warn("argument 'rotate' should be an angle. Values 1-3 are deprecated. Setting it to %d.\n",
-                       rotate);
-       }
-       if (rotate != 0 && rotate != 90 && rotate != 180 && rotate != 270) {
-               pr_warn("argument 'rotate' illegal value: %d. Setting it to 0.\n",
-                       rotate);
-               rotate = 0;
-       }
-
-       /* name=list lists all supported displays */
-       if (strcmp(name, "list") == 0) {
-               pr_info("Supported displays:\n");
-
-               for (i = 0; i < ARRAY_SIZE(displays); i++)
-                       pr_info("%s\n", displays[i].name);
-               return -ECANCELED;
-       }
-
-       if (custom) {
-               i = ARRAY_SIZE(displays) - 1;
-               displays[i].name = name;
-               if (speed == 0) {
-                       displays[i].pdev->name = name;
-                       displays[i].spi = NULL;
-               } else {
-                       size_t len;
-
-                       len = strlcpy(displays[i].spi->modalias, name,
-                                     SPI_NAME_SIZE);
-                       if (len >= SPI_NAME_SIZE)
-                               pr_warn("modalias (name) truncated to: %s\n",
-                                       displays[i].spi->modalias);
-                       displays[i].pdev = NULL;
-               }
-       }
-
-       for (i = 0; i < ARRAY_SIZE(displays); i++) {
-               if (strncmp(name, displays[i].name, SPI_NAME_SIZE) == 0) {
-                       if (displays[i].spi) {
-                               spi = displays[i].spi;
-                               spi->chip_select = cs;
-                               spi->bus_num = busnum;
-                               if (speed)
-                                       spi->max_speed_hz = speed;
-                               if (mode != -1)
-                                       spi->mode = mode;
-                               pdata = (void *)spi->platform_data;
-                       } else if (displays[i].pdev) {
-                               p_device = displays[i].pdev;
-                               pdata = p_device->dev.platform_data;
-                       } else {
-                               pr_err("broken displays array\n");
-                               return -EINVAL;
-                       }
-
-                       pdata->rotate = rotate;
-                       if (bgr == 0)
-                               pdata->bgr = false;
-                       else if (bgr == 1)
-                               pdata->bgr = true;
-                       if (startbyte)
-                               pdata->startbyte = startbyte;
-                       if (gamma)
-                               pdata->gamma = gamma;
-                       pdata->display.debug = debug;
-                       if (fps)
-                               pdata->fps = fps;
-                       if (txbuflen)
-                               pdata->txbuflen = txbuflen;
-                       if (init_num)
-                               pdata->display.init_sequence = init;
-                       if (custom) {
-                               pdata->display.width = width;
-                               pdata->display.height = height;
-                               pdata->display.buswidth = buswidth;
-                               pdata->display.backlight = 1;
-                       }
-
-                       if (displays[i].spi) {
-                               ret = fbtft_device_spi_device_register(spi);
-                               if (ret) {
-                                       pr_err("failed to register SPI device\n");
-                                       return ret;
-                               }
-                       } else {
-                               ret = platform_device_register(p_device);
-                               if (ret < 0) {
-                                       pr_err("platform_device_register() returned %d\n",
-                                              ret);
-                                       return ret;
-                               }
-                       }
-                       found = true;
-                       break;
-               }
-       }
-
-       if (!found) {
-               pr_err("display not supported: '%s'\n", name);
-               return -EINVAL;
-       }
-
-       if (spi_device && (verbose > 1))
-               pr_spi_devices();
-       if (p_device && (verbose > 1))
-               pr_p_devices();
-
-       return 0;
-}
-
-static void __exit fbtft_device_exit(void)
-{
-       if (spi_device) {
-               device_del(&spi_device->dev);
-               kfree(spi_device);
-       }
-
-       if (p_device)
-               platform_device_unregister(p_device);
-}
-
-arch_initcall(fbtft_device_init);
-module_exit(fbtft_device_exit);
-
-MODULE_DESCRIPTION("Add a FBTFT device.");
-MODULE_AUTHOR("Noralf Tronnes");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/fbtft/flexfb.c b/drivers/staging/fbtft/flexfb.c
deleted file mode 100644 (file)
index 3747321..0000000
+++ /dev/null
@@ -1,851 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Generic FB driver for TFT LCD displays
- *
- * Copyright (C) 2013 Noralf Tronnes
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/gpio/consumer.h>
-#include <linux/spi/spi.h>
-#include <linux/delay.h>
-
-#include "fbtft.h"
-
-#define DRVNAME            "flexfb"
-
-static char *chip;
-module_param(chip, charp, 0000);
-MODULE_PARM_DESC(chip, "LCD controller");
-
-static unsigned int width;
-module_param(width, uint, 0000);
-MODULE_PARM_DESC(width, "Display width");
-
-static unsigned int height;
-module_param(height, uint, 0000);
-MODULE_PARM_DESC(height, "Display height");
-
-static s16 init[512];
-static int init_num;
-module_param_array(init, short, &init_num, 0000);
-MODULE_PARM_DESC(init, "Init sequence");
-
-static unsigned int setaddrwin;
-module_param(setaddrwin, uint, 0000);
-MODULE_PARM_DESC(setaddrwin, "Which set_addr_win() implementation to use");
-
-static unsigned int buswidth = 8;
-module_param(buswidth, uint, 0000);
-MODULE_PARM_DESC(buswidth, "Width of databus (default: 8)");
-
-static unsigned int regwidth = 8;
-module_param(regwidth, uint, 0000);
-MODULE_PARM_DESC(regwidth, "Width of controller register (default: 8)");
-
-static bool nobacklight;
-module_param(nobacklight, bool, 0000);
-MODULE_PARM_DESC(nobacklight, "Turn off backlight functionality.");
-
-static bool latched;
-module_param(latched, bool, 0000);
-MODULE_PARM_DESC(latched, "Use with latched 16-bit databus");
-
-static const s16 *initp;
-static int initp_num;
-
-/* default init sequences */
-static const s16 st7735r_init[] = {
-       -1, 0x01,
-       -2, 150,
-       -1, 0x11,
-       -2, 500,
-       -1, 0xB1, 0x01, 0x2C, 0x2D,
-       -1, 0xB2, 0x01, 0x2C, 0x2D,
-       -1, 0xB3, 0x01, 0x2C, 0x2D, 0x01, 0x2C, 0x2D,
-       -1, 0xB4, 0x07,
-       -1, 0xC0, 0xA2, 0x02, 0x84,
-       -1, 0xC1, 0xC5,
-       -1, 0xC2, 0x0A, 0x00,
-       -1, 0xC3, 0x8A, 0x2A,
-       -1, 0xC4, 0x8A, 0xEE,
-       -1, 0xC5, 0x0E,
-       -1, 0x20,
-       -1, 0x36, 0xC0,
-       -1, 0x3A, 0x05,
-       -1, 0xE0, 0x0f, 0x1a, 0x0f, 0x18, 0x2f, 0x28, 0x20, 0x22,
-           0x1f, 0x1b, 0x23, 0x37, 0x00, 0x07, 0x02, 0x10,
-       -1, 0xE1, 0x0f, 0x1b, 0x0f, 0x17, 0x33, 0x2c, 0x29, 0x2e,
-           0x30, 0x30, 0x39, 0x3f, 0x00, 0x07, 0x03, 0x10,
-       -1, 0x29,
-       -2, 100,
-       -1, 0x13,
-       -2, 10,
-       -3
-};
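
The s16 arrays in this file use fbtft's packed init-sequence convention: -1 introduces a controller command whose parameter bytes follow, -2 is followed by a delay in milliseconds, and -3 terminates the sequence. A minimal userspace sketch that decodes byte-oriented sequences such as st7735r_init above (illustrative only; the 16-bit register sequences below use the same markers with wider values):

    #include <stdio.h>

    typedef short s16;

    static void decode_init_seq(const s16 *seq)
    {
            int i = 0;

            while (seq[i] != -3) {
                    if (seq[i] == -1) {             /* command + parameters */
                            printf("cmd 0x%02X:", seq[++i]);
                            while (seq[i + 1] >= 0)
                                    printf(" 0x%02X", seq[++i]);
                            printf("\n");
                            i++;
                    } else if (seq[i] == -2) {      /* delay in milliseconds */
                            printf("delay %d ms\n", seq[++i]);
                            i++;
                    } else {
                            break;                  /* malformed sequence */
                    }
            }
    }
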
-
-static const s16 ssd1289_init[] = {
-       -1, 0x00, 0x0001,
-       -1, 0x03, 0xA8A4,
-       -1, 0x0C, 0x0000,
-       -1, 0x0D, 0x080C,
-       -1, 0x0E, 0x2B00,
-       -1, 0x1E, 0x00B7,
-       -1, 0x01, 0x2B3F,
-       -1, 0x02, 0x0600,
-       -1, 0x10, 0x0000,
-       -1, 0x11, 0x6070,
-       -1, 0x05, 0x0000,
-       -1, 0x06, 0x0000,
-       -1, 0x16, 0xEF1C,
-       -1, 0x17, 0x0003,
-       -1, 0x07, 0x0233,
-       -1, 0x0B, 0x0000,
-       -1, 0x0F, 0x0000,
-       -1, 0x41, 0x0000,
-       -1, 0x42, 0x0000,
-       -1, 0x48, 0x0000,
-       -1, 0x49, 0x013F,
-       -1, 0x4A, 0x0000,
-       -1, 0x4B, 0x0000,
-       -1, 0x44, 0xEF00,
-       -1, 0x45, 0x0000,
-       -1, 0x46, 0x013F,
-       -1, 0x30, 0x0707,
-       -1, 0x31, 0x0204,
-       -1, 0x32, 0x0204,
-       -1, 0x33, 0x0502,
-       -1, 0x34, 0x0507,
-       -1, 0x35, 0x0204,
-       -1, 0x36, 0x0204,
-       -1, 0x37, 0x0502,
-       -1, 0x3A, 0x0302,
-       -1, 0x3B, 0x0302,
-       -1, 0x23, 0x0000,
-       -1, 0x24, 0x0000,
-       -1, 0x25, 0x8000,
-       -1, 0x4f, 0x0000,
-       -1, 0x4e, 0x0000,
-       -1, 0x22,
-       -3
-};
-
-static const s16 hx8340bn_init[] = {
-       -1, 0xC1, 0xFF, 0x83, 0x40,
-       -1, 0x11,
-       -2, 150,
-       -1, 0xCA, 0x70, 0x00, 0xD9,
-       -1, 0xB0, 0x01, 0x11,
-       -1, 0xC9, 0x90, 0x49, 0x10, 0x28, 0x28, 0x10, 0x00, 0x06,
-       -2, 20,
-       -1, 0xC2, 0x60, 0x71, 0x01, 0x0E, 0x05, 0x02, 0x09, 0x31, 0x0A,
-       -1, 0xC3, 0x67, 0x30, 0x61, 0x17, 0x48, 0x07, 0x05, 0x33,
-       -2, 10,
-       -1, 0xB5, 0x35, 0x20, 0x45,
-       -1, 0xB4, 0x33, 0x25, 0x4C,
-       -2, 10,
-       -1, 0x3A, 0x05,
-       -1, 0x29,
-       -2, 10,
-       -3
-};
-
-static const s16 ili9225_init[] = {
-       -1, 0x0001, 0x011C,
-       -1, 0x0002, 0x0100,
-       -1, 0x0003, 0x1030,
-       -1, 0x0008, 0x0808,
-       -1, 0x000C, 0x0000,
-       -1, 0x000F, 0x0A01,
-       -1, 0x0020, 0x0000,
-       -1, 0x0021, 0x0000,
-       -2, 50,
-       -1, 0x0010, 0x0A00,
-       -1, 0x0011, 0x1038,
-       -2, 50,
-       -1, 0x0012, 0x1121,
-       -1, 0x0013, 0x004E,
-       -1, 0x0014, 0x676F,
-       -1, 0x0030, 0x0000,
-       -1, 0x0031, 0x00DB,
-       -1, 0x0032, 0x0000,
-       -1, 0x0033, 0x0000,
-       -1, 0x0034, 0x00DB,
-       -1, 0x0035, 0x0000,
-       -1, 0x0036, 0x00AF,
-       -1, 0x0037, 0x0000,
-       -1, 0x0038, 0x00DB,
-       -1, 0x0039, 0x0000,
-       -1, 0x0050, 0x0000,
-       -1, 0x0051, 0x060A,
-       -1, 0x0052, 0x0D0A,
-       -1, 0x0053, 0x0303,
-       -1, 0x0054, 0x0A0D,
-       -1, 0x0055, 0x0A06,
-       -1, 0x0056, 0x0000,
-       -1, 0x0057, 0x0303,
-       -1, 0x0058, 0x0000,
-       -1, 0x0059, 0x0000,
-       -2, 50,
-       -1, 0x0007, 0x1017,
-       -2, 50,
-       -3
-};
-
-static const s16 ili9320_init[] = {
-       -1, 0x00E5, 0x8000,
-       -1, 0x0000, 0x0001,
-       -1, 0x0001, 0x0100,
-       -1, 0x0002, 0x0700,
-       -1, 0x0003, 0x1030,
-       -1, 0x0004, 0x0000,
-       -1, 0x0008, 0x0202,
-       -1, 0x0009, 0x0000,
-       -1, 0x000A, 0x0000,
-       -1, 0x000C, 0x0000,
-       -1, 0x000D, 0x0000,
-       -1, 0x000F, 0x0000,
-       -1, 0x0010, 0x0000,
-       -1, 0x0011, 0x0007,
-       -1, 0x0012, 0x0000,
-       -1, 0x0013, 0x0000,
-       -2, 200,
-       -1, 0x0010, 0x17B0,
-       -1, 0x0011, 0x0031,
-       -2, 50,
-       -1, 0x0012, 0x0138,
-       -2, 50,
-       -1, 0x0013, 0x1800,
-       -1, 0x0029, 0x0008,
-       -2, 50,
-       -1, 0x0020, 0x0000,
-       -1, 0x0021, 0x0000,
-       -1, 0x0030, 0x0000,
-       -1, 0x0031, 0x0505,
-       -1, 0x0032, 0x0004,
-       -1, 0x0035, 0x0006,
-       -1, 0x0036, 0x0707,
-       -1, 0x0037, 0x0105,
-       -1, 0x0038, 0x0002,
-       -1, 0x0039, 0x0707,
-       -1, 0x003C, 0x0704,
-       -1, 0x003D, 0x0807,
-       -1, 0x0050, 0x0000,
-       -1, 0x0051, 0x00EF,
-       -1, 0x0052, 0x0000,
-       -1, 0x0053, 0x013F,
-       -1, 0x0060, 0x2700,
-       -1, 0x0061, 0x0001,
-       -1, 0x006A, 0x0000,
-       -1, 0x0080, 0x0000,
-       -1, 0x0081, 0x0000,
-       -1, 0x0082, 0x0000,
-       -1, 0x0083, 0x0000,
-       -1, 0x0084, 0x0000,
-       -1, 0x0085, 0x0000,
-       -1, 0x0090, 0x0010,
-       -1, 0x0092, 0x0000,
-       -1, 0x0093, 0x0003,
-       -1, 0x0095, 0x0110,
-       -1, 0x0097, 0x0000,
-       -1, 0x0098, 0x0000,
-       -1, 0x0007, 0x0173,
-       -3
-};
-
-static const s16 ili9325_init[] = {
-       -1, 0x00E3, 0x3008,
-       -1, 0x00E7, 0x0012,
-       -1, 0x00EF, 0x1231,
-       -1, 0x0001, 0x0100,
-       -1, 0x0002, 0x0700,
-       -1, 0x0003, 0x1030,
-       -1, 0x0004, 0x0000,
-       -1, 0x0008, 0x0207,
-       -1, 0x0009, 0x0000,
-       -1, 0x000A, 0x0000,
-       -1, 0x000C, 0x0000,
-       -1, 0x000D, 0x0000,
-       -1, 0x000F, 0x0000,
-       -1, 0x0010, 0x0000,
-       -1, 0x0011, 0x0007,
-       -1, 0x0012, 0x0000,
-       -1, 0x0013, 0x0000,
-       -2, 200,
-       -1, 0x0010, 0x1690,
-       -1, 0x0011, 0x0223,
-       -2, 50,
-       -1, 0x0012, 0x000D,
-       -2, 50,
-       -1, 0x0013, 0x1200,
-       -1, 0x0029, 0x000A,
-       -1, 0x002B, 0x000C,
-       -2, 50,
-       -1, 0x0020, 0x0000,
-       -1, 0x0021, 0x0000,
-       -1, 0x0030, 0x0000,
-       -1, 0x0031, 0x0506,
-       -1, 0x0032, 0x0104,
-       -1, 0x0035, 0x0207,
-       -1, 0x0036, 0x000F,
-       -1, 0x0037, 0x0306,
-       -1, 0x0038, 0x0102,
-       -1, 0x0039, 0x0707,
-       -1, 0x003C, 0x0702,
-       -1, 0x003D, 0x1604,
-       -1, 0x0050, 0x0000,
-       -1, 0x0051, 0x00EF,
-       -1, 0x0052, 0x0000,
-       -1, 0x0053, 0x013F,
-       -1, 0x0060, 0xA700,
-       -1, 0x0061, 0x0001,
-       -1, 0x006A, 0x0000,
-       -1, 0x0080, 0x0000,
-       -1, 0x0081, 0x0000,
-       -1, 0x0082, 0x0000,
-       -1, 0x0083, 0x0000,
-       -1, 0x0084, 0x0000,
-       -1, 0x0085, 0x0000,
-       -1, 0x0090, 0x0010,
-       -1, 0x0092, 0x0600,
-       -1, 0x0007, 0x0133,
-       -3
-};
-
-static const s16 ili9341_init[] = {
-       -1, 0x28,
-       -2, 20,
-       -1, 0xCF, 0x00, 0x83, 0x30,
-       -1, 0xED, 0x64, 0x03, 0x12, 0x81,
-       -1, 0xE8, 0x85, 0x01, 0x79,
-       -1, 0xCB, 0x39, 0x2c, 0x00, 0x34, 0x02,
-       -1, 0xF7, 0x20,
-       -1, 0xEA, 0x00, 0x00,
-       -1, 0xC0, 0x26,
-       -1, 0xC1, 0x11,
-       -1, 0xC5, 0x35, 0x3E,
-       -1, 0xC7, 0xBE,
-       -1, 0xB1, 0x00, 0x1B,
-       -1, 0xB6, 0x0a, 0x82, 0x27, 0x00,
-       -1, 0xB7, 0x07,
-       -1, 0x3A, 0x55,
-       -1, 0x36, 0x48,
-       -1, 0x11,
-       -2, 120,
-       -1, 0x29,
-       -2, 20,
-       -3
-};
-
-static const s16 ssd1351_init[] = {
-       -1, 0xfd, 0x12,
-       -1, 0xfd, 0xb1,
-       -1, 0xae,
-       -1, 0xb3, 0xf1,
-       -1, 0xca, 0x7f,
-       -1, 0xa0, 0x74,
-       -1, 0x15, 0x00, 0x7f,
-       -1, 0x75, 0x00, 0x7f,
-       -1, 0xa1, 0x00,
-       -1, 0xa2, 0x00,
-       -1, 0xb5, 0x00,
-       -1, 0xab, 0x01,
-       -1, 0xb1, 0x32,
-       -1, 0xb4, 0xa0, 0xb5, 0x55,
-       -1, 0xbb, 0x17,
-       -1, 0xbe, 0x05,
-       -1, 0xc1, 0xc8, 0x80, 0xc8,
-       -1, 0xc7, 0x0f,
-       -1, 0xb6, 0x01,
-       -1, 0xa6,
-       -1, 0xaf,
-       -3
-};
-
-/**
- * struct flexfb_lcd_controller - Describes the LCD controller properties
- * @name: Model name of the chip
- * @width: Width of display in pixels
- * @height: Height of display in pixels
- * @setaddrwin: Which set_addr_win() implementation to use
- * @regwidth: LCD Controller Register width in bits
- * @init_seq: LCD initialization sequence
- * @init_seq_sz: Size of LCD initialization sequence
- */
-struct flexfb_lcd_controller {
-       const char *name;
-       unsigned int width;
-       unsigned int height;
-       unsigned int setaddrwin;
-       unsigned int regwidth;
-       const s16 *init_seq;
-       int init_seq_sz;
-};
-
-static const struct flexfb_lcd_controller flexfb_chip_table[] = {
-       {
-               .name = "st7735r",
-               .width = 120,
-               .height = 160,
-               .init_seq = st7735r_init,
-               .init_seq_sz = ARRAY_SIZE(st7735r_init),
-       },
-       {
-               .name = "hx8340bn",
-               .width = 176,
-               .height = 220,
-               .init_seq = hx8340bn_init,
-               .init_seq_sz = ARRAY_SIZE(hx8340bn_init),
-       },
-       {
-               .name = "ili9225",
-               .width = 176,
-               .height = 220,
-               .regwidth = 16,
-               .init_seq = ili9225_init,
-               .init_seq_sz = ARRAY_SIZE(ili9225_init),
-       },
-       {
-               .name = "ili9320",
-               .width = 240,
-               .height = 320,
-               .setaddrwin = 1,
-               .regwidth = 16,
-               .init_seq = ili9320_init,
-               .init_seq_sz = ARRAY_SIZE(ili9320_init),
-       },
-       {
-               .name = "ili9325",
-               .width = 240,
-               .height = 320,
-               .setaddrwin = 1,
-               .regwidth = 16,
-               .init_seq = ili9325_init,
-               .init_seq_sz = ARRAY_SIZE(ili9325_init),
-       },
-       {
-               .name = "ili9341",
-               .width = 240,
-               .height = 320,
-               .init_seq = ili9341_init,
-               .init_seq_sz = ARRAY_SIZE(ili9341_init),
-       },
-       {
-               .name = "ssd1289",
-               .width = 240,
-               .height = 320,
-               .setaddrwin = 2,
-               .regwidth = 16,
-               .init_seq = ssd1289_init,
-               .init_seq_sz = ARRAY_SIZE(ssd1289_init),
-       },
-       {
-               .name = "ssd1351",
-               .width = 128,
-               .height = 128,
-               .setaddrwin = 3,
-               .init_seq = ssd1351_init,
-               .init_seq_sz = ARRAY_SIZE(ssd1351_init),
-       },
-};
-
-/* ili9320, ili9325 */
-static void flexfb_set_addr_win_1(struct fbtft_par *par,
-                                 int xs, int ys, int xe, int ye)
-{
-       switch (par->info->var.rotate) {
-       /* R20h = Horizontal GRAM Start Address */
-       /* R21h = Vertical GRAM Start Address */
-       case 0:
-               write_reg(par, 0x0020, xs);
-               write_reg(par, 0x0021, ys);
-               break;
-       case 180:
-               write_reg(par, 0x0020, width - 1 - xs);
-               write_reg(par, 0x0021, height - 1 - ys);
-               break;
-       case 270:
-               write_reg(par, 0x0020, width - 1 - ys);
-               write_reg(par, 0x0021, xs);
-               break;
-       case 90:
-               write_reg(par, 0x0020, ys);
-               write_reg(par, 0x0021, height - 1 - xs);
-               break;
-       }
-       write_reg(par, 0x0022); /* Write Data to GRAM */
-}
-
-/* ssd1289 */
-static void flexfb_set_addr_win_2(struct fbtft_par *par,
-                                 int xs, int ys, int xe, int ye)
-{
-       switch (par->info->var.rotate) {
-       /* R4Eh - Set GDDRAM X address counter */
-       /* R4Fh - Set GDDRAM Y address counter */
-       case 0:
-               write_reg(par, 0x4e, xs);
-               write_reg(par, 0x4f, ys);
-               break;
-       case 180:
-               write_reg(par, 0x4e, par->info->var.xres - 1 - xs);
-               write_reg(par, 0x4f, par->info->var.yres - 1 - ys);
-               break;
-       case 270:
-               write_reg(par, 0x4e, par->info->var.yres - 1 - ys);
-               write_reg(par, 0x4f, xs);
-               break;
-       case 90:
-               write_reg(par, 0x4e, ys);
-               write_reg(par, 0x4f, par->info->var.xres - 1 - xs);
-               break;
-       }
-
-       /* R22h - RAM data write */
-       write_reg(par, 0x22, 0);
-}
-
-/* ssd1351 */
-static void set_addr_win_3(struct fbtft_par *par,
-                          int xs, int ys, int xe, int ye)
-{
-       write_reg(par, 0x15, xs, xe);
-       write_reg(par, 0x75, ys, ye);
-       write_reg(par, 0x5C);
-}
-
-static int flexfb_verify_gpios_dc(struct fbtft_par *par)
-{
-       fbtft_par_dbg(DEBUG_VERIFY_GPIOS, par, "%s()\n", __func__);
-
-       if (!par->gpio.dc) {
-               dev_err(par->info->device,
-                       "Missing info about 'dc' gpio. Aborting.\n");
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int flexfb_verify_gpios_db(struct fbtft_par *par)
-{
-       int i;
-       int num_db = buswidth;
-
-       fbtft_par_dbg(DEBUG_VERIFY_GPIOS, par, "%s()\n", __func__);
-
-       if (!par->gpio.dc) {
-               dev_err(par->info->device, "Missing info about 'dc' gpio. Aborting.\n");
-               return -EINVAL;
-       }
-       if (!par->gpio.wr) {
-               dev_err(par->info->device, "Missing info about 'wr' gpio. Aborting.\n");
-               return -EINVAL;
-       }
-       if (latched && !par->gpio.latch) {
-               dev_err(par->info->device, "Missing info about 'latch' gpio. Aborting.\n");
-               return -EINVAL;
-       }
-       if (latched)
-               num_db = buswidth / 2;
-       for (i = 0; i < num_db; i++) {
-               if (!par->gpio.db[i]) {
-                       dev_err(par->info->device,
-                               "Missing info about 'db%02d' gpio. Aborting.\n",
-                               i);
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
-static void flexfb_chip_load_param(const struct flexfb_lcd_controller *chip)
-{
-       if (!width)
-               width = chip->width;
-       if (!height)
-               height = chip->height;
-       setaddrwin = chip->setaddrwin;
-       if (chip->regwidth)
-               regwidth = chip->regwidth;
-       if (!init_num) {
-               initp = chip->init_seq;
-               initp_num = chip->init_seq_sz;
-       }
-}
-
-static struct fbtft_display flex_display = { };
-
-static int flexfb_chip_init(const struct device *dev)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(flexfb_chip_table); i++)
-               if (!strcmp(chip, flexfb_chip_table[i].name)) {
-                       flexfb_chip_load_param(&flexfb_chip_table[i]);
-                       return 0;
-               }
-
-       dev_err(dev, "chip=%s is not supported\n", chip);
-
-       return -EINVAL;
-}
-
-static int flexfb_probe_common(struct spi_device *sdev,
-                              struct platform_device *pdev)
-{
-       struct device *dev;
-       struct fb_info *info;
-       struct fbtft_par *par;
-       int ret;
-
-       initp = init;
-       initp_num = init_num;
-
-       if (sdev)
-               dev = &sdev->dev;
-       else
-               dev = &pdev->dev;
-
-       fbtft_init_dbg(dev, "%s(%s)\n", __func__,
-                      sdev ? "'SPI device'" : "'Platform device'");
-
-       if (chip) {
-               ret = flexfb_chip_init(dev);
-               if (ret)
-                       return ret;
-       }
-
-       if (width == 0 || height == 0) {
-               dev_err(dev, "argument(s) missing: width and height have to be set.\n");
-               return -EINVAL;
-       }
-       flex_display.width = width;
-       flex_display.height = height;
-       fbtft_init_dbg(dev, "Display resolution: %dx%d\n", width, height);
-       fbtft_init_dbg(dev, "chip = %s\n", chip ? chip : "not set");
-       fbtft_init_dbg(dev, "setaddrwin = %d\n", setaddrwin);
-       fbtft_init_dbg(dev, "regwidth = %d\n", regwidth);
-       fbtft_init_dbg(dev, "buswidth = %d\n", buswidth);
-
-       info = fbtft_framebuffer_alloc(&flex_display, dev, dev->platform_data);
-       if (!info)
-               return -ENOMEM;
-
-       par = info->par;
-       if (sdev)
-               par->spi = sdev;
-       else
-               par->pdev = pdev;
-       if (!par->init_sequence)
-               par->init_sequence = initp;
-       par->fbtftops.init_display = fbtft_init_display;
-
-       /* registerwrite functions */
-       switch (regwidth) {
-       case 8:
-               par->fbtftops.write_register = fbtft_write_reg8_bus8;
-               break;
-       case 16:
-               par->fbtftops.write_register = fbtft_write_reg16_bus8;
-               break;
-       default:
-               dev_err(dev,
-                       "argument 'regwidth': %d is not supported.\n",
-                       regwidth);
-               return -EINVAL;
-       }
-
-       /* bus functions */
-       if (sdev) {
-               par->fbtftops.write = fbtft_write_spi;
-               switch (buswidth) {
-               case 8:
-                       par->fbtftops.write_vmem = fbtft_write_vmem16_bus8;
-                       if (!par->startbyte)
-                               par->fbtftops.verify_gpios = flexfb_verify_gpios_dc;
-                       break;
-               case 9:
-                       if (regwidth == 16) {
-                               dev_err(dev, "argument 'regwidth': %d is not supported with buswidth=%d and SPI.\n",
-                                       regwidth, buswidth);
-                               return -EINVAL;
-                       }
-                       par->fbtftops.write_register = fbtft_write_reg8_bus9;
-                       par->fbtftops.write_vmem = fbtft_write_vmem16_bus9;
-                       if (par->spi->master->bits_per_word_mask
-                           & SPI_BPW_MASK(9)) {
-                               par->spi->bits_per_word = 9;
-                               break;
-                       }
-
-                       dev_warn(dev,
-                                "9-bit SPI not available, emulating using 8-bit.\n");
-                       /* allocate buffer with room for dc bits */
-                       par->extra = devm_kzalloc(par->info->device,
-                                                 par->txbuf.len
-                                                 + (par->txbuf.len / 8) + 8,
-                                                 GFP_KERNEL);
-                       if (!par->extra) {
-                               ret = -ENOMEM;
-                               goto out_release;
-                       }
-                       par->fbtftops.write = fbtft_write_spi_emulate_9;
-
-                       break;
-               default:
-                       dev_err(dev,
-                               "argument 'buswidth': %d is not supported with SPI.\n",
-                               buswidth);
-                       return -EINVAL;
-               }
-       } else {
-               par->fbtftops.verify_gpios = flexfb_verify_gpios_db;
-               switch (buswidth) {
-               case 8:
-                       par->fbtftops.write = fbtft_write_gpio8_wr;
-                       par->fbtftops.write_vmem = fbtft_write_vmem16_bus8;
-                       break;
-               case 16:
-                       par->fbtftops.write_register = fbtft_write_reg16_bus16;
-                       if (latched)
-                               par->fbtftops.write = fbtft_write_gpio16_wr_latched;
-                       else
-                               par->fbtftops.write = fbtft_write_gpio16_wr;
-                       par->fbtftops.write_vmem = fbtft_write_vmem16_bus16;
-                       break;
-               default:
-                       dev_err(dev,
-                               "argument 'buswidth': %d is not supported with parallel.\n",
-                               buswidth);
-                       return -EINVAL;
-               }
-       }
-
-       /* set_addr_win function */
-       switch (setaddrwin) {
-       case 0:
-               /* use default */
-               break;
-       case 1:
-               par->fbtftops.set_addr_win = flexfb_set_addr_win_1;
-               break;
-       case 2:
-               par->fbtftops.set_addr_win = flexfb_set_addr_win_2;
-               break;
-       case 3:
-               par->fbtftops.set_addr_win = set_addr_win_3;
-               break;
-       default:
-               dev_err(dev, "argument 'setaddrwin': unknown value %d.\n",
-                       setaddrwin);
-               return -EINVAL;
-       }
-
-       if (!nobacklight)
-               par->fbtftops.register_backlight = fbtft_register_backlight;
-
-       ret = fbtft_register_framebuffer(info);
-       if (ret < 0)
-               goto out_release;
-
-       return 0;
-
-out_release:
-       fbtft_framebuffer_release(info);
-
-       return ret;
-}
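
The buswidth=9 fallback in flexfb_probe_common() above emulates a 9-bit SPI transfer on an 8-bit-only controller by inserting the data/command bit in front of every payload byte, which is why the tx buffer is reallocated with roughly one extra byte per eight. A simplified sketch of the expansion (an assumption for illustration; the real fbtft_write_spi_emulate_9 packs the resulting 9-bit words densely rather than one per u16):

    #include <stdint.h>
    #include <stddef.h>

    /* Prefix each byte with a data/command bit: one 9-bit word per byte. */
    static void expand_9bit(const uint8_t *src, uint16_t *dst, size_t n, int dc)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    dst[i] = ((uint16_t)(dc & 1) << 8) | src[i];
    }
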
-
-static int flexfb_remove_common(struct device *dev, struct fb_info *info)
-{
-       struct fbtft_par *par;
-
-       if (!info)
-               return -EINVAL;
-       par = info->par;
-       if (par)
-               fbtft_par_dbg(DEBUG_DRIVER_INIT_FUNCTIONS, par, "%s()\n",
-                             __func__);
-       fbtft_unregister_framebuffer(info);
-       fbtft_framebuffer_release(info);
-
-       return 0;
-}
-
-static int flexfb_probe_spi(struct spi_device *spi)
-{
-       return flexfb_probe_common(spi, NULL);
-}
-
-static int flexfb_remove_spi(struct spi_device *spi)
-{
-       struct fb_info *info = spi_get_drvdata(spi);
-
-       return flexfb_remove_common(&spi->dev, info);
-}
-
-static int flexfb_probe_pdev(struct platform_device *pdev)
-{
-       return flexfb_probe_common(NULL, pdev);
-}
-
-static int flexfb_remove_pdev(struct platform_device *pdev)
-{
-       struct fb_info *info = platform_get_drvdata(pdev);
-
-       return flexfb_remove_common(&pdev->dev, info);
-}
-
-static struct spi_driver flexfb_spi_driver = {
-       .driver = {
-               .name   = DRVNAME,
-       },
-       .probe  = flexfb_probe_spi,
-       .remove = flexfb_remove_spi,
-};
-
-static const struct platform_device_id flexfb_platform_ids[] = {
-       { "flexpfb", 0 },
-       { },
-};
-MODULE_DEVICE_TABLE(platform, flexfb_platform_ids);
-
-static struct platform_driver flexfb_platform_driver = {
-       .driver = {
-               .name   = DRVNAME,
-       },
-       .id_table = flexfb_platform_ids,
-       .probe  = flexfb_probe_pdev,
-       .remove = flexfb_remove_pdev,
-};
-
-static int __init flexfb_init(void)
-{
-       int ret, ret2;
-
-       ret = spi_register_driver(&flexfb_spi_driver);
-       ret2 = platform_driver_register(&flexfb_platform_driver);
-       if (ret < 0)
-               return ret;
-       return ret2;
-}
-
-static void __exit flexfb_exit(void)
-{
-       spi_unregister_driver(&flexfb_spi_driver);
-       platform_driver_unregister(&flexfb_platform_driver);
-}
-
-/* ------------------------------------------------------------------------- */
-
-module_init(flexfb_init);
-module_exit(flexfb_exit);
-
-MODULE_DESCRIPTION("Generic FB driver for TFT LCD displays");
-MODULE_AUTHOR("Noralf Tronnes");
-MODULE_LICENSE("GPL");
index c64728f..8346906 100644 (file)
@@ -261,11 +261,11 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
        /* Build the PKO buffer pointer */
        hw_buffer.u64 = 0;
        if (skb_shinfo(skb)->nr_frags == 0) {
-               hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data);
+               hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data);
                hw_buffer.s.pool = 0;
                hw_buffer.s.size = skb->len;
        } else {
-               hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data);
+               hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data);
                hw_buffer.s.pool = 0;
                hw_buffer.s.size = skb_headlen(skb);
                CVM_OCT_SKB_CB(skb)[0] = hw_buffer.u64;
@@ -273,11 +273,12 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
                        skb_frag_t *fs = skb_shinfo(skb)->frags + i;
 
                        hw_buffer.s.addr =
-                               XKPHYS_TO_PHYS((u64)skb_frag_address(fs));
+                               XKPHYS_TO_PHYS((uintptr_t)skb_frag_address(fs));
                        hw_buffer.s.size = skb_frag_size(fs);
                        CVM_OCT_SKB_CB(skb)[i + 1] = hw_buffer.u64;
                }
-               hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)CVM_OCT_SKB_CB(skb));
+               hw_buffer.s.addr =
+                       XKPHYS_TO_PHYS((uintptr_t)CVM_OCT_SKB_CB(skb));
                hw_buffer.s.size = skb_shinfo(skb)->nr_frags + 1;
                pko_command.s.segs = skb_shinfo(skb)->nr_frags + 1;
                pko_command.s.gather = 1;
@@ -349,10 +350,8 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
         */
        dst_release(skb_dst(skb));
        skb_dst_set(skb, NULL);
-#ifdef CONFIG_XFRM
-       secpath_reset(skb);
-#endif
-       nf_reset(skb);
+       skb_ext_reset(skb);
+       nf_reset_ct(skb);
 
 #ifdef CONFIG_NET_SCHED
        skb->tc_index = 0;
index a4ac3bf..b78ce9e 100644 (file)
@@ -1202,7 +1202,7 @@ static inline int cvmx_wqe_get_grp(cvmx_wqe_t *work)
 
 static inline void *cvmx_phys_to_ptr(uint64_t physical_address)
 {
-       return (void *)(physical_address);
+       return (void *)(uintptr_t)(physical_address);
 }
 
 static inline uint64_t cvmx_ptr_to_phys(void *ptr)
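
Both hunks above route pointer-to-integer conversions through uintptr_t. Casting a pointer straight to a fixed-width type such as u64 triggers a size-mismatch warning where pointers are 32 bits wide; uintptr_t is guaranteed wide enough to hold any object pointer and can then be widened losslessly. A minimal illustration:

    #include <stdint.h>

    static uint64_t ptr_to_u64(const void *p)
    {
            /* uintptr_t matches the pointer width; widening to u64 is safe. */
            return (uint64_t)(uintptr_t)p;
    }
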
index 9ddd516..5792f49 100644 (file)
@@ -409,7 +409,7 @@ static int odm_ARFBRefresh_8188E(struct odm_dm_struct *dm_odm, struct odm_ra_inf
                pRaInfo->PTModeSS = 3;
        else if (pRaInfo->HighestRate > 0x0b)
                pRaInfo->PTModeSS = 2;
-       else if (pRaInfo->HighestRate > 0x0b)
+       else if (pRaInfo->HighestRate > 0x03)
                pRaInfo->PTModeSS = 1;
        else
                pRaInfo->PTModeSS = 0;
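
The one-character hunk above fixes a copy-paste bug: with the second and third tests both comparing against 0x0b, the PTModeSS = 1 assignment was dead code. A condensed illustration of the fixed shape (the top threshold is hypothetical, as it falls outside the hunk):

    /* Before the fix the third test repeated 0x0b, making 1 unreachable. */
    static int pt_mode_ss(int highest_rate)
    {
            if (highest_rate > 0x13)
                    return 3;
            else if (highest_rate > 0x0b)
                    return 2;
            else if (highest_rate > 0x03)   /* was erroneously 0x0b */
                    return 1;
            else
                    return 0;
    }
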
index 664d93a..4fac9dc 100644 (file)
@@ -348,8 +348,10 @@ static struct adapter *rtw_usb_if1_init(struct dvobj_priv *dvobj,
        }
 
        padapter->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL);
-       if (!padapter->HalData)
-               DBG_88E("cant not alloc memory for HAL DATA\n");
+       if (!padapter->HalData) {
+               DBG_88E("Failed to allocate memory for HAL data\n");
+               goto free_adapter;
+       }
 
        /* step read_chip_version */
        rtw_hal_read_chip_version(padapter);
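
The hunk above turns a log-and-continue into a proper bail-out: previously a failed kzalloc() was merely logged and the probe carried on with a NULL HalData. The replacement follows the usual kernel check-and-unwind pattern; a self-contained sketch of the same shape (userspace stand-ins, not the driver's code):

    #include <stdlib.h>

    struct adapter { void *hal_data; };

    /* On allocation failure, jump to a label that releases everything
     * acquired so far and report the failure to the caller.
     */
    static struct adapter *adapter_init(void)
    {
            struct adapter *pad = calloc(1, sizeof(*pad));

            if (!pad)
                    return NULL;

            pad->hal_data = calloc(1, 64);
            if (!pad->hal_data)
                    goto free_adapter;

            return pad;

    free_adapter:
            free(pad);
            return NULL;
    }
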
diff --git a/drivers/staging/speakup/sysfs-driver-speakup b/drivers/staging/speakup/sysfs-driver-speakup
new file mode 100644 (file)
index 0000000..be3f5d6
--- /dev/null
@@ -0,0 +1,369 @@
+What:          /sys/accessibility/speakup/attrib_bleep
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Beeps the PC speaker when there is an attribute change, such
+               as foreground or background color, while using speakup review
+               commands. One = on, zero = off.
+
+What:          /sys/accessibility/speakup/bell_pos
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This works much like a typewriter bell. If for example 72 is
+               echoed to bell_pos, it will beep the PC speaker when typing on
+               a line past character 72.
+
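
These attributes are ordinary sysfs text files, so values are set by writing their ASCII representation. For instance, the bell position described above can be set to column 72 from C as well as from the shell (a minimal sketch, assuming the sysfs path is mounted as usual):

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/accessibility/speakup/bell_pos", O_WRONLY);

            if (fd < 0)
                    return 1;
            if (write(fd, "72", 2) != 2) {
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }
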
+What:          /sys/accessibility/speakup/bleeps
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This controls whether one hears beeps through the PC speaker
+               when using speakup's review commands.
+               TODO: what values does it accept?
+
+What:          /sys/accessibility/speakup/bleep_time
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This controls the duration of the PC speaker beeps speakup
+               produces.
+               TODO: What are the units? Jiffies?
+
+What:          /sys/accessibility/speakup/cursor_time
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This controls the cursor delay when using arrow keys. On a
+               very slow connection, with the default setting, moving with
+               the arrows or backspacing can cause speakup to say the wrong
+               characters. Set this to a higher value to compensate for the
+               delay and keep cursor position and speech in sync.
+
+What:          /sys/accessibility/speakup/delimiters
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Delimit a word from speakup.
+               TODO: add more info
+
+What:          /sys/accessibility/speakup/ex_num
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   TODO:
+
+What:          /sys/accessibility/speakup/key_echo
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Controls whether speakup speaks keys when they are typed.
+               One = on, zero = off (keys are not echoed).
+
+What:          /sys/accessibility/speakup/keymap
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Speakup keymap remaps keys to Speakup functions. It uses a
+               binary format. A special program called genmap is needed to
+               compile a textual keymap into the binary format, which is
+               then loaded into /sys/accessibility/speakup/keymap.
+
+What:          /sys/accessibility/speakup/no_interrupt
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Controls whether typing interrupts output from speakup. With
+               no_interrupt set to zero, typing on the keyboard will
+               interrupt speakup if, for example, the say screen command is
+               used before the entire screen is read. With no_interrupt set
+               to one, speakup will continue to say the whole screen,
+               regardless of any typing, until it finishes.
+
+What:          /sys/accessibility/speakup/punc_all
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This is a list of all the punctuation speakup should speak when
+               punc_level is set to four.
+
+What:          /sys/accessibility/speakup/punc_level
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Controls the level of punctuation spoken as the screen is
+               displayed, not reviewed. Levels range from zero (no
+               punctuation) to four (all punctuation). One corresponds to
+               punc_some, two corresponds to punc_most, and three and four
+               both correspond to punc_all. Some hardware synthesizers may
+               have distinct levels corresponding to three and four. Also
+               note that if punc_level is set to zero and key_echo is set to
+               one, typed punctuation is still spoken as it is typed.
+
+What:          /sys/accessibility/speakup/punc_most
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This is a list of all the punctuation speakup should speak when
+               punc_level is set to two.
+
+What:          /sys/accessibility/speakup/punc_some
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This is a list of all the punctuation speakup should speak when
+               punc_level is set to one.
+
+What:          /sys/accessibility/speakup/reading_punc
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Almost the same as punc_level, the differences being that
+               reading_punc controls the level of punctuation when reviewing
+               the screen with speakup's screen review commands. The other
+               difference is that reading_punc set to three speaks punc_all,
+               and reading_punc set to four speaks all punctuation, including
+               spaces.
+
+What:          /sys/accessibility/speakup/repeats
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   A list of characters speakup repeats. Normally, when there
+               are more than three characters in a row, speakup just reads
+               three of those characters. For example, "......" would be
+               read as dot, dot, dot. If a . is added to the list of
+               characters in repeats, "......" would be read as dot, dot,
+               dot, times six.
+
+What:          /sys/accessibility/speakup/say_control
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   If set to one, speakup speaks shift, alt and control when those
+               keys are pressed. If say_control is set to zero, shift, ctrl,
+               and alt are not spoken when they are pressed.
+
+What:          /sys/accessibility/speakup/say_word_ctl
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   TODO:
+
+What:          /sys/accessibility/speakup/silent
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   TODO:
+
+What:          /sys/accessibility/speakup/spell_delay
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This controls how fast a word is spelled when speakup's say
+               word review command is pressed twice quickly to speak the
+               current word being reviewed. Zero just speaks the letters one
+               after another, while values one through four seem to
+               introduce more of a pause between the spelling of each letter
+               by speakup.
+
+What:          /sys/accessibility/speakup/synth
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the synthesizer driver currently in use. Reading
+               synth returns the synthesizer driver currently in use. Writing
+               synth switches to the given synthesizer driver, provided it is
+               either built into the kernel, or already loaded as a module.
+
+What:          /sys/accessibility/speakup/synth_direct
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Sends whatever is written to synth_direct directly to the
+               speech synthesizer in use, bypassing speakup. This could be
+               used to make the synthesizer speak a string, or to send
+               control sequences to the synthesizer to change how the
+               synthesizer behaves.
+
+What:          /sys/accessibility/speakup/version
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Reading version returns the version of speakup, and the version
+               of the synthesizer driver currently in use.
+
+What:          /sys/accessibility/speakup/i18n/announcements
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This file contains various general announcements, most of which
+               cannot be categorized.  You will find messages such as "You
+               killed Speakup", "I'm alive", "leaving help", "parked",
+               "unparked", and others. You will also find the names of the
+               screen edges and cursor tracking modes here.
+
+What:          /sys/accessibility/speakup/i18n/chartab
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   TODO
+
+What:          /sys/accessibility/speakup/i18n/ctl_keys
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Here, you will find names of control keys.  These are used with
+               Speakup's say_control feature.
+
+What:          /sys/accessibility/speakup/i18n/function_names
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Here, you will find a list of names for Speakup functions.
+               These are used by the help system.  For example, suppose that
+               you have activated help mode and pressed keypad 3.  Speakup
+               says: "keypad 3 is character, say next."  The message
+               "character, say next" names a Speakup function, and it comes
+               from this function_names file.
+
+What:          /sys/accessibility/speakup/i18n/states
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This file contains names for key states. Again, these are
+               part of the help system.  For instance, if you had pressed
+               speakup + keypad 3, you would hear: "speakup keypad 3 is go
+               to bottom edge."  The speakup key is depressed, so the name
+               of the key state is speakup.  This part of the message comes
+               from the states collection.
+
+What:          /sys/accessibility/speakup/i18n/characters
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Through this sys entry, Speakup gives you the ability to change
+               how Speakup pronounces a given character. You could, for
+               example, change how some punctuation characters are spoken. You
+               can even change how Speakup will pronounce certain letters. For
+               further details see '12.  Changing the Pronunciation of
+               Characters' in Speakup User's Guide (file spkguide.txt in
+               source).
+
+What:          /sys/accessibility/speakup/i18n/colors
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   When you use the "say attributes" function, Speakup says the
+               name of the foreground and background colors.  These names come
+               from the i18n/colors file.
+
+What:          /sys/accessibility/speakup/i18n/formatted
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This group of messages contains embedded formatting codes, to
+               specify the type and width of displayed data.  If you change
+               these, you must preserve all of the formatting codes, and they
+               must appear in the order used by the default messages.
+
+What:          /sys/accessibility/speakup/i18n/key_names
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Again, key_names is used by Speakup's help system.  In the
+               previous example, Speakup said that you pressed "keypad 3."
+               This name came from the key_names file.
+
+What:          /sys/accessibility/speakup/<synth-name>/
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   In `/sys/accessibility/speakup` is a directory corresponding
+               to the synthesizer driver currently in use, e.g. `soft` for
+               the soft driver. This directory contains files which control
+               the speech synthesizer itself, as opposed to controlling the
+               speakup screen reader. The parameters in this directory have
+               the same names and functions across all supported
+               synthesizers. The range of values for freq, pitch, rate, and
+               vol is the same for all supported synthesizers, with the
+               given range being internally mapped by the driver to more or
+               less fit the range of values supported for a given parameter
+               by the individual synthesizer. Below is a description of the
+               values and parameters for the soft synthesizer, which is
+               currently the most commonly used.
+
+What:          /sys/accessibility/speakup/soft/caps_start
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This is the string that is sent to the synthesizer to cause it
+               to start speaking uppercase letters. For the soft synthesizer
+               and most others, this causes the pitch of the voice to rise
+               above the currently set pitch.
+
+What:          /sys/accessibility/speakup/soft/caps_stop
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This is the string sent to the synthesizer to cause it to stop
+               speaking uppercase letters. In the case of the soft synthesizer
+               and most others, this returns the pitch of the voice down to
+               the currently set pitch.
+
+What:          /sys/accessibility/speakup/soft/delay_time
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   TODO:
+
+What:          /sys/accessibility/speakup/soft/direct
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Controls whether punctuation is spoken by speakup or by the
+               synthesizer. For example, speakup speaks ">" as "greater",
+               while the espeak synthesizer used by the soft driver speaks
+               "greater than". Zero lets speakup speak the punctuation. One
+               lets the synthesizer itself speak punctuation.
+
+What:          /sys/accessibility/speakup/soft/freq
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the frequency of the speech synthesizer. Range is
+               0-9.
+
+What:          /sys/accessibility/speakup/soft/full_time
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   TODO:
+
+What:          /sys/accessibility/speakup/soft/jiffy_delta
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   This controls how many jiffies the kernel gives to the
+               synthesizer. Setting this too high can make a system unstable,
+               or even crash it.
+
+What:          /sys/accessibility/speakup/soft/pitch
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the pitch of the synthesizer. The range is 0-9.
+
+What:          /sys/accessibility/speakup/soft/punct
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the amount of punctuation spoken by the
+               synthesizer. The range for the soft driver seems to be 0-2.
+               TODO: How is this related to speakup's punc_level, or
+               reading_punc.
+
+What:          /sys/accessibility/speakup/soft/rate
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the rate of the synthesizer. Range is from zero
+               slowest, to nine fastest.
+
+What:          /sys/accessibility/speakup/soft/tone
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the tone of the speech synthesizer. The range for
+               the soft driver seems to be 0-2. This seems to make no
+               difference if using espeak and the espeakup connector.
+               TODO: does espeakup support different tonalities?
+
+What:          /sys/accessibility/speakup/soft/trigger_time
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   TODO:
+
+What:          /sys/accessibility/speakup/soft/voice
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the voice used by the synthesizer if the
+               synthesizer can speak in more than one voice. The range for the
+               soft driver is 0-7. Note that while espeak supports multiple
+               voices, this parameter will not set the voice when the espeakup
+               connector is used between speakup and espeak.
+
+What:          /sys/accessibility/speakup/soft/vol
+KernelVersion: 2.6
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the volume of the speech synthesizer. Range is 0-9,
+               with zero being the softest, and nine being the loudest.
+
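
To make the interface documented above concrete, here is a hypothetical user-space sketch (C, error handling kept minimal by design) that reads the active synthesizer and then speaks a string through synth_direct; the paths are exactly those listed in the entries above:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
            char name[32];
            ssize_t n;
            int fd;

            /* Read which synthesizer driver is currently in use. */
            fd = open("/sys/accessibility/speakup/synth", O_RDONLY);
            if (fd >= 0) {
                    n = read(fd, name, sizeof(name) - 1);
                    if (n > 0) {
                            name[n] = '\0';
                            printf("active synth: %s", name);
                    }
                    close(fd);
            }

            /* Bypass speakup and hand a string to the synthesizer. */
            fd = open("/sys/accessibility/speakup/synth_direct", O_WRONLY);
            if (fd >= 0) {
                    const char *msg = "hello from user space\n";
                    write(fd, msg, strlen(msg));
                    close(fd);
            }
            return 0;
    }
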
index bc1eaa3..826016c 100644 (file)
@@ -12,7 +12,7 @@
 static const struct snd_pcm_hardware snd_bcm2835_playback_hw = {
        .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
                 SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
-                SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR),
+                SNDRV_PCM_INFO_SYNC_APPLPTR),
        .formats = SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE,
        .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000,
        .rate_min = 8000,
@@ -29,7 +29,7 @@ static const struct snd_pcm_hardware snd_bcm2835_playback_hw = {
 static const struct snd_pcm_hardware snd_bcm2835_playback_spdif_hw = {
        .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
                 SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
-                SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR),
+                SNDRV_PCM_INFO_SYNC_APPLPTR),
        .formats = SNDRV_PCM_FMTBIT_S16_LE,
        .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_44100 |
        SNDRV_PCM_RATE_48000,
index 23fba01..c6f9cf1 100644 (file)
@@ -289,6 +289,7 @@ int bcm2835_audio_stop(struct bcm2835_alsa_stream *alsa_stream)
                                         VC_AUDIO_MSG_TYPE_STOP, false);
 }
 
+/* FIXME: this doesn't seem to work as expected for "draining" */
 int bcm2835_audio_drain(struct bcm2835_alsa_stream *alsa_stream)
 {
        struct vc_audio_msg m = {
index c6bb4aa..0823029 100644 (file)
@@ -1748,8 +1748,10 @@ vt6655_probe(struct pci_dev *pcid, const struct pci_device_id *ent)
 
        priv->hw->max_signal = 100;
 
-       if (vnt_init(priv))
+       if (vnt_init(priv)) {
+               device_free_info(priv);
                return -ENODEV;
+       }
 
        device_print_info(priv);
        pci_set_drvdata(pcid, priv);
index eee1998..fac38c8 100644 (file)
@@ -469,10 +469,8 @@ static int prism2_connect(struct wiphy *wiphy, struct net_device *dev,
        /* Set the encryption - we only support wep */
        if (is_wep) {
                if (sme->key) {
-                       if (sme->key_idx >= NUM_WEPKEYS) {
-                               err = -EINVAL;
-                               goto exit;
-                       }
+                       if (sme->key_idx >= NUM_WEPKEYS)
+                               return -EINVAL;
 
                        result = prism2_domibset_uint32(wlandev,
                                DIDMIB_DOT11SMT_PRIVACYTABLE_WEPDEFAULTKEYID,
index c70caf4..a2b5c79 100644 (file)
@@ -1831,7 +1831,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb)
 
        while (credits) {
                struct sk_buff *p = cxgbit_sock_peek_wr(csk);
-               const u32 csum = (__force u32)p->csum;
+               u32 csum;
 
                if (unlikely(!p)) {
                        pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n",
@@ -1840,6 +1840,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb)
                        break;
                }
 
+               csum = (__force u32)p->csum;
                if (unlikely(credits < csum)) {
                        pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n",
                                csk,  csk->tid,
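
The hunk above is an ordering fix: p->csum was read in the initializer, before the !p check that guards it. A standalone analogue of the corrected pattern, with hypothetical names:

    #include <stddef.h>
    #include <stdio.h>

    struct wr { unsigned int csum; };

    static int consume(struct wr *p, unsigned int *credits)
    {
            unsigned int csum;

            if (p == NULL) {        /* validate the pointer first... */
                    fprintf(stderr, "empty queue\n");
                    return -1;
            }
            csum = p->csum;         /* ...dereference only afterwards */
            if (*credits < csum)
                    return -1;
            *credits -= csum;
            return 0;
    }
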
index 04bf2ac..2d19f0e 100644 (file)
@@ -1074,27 +1074,6 @@ passthrough_parse_cdb(struct se_cmd *cmd,
        struct se_device *dev = cmd->se_dev;
        unsigned int size;
 
-       /*
-        * Clear a lun set in the cdb if the initiator talking to use spoke
-        * and old standards version, as we can't assume the underlying device
-        * won't choke up on it.
-        */
-       switch (cdb[0]) {
-       case READ_10: /* SBC - RDProtect */
-       case READ_12: /* SBC - RDProtect */
-       case READ_16: /* SBC - RDProtect */
-       case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */
-       case VERIFY: /* SBC - VRProtect */
-       case VERIFY_16: /* SBC - VRProtect */
-       case WRITE_VERIFY: /* SBC - VRProtect */
-       case WRITE_VERIFY_12: /* SBC - VRProtect */
-       case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */
-               break;
-       default:
-               cdb[1] &= 0x1f; /* clear logical unit number */
-               break;
-       }
-
        /*
         * For REPORT LUNS we always need to emulate the response, for everything
         * else, pass it up.
index a254792..1354a15 100644 (file)
@@ -136,8 +136,7 @@ int ft_queue_data_in(struct se_cmd *se_cmd)
                                           page, off_in_page, tlen);
                        fr_len(fp) += tlen;
                        fp_skb(fp)->data_len += tlen;
-                       fp_skb(fp)->truesize +=
-                                       PAGE_SIZE << compound_order(page);
+                       fp_skb(fp)->truesize += page_size(page);
                } else {
                        BUG_ON(!page);
                        from = kmap_atomic(page + (mem_off >> PAGE_SHIFT));
index 2da026f..09ddcd0 100644 (file)
@@ -254,6 +254,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
        shm->teedev = teedev;
        shm->ctx = ctx;
        shm->id = -1;
+       addr = untagged_addr(addr);
        start = rounddown(addr, PAGE_SIZE);
        shm->offset = addr - start;
        shm->size = length;
index 9966364..001a21a 100644 (file)
@@ -310,7 +310,7 @@ config DOVE_THERMAL
 
 config DB8500_THERMAL
        tristate "DB8500 thermal management"
-       depends on MFD_DB8500_PRCMU
+       depends on MFD_DB8500_PRCMU && OF
        default y
        help
          Adds DB8500 thermal management implementation according to the thermal
index 8c07a39..709a22f 100644 (file)
@@ -53,7 +53,6 @@
 #define CONTROL0_TSEN_MODE_EXTERNAL    0x2
 #define CONTROL0_TSEN_MODE_MASK                0x3
 
-#define CONTROL1_TSEN_AVG_SHIFT                0
 #define CONTROL1_TSEN_AVG_MASK         0x7
 #define CONTROL1_EXT_TSEN_SW_RESET     BIT(7)
 #define CONTROL1_EXT_TSEN_HW_RESETn    BIT(8)
@@ -267,8 +266,8 @@ static void armada_cp110_init(struct platform_device *pdev,
 
        /* Average the output value over 2^1 = 2 samples */
        regmap_read(priv->syscon, data->syscon_control1_off, &reg);
-       reg &= ~CONTROL1_TSEN_AVG_MASK << CONTROL1_TSEN_AVG_SHIFT;
-       reg |= 1 << CONTROL1_TSEN_AVG_SHIFT;
+       reg &= ~CONTROL1_TSEN_AVG_MASK;
+       reg |= 1;
        regmap_write(priv->syscon, data->syscon_control1_off, reg);
 }
 
index 391f397..6b9865c 100644 (file)
@@ -88,7 +88,7 @@ struct cpufreq_cooling_device {
        struct cpufreq_policy *policy;
        struct list_head node;
        struct time_in_idle *idle_time;
-       struct dev_pm_qos_request qos_req;
+       struct freq_qos_request qos_req;
 };
 
 static DEFINE_IDA(cpufreq_ida);
@@ -331,7 +331,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 
        cpufreq_cdev->cpufreq_state = state;
 
-       return dev_pm_qos_update_request(&cpufreq_cdev->qos_req,
+       return freq_qos_update_request(&cpufreq_cdev->qos_req,
                                cpufreq_cdev->freq_table[state].frequency);
 }
 
@@ -615,9 +615,9 @@ __cpufreq_cooling_register(struct device_node *np,
                cooling_ops = &cpufreq_cooling_ops;
        }
 
-       ret = dev_pm_qos_add_request(dev, &cpufreq_cdev->qos_req,
-                                    DEV_PM_QOS_MAX_FREQUENCY,
-                                    cpufreq_cdev->freq_table[0].frequency);
+       ret = freq_qos_add_request(&policy->constraints,
+                                  &cpufreq_cdev->qos_req, FREQ_QOS_MAX,
+                                  cpufreq_cdev->freq_table[0].frequency);
        if (ret < 0) {
                pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
                       ret);
@@ -637,7 +637,7 @@ __cpufreq_cooling_register(struct device_node *np,
        return cdev;
 
 remove_qos_req:
-       dev_pm_qos_remove_request(&cpufreq_cdev->qos_req);
+       freq_qos_remove_request(&cpufreq_cdev->qos_req);
 remove_ida:
        ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
 free_table:
@@ -736,7 +736,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
        mutex_unlock(&cooling_list_lock);
 
        thermal_cooling_device_unregister(cdev);
-       dev_pm_qos_remove_request(&cpufreq_cdev->qos_req);
+       freq_qos_remove_request(&cpufreq_cdev->qos_req);
        ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
        kfree(cpufreq_cdev->idle_time);
        kfree(cpufreq_cdev->freq_table);
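
The conversion above swaps the per-device PM QoS API for the cpufreq-policy-scoped freq_qos one. A condensed sketch of the request lifecycle as used in this hunk, assuming a valid struct cpufreq_policy *policy and a populated freq_table:

    struct freq_qos_request req;
    int ret;

    /* Attach one max-frequency constraint to the policy... */
    ret = freq_qos_add_request(&policy->constraints, &req,
                               FREQ_QOS_MAX, freq_table[0].frequency);
    if (ret < 0)
            return ret;

    /* ...tighten or relax it as the cooling state changes... */
    ret = freq_qos_update_request(&req, freq_table[state].frequency);

    /* ...and drop it on unregister. */
    freq_qos_remove_request(&req);
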
index b71a999..372dbba 100644 (file)
@@ -3,9 +3,9 @@
  * db8500_thermal.c - DB8500 Thermal Management Implementation
  *
  * Copyright (C) 2012 ST-Ericsson
- * Copyright (C) 2012 Linaro Ltd.
+ * Copyright (C) 2012-2019 Linaro Ltd.
  *
- * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
+ * Authors: Hongbo Zhang, Linus Walleij
  */
 
 #include <linux/cpu_cooling.h>
@@ -13,7 +13,6 @@
 #include <linux/mfd/dbx500-prcmu.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/platform_data/db8500_thermal.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
 #define PRCMU_DEFAULT_MEASURE_TIME     0xFFF
 #define PRCMU_DEFAULT_LOW_TEMP         0
 
+/**
+ * db8500_thermal_points - the interpolation points that trigger
+ * interrupts
+ */
+static const unsigned long db8500_thermal_points[] = {
+       15000,
+       20000,
+       25000,
+       30000,
+       35000,
+       40000,
+       45000,
+       50000,
+       55000,
+       60000,
+       65000,
+       70000,
+       75000,
+       80000,
+       /*
+        * This is where things start to get really bad for the
+        * SoC and the thermal zones should be set up to trigger
+        * critical temperature at 85000 mC so we don't get above
+        * this point.
+        */
+       85000,
+       90000,
+       95000,
+       100000,
+};
+
 struct db8500_thermal_zone {
-       struct thermal_zone_device *therm_dev;
-       struct mutex th_lock;
-       struct work_struct therm_work;
-       struct db8500_thsens_platform_data *trip_tab;
-       enum thermal_device_mode mode;
+       struct thermal_zone_device *tz;
        enum thermal_trend trend;
-       unsigned long cur_temp_pseudo;
+       unsigned long interpolated_temp;
        unsigned int cur_index;
 };
 
-/* Local function to check if thermal zone matches cooling devices */
-static int db8500_thermal_match_cdev(struct thermal_cooling_device *cdev,
-               struct db8500_trip_point *trip_point)
-{
-       int i;
-
-       if (!strlen(cdev->type))
-               return -EINVAL;
-
-       for (i = 0; i < COOLING_DEV_MAX; i++) {
-               if (!strcmp(trip_point->cdev_name[i], cdev->type))
-                       return 0;
-       }
-
-       return -ENODEV;
-}
-
-/* Callback to bind cooling device to thermal zone */
-static int db8500_cdev_bind(struct thermal_zone_device *thermal,
-               struct thermal_cooling_device *cdev)
-{
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
-       unsigned long max_state, upper, lower;
-       int i, ret = -EINVAL;
-
-       cdev->ops->get_max_state(cdev, &max_state);
-
-       for (i = 0; i < ptrips->num_trips; i++) {
-               if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i]))
-                       continue;
-
-               upper = lower = i > max_state ? max_state : i;
-
-               ret = thermal_zone_bind_cooling_device(thermal, i, cdev,
-                       upper, lower, THERMAL_WEIGHT_DEFAULT);
-
-               dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type,
-                       i, ret, ret ? "fail" : "succeed");
-       }
-
-       return ret;
-}
-
-/* Callback to unbind cooling device from thermal zone */
-static int db8500_cdev_unbind(struct thermal_zone_device *thermal,
-               struct thermal_cooling_device *cdev)
-{
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
-       int i, ret = -EINVAL;
-
-       for (i = 0; i < ptrips->num_trips; i++) {
-               if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i]))
-                       continue;
-
-               ret = thermal_zone_unbind_cooling_device(thermal, i, cdev);
-
-               dev_info(&cdev->device, "%s unbind from %d: %s\n", cdev->type,
-                       i, ret ? "fail" : "succeed");
-       }
-
-       return ret;
-}
-
 /* Callback to get current temperature */
-static int db8500_sys_get_temp(struct thermal_zone_device *thermal, int *temp)
+static int db8500_thermal_get_temp(void *data, int *temp)
 {
-       struct db8500_thermal_zone *pzone = thermal->devdata;
+       struct db8500_thermal_zone *th = data;
 
        /*
         * TODO: There is no PRCMU interface to get temperature data currently,
         * so a pseudo temperature is returned , it works for thermal framework
         * and this will be fixed when the PRCMU interface is available.
         */
-       *temp = pzone->cur_temp_pseudo;
+       *temp = th->interpolated_temp;
 
        return 0;
 }
 
 /* Callback to get temperature changing trend */
-static int db8500_sys_get_trend(struct thermal_zone_device *thermal,
-               int trip, enum thermal_trend *trend)
-{
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-
-       *trend = pzone->trend;
-
-       return 0;
-}
-
-/* Callback to get thermal zone mode */
-static int db8500_sys_get_mode(struct thermal_zone_device *thermal,
-               enum thermal_device_mode *mode)
-{
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-
-       mutex_lock(&pzone->th_lock);
-       *mode = pzone->mode;
-       mutex_unlock(&pzone->th_lock);
-
-       return 0;
-}
-
-/* Callback to set thermal zone mode */
-static int db8500_sys_set_mode(struct thermal_zone_device *thermal,
-               enum thermal_device_mode mode)
-{
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-
-       mutex_lock(&pzone->th_lock);
-
-       pzone->mode = mode;
-       if (mode == THERMAL_DEVICE_ENABLED)
-               schedule_work(&pzone->therm_work);
-
-       mutex_unlock(&pzone->th_lock);
-
-       return 0;
-}
-
-/* Callback to get trip point type */
-static int db8500_sys_get_trip_type(struct thermal_zone_device *thermal,
-               int trip, enum thermal_trip_type *type)
-{
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
-
-       if (trip >= ptrips->num_trips)
-               return -EINVAL;
-
-       *type = ptrips->trip_points[trip].type;
-
-       return 0;
-}
-
-/* Callback to get trip point temperature */
-static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal,
-               int trip, int *temp)
+static int db8500_thermal_get_trend(void *data, int trip, enum thermal_trend *trend)
 {
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+       struct db8500_thermal_zone *th = data;
 
-       if (trip >= ptrips->num_trips)
-               return -EINVAL;
-
-       *temp = ptrips->trip_points[trip].temp;
+       *trend = th->trend;
 
        return 0;
 }
 
-/* Callback to get critical trip point temperature */
-static int db8500_sys_get_crit_temp(struct thermal_zone_device *thermal,
-               int *temp)
-{
-       struct db8500_thermal_zone *pzone = thermal->devdata;
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
-       int i;
-
-       for (i = ptrips->num_trips - 1; i > 0; i--) {
-               if (ptrips->trip_points[i].type == THERMAL_TRIP_CRITICAL) {
-                       *temp = ptrips->trip_points[i].temp;
-                       return 0;
-               }
-       }
-
-       return -EINVAL;
-}
-
-static struct thermal_zone_device_ops thdev_ops = {
-       .bind = db8500_cdev_bind,
-       .unbind = db8500_cdev_unbind,
-       .get_temp = db8500_sys_get_temp,
-       .get_trend = db8500_sys_get_trend,
-       .get_mode = db8500_sys_get_mode,
-       .set_mode = db8500_sys_set_mode,
-       .get_trip_type = db8500_sys_get_trip_type,
-       .get_trip_temp = db8500_sys_get_trip_temp,
-       .get_crit_temp = db8500_sys_get_crit_temp,
+static struct thermal_zone_of_device_ops thdev_ops = {
+       .get_temp = db8500_thermal_get_temp,
+       .get_trend = db8500_thermal_get_trend,
 };
 
-static void db8500_thermal_update_config(struct db8500_thermal_zone *pzone,
-               unsigned int idx, enum thermal_trend trend,
-               unsigned long next_low, unsigned long next_high)
+static void db8500_thermal_update_config(struct db8500_thermal_zone *th,
+                                        unsigned int idx,
+                                        enum thermal_trend trend,
+                                        unsigned long next_low,
+                                        unsigned long next_high)
 {
        prcmu_stop_temp_sense();
 
-       pzone->cur_index = idx;
-       pzone->cur_temp_pseudo = (next_low + next_high)/2;
-       pzone->trend = trend;
+       th->cur_index = idx;
+       th->interpolated_temp = (next_low + next_high)/2;
+       th->trend = trend;
 
+       /*
+        * The PRCMU accepts absolute temperatures in celsius, so divide
+        * the millicelsius values down by 1000
+        */
        prcmu_config_hotmon((u8)(next_low/1000), (u8)(next_high/1000));
        prcmu_start_temp_sense(PRCMU_DEFAULT_MEASURE_TIME);
 }
 
 static irqreturn_t prcmu_low_irq_handler(int irq, void *irq_data)
 {
-       struct db8500_thermal_zone *pzone = irq_data;
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
-       unsigned int idx = pzone->cur_index;
+       struct db8500_thermal_zone *th = irq_data;
+       unsigned int idx = th->cur_index;
        unsigned long next_low, next_high;
 
-       if (unlikely(idx == 0))
+       if (idx == 0)
                /* Meaningless for thermal management, ignoring it */
                return IRQ_HANDLED;
 
        if (idx == 1) {
-               next_high = ptrips->trip_points[0].temp;
+               next_high = db8500_thermal_points[0];
                next_low = PRCMU_DEFAULT_LOW_TEMP;
        } else {
-               next_high = ptrips->trip_points[idx-1].temp;
-               next_low = ptrips->trip_points[idx-2].temp;
+               next_high = db8500_thermal_points[idx - 1];
+               next_low = db8500_thermal_points[idx - 2];
        }
        idx -= 1;
 
-       db8500_thermal_update_config(pzone, idx, THERMAL_TREND_DROPPING,
-               next_low, next_high);
-
-       dev_dbg(&pzone->therm_dev->device,
+       db8500_thermal_update_config(th, idx, THERMAL_TREND_DROPPING,
+                                    next_low, next_high);
+       dev_dbg(&th->tz->device,
                "PRCMU set max %ld, min %ld\n", next_high, next_low);
 
-       schedule_work(&pzone->therm_work);
+       thermal_zone_device_update(th->tz, THERMAL_EVENT_UNSPECIFIED);
 
        return IRQ_HANDLED;
 }
 
 static irqreturn_t prcmu_high_irq_handler(int irq, void *irq_data)
 {
-       struct db8500_thermal_zone *pzone = irq_data;
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
-       unsigned int idx = pzone->cur_index;
+       struct db8500_thermal_zone *th = irq_data;
+       unsigned int idx = th->cur_index;
        unsigned long next_low, next_high;
+       int num_points = ARRAY_SIZE(db8500_thermal_points);
 
-       if (idx < ptrips->num_trips - 1) {
-               next_high = ptrips->trip_points[idx+1].temp;
-               next_low = ptrips->trip_points[idx].temp;
+       if (idx < num_points - 1) {
+               next_high = db8500_thermal_points[idx+1];
+               next_low = db8500_thermal_points[idx];
                idx += 1;
 
-               db8500_thermal_update_config(pzone, idx, THERMAL_TREND_RAISING,
-                       next_low, next_high);
+               db8500_thermal_update_config(th, idx, THERMAL_TREND_RAISING,
+                                            next_low, next_high);
 
-               dev_dbg(&pzone->therm_dev->device,
-               "PRCMU set max %ld, min %ld\n", next_high, next_low);
-       } else if (idx == ptrips->num_trips - 1)
-               pzone->cur_temp_pseudo = ptrips->trip_points[idx].temp + 1;
+               dev_info(&th->tz->device,
+                        "PRCMU set max %ld, min %ld\n", next_high, next_low);
+       } else if (idx == num_points - 1)
+               /* So we roof out 1 degree over the max point */
+               th->interpolated_temp = db8500_thermal_points[idx] + 1;
 
-       schedule_work(&pzone->therm_work);
+       thermal_zone_device_update(th->tz, THERMAL_EVENT_UNSPECIFIED);
 
        return IRQ_HANDLED;
 }
 
-static void db8500_thermal_work(struct work_struct *work)
-{
-       enum thermal_device_mode cur_mode;
-       struct db8500_thermal_zone *pzone;
-
-       pzone = container_of(work, struct db8500_thermal_zone, therm_work);
-
-       mutex_lock(&pzone->th_lock);
-       cur_mode = pzone->mode;
-       mutex_unlock(&pzone->th_lock);
-
-       if (cur_mode == THERMAL_DEVICE_DISABLED)
-               return;
-
-       thermal_zone_device_update(pzone->therm_dev, THERMAL_EVENT_UNSPECIFIED);
-       dev_dbg(&pzone->therm_dev->device, "thermal work finished.\n");
-}
-
-#ifdef CONFIG_OF
-static struct db8500_thsens_platform_data*
-               db8500_thermal_parse_dt(struct platform_device *pdev)
-{
-       struct db8500_thsens_platform_data *ptrips;
-       struct device_node *np = pdev->dev.of_node;
-       char prop_name[32];
-       const char *tmp_str;
-       u32 tmp_data;
-       int i, j;
-
-       ptrips = devm_kzalloc(&pdev->dev, sizeof(*ptrips), GFP_KERNEL);
-       if (!ptrips)
-               return NULL;
-
-       if (of_property_read_u32(np, "num-trips", &tmp_data))
-               goto err_parse_dt;
-
-       if (tmp_data > THERMAL_MAX_TRIPS)
-               goto err_parse_dt;
-
-       ptrips->num_trips = tmp_data;
-
-       for (i = 0; i < ptrips->num_trips; i++) {
-               sprintf(prop_name, "trip%d-temp", i);
-               if (of_property_read_u32(np, prop_name, &tmp_data))
-                       goto err_parse_dt;
-
-               ptrips->trip_points[i].temp = tmp_data;
-
-               sprintf(prop_name, "trip%d-type", i);
-               if (of_property_read_string(np, prop_name, &tmp_str))
-                       goto err_parse_dt;
-
-               if (!strcmp(tmp_str, "active"))
-                       ptrips->trip_points[i].type = THERMAL_TRIP_ACTIVE;
-               else if (!strcmp(tmp_str, "passive"))
-                       ptrips->trip_points[i].type = THERMAL_TRIP_PASSIVE;
-               else if (!strcmp(tmp_str, "hot"))
-                       ptrips->trip_points[i].type = THERMAL_TRIP_HOT;
-               else if (!strcmp(tmp_str, "critical"))
-                       ptrips->trip_points[i].type = THERMAL_TRIP_CRITICAL;
-               else
-                       goto err_parse_dt;
-
-               sprintf(prop_name, "trip%d-cdev-num", i);
-               if (of_property_read_u32(np, prop_name, &tmp_data))
-                       goto err_parse_dt;
-
-               if (tmp_data > COOLING_DEV_MAX)
-                       goto err_parse_dt;
-
-               for (j = 0; j < tmp_data; j++) {
-                       sprintf(prop_name, "trip%d-cdev-name%d", i, j);
-                       if (of_property_read_string(np, prop_name, &tmp_str))
-                               goto err_parse_dt;
-
-                       if (strlen(tmp_str) >= THERMAL_NAME_LENGTH)
-                               goto err_parse_dt;
-
-                       strcpy(ptrips->trip_points[i].cdev_name[j], tmp_str);
-               }
-       }
-       return ptrips;
-
-err_parse_dt:
-       dev_err(&pdev->dev, "Parsing device tree data error.\n");
-       return NULL;
-}
-#else
-static inline struct db8500_thsens_platform_data*
-               db8500_thermal_parse_dt(struct platform_device *pdev)
-{
-       return NULL;
-}
-#endif
-
 static int db8500_thermal_probe(struct platform_device *pdev)
 {
-       struct db8500_thermal_zone *pzone = NULL;
-       struct db8500_thsens_platform_data *ptrips = NULL;
-       struct device_node *np = pdev->dev.of_node;
+       struct db8500_thermal_zone *th = NULL;
+       struct device *dev = &pdev->dev;
        int low_irq, high_irq, ret = 0;
-       unsigned long dft_low, dft_high;
 
-       if (np)
-               ptrips = db8500_thermal_parse_dt(pdev);
-       else
-               ptrips = dev_get_platdata(&pdev->dev);
-
-       if (!ptrips)
-               return -EINVAL;
-
-       pzone = devm_kzalloc(&pdev->dev, sizeof(*pzone), GFP_KERNEL);
-       if (!pzone)
+       th = devm_kzalloc(dev, sizeof(*th), GFP_KERNEL);
+       if (!th)
                return -ENOMEM;
 
-       mutex_init(&pzone->th_lock);
-       mutex_lock(&pzone->th_lock);
-
-       pzone->mode = THERMAL_DEVICE_DISABLED;
-       pzone->trip_tab = ptrips;
-
-       INIT_WORK(&pzone->therm_work, db8500_thermal_work);
-
        low_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_LOW");
        if (low_irq < 0) {
-               dev_err(&pdev->dev, "Get IRQ_HOTMON_LOW failed.\n");
-               ret = low_irq;
-               goto out_unlock;
+               dev_err(dev, "Get IRQ_HOTMON_LOW failed\n");
+               return low_irq;
        }
 
-       ret = devm_request_threaded_irq(&pdev->dev, low_irq, NULL,
+       ret = devm_request_threaded_irq(dev, low_irq, NULL,
                prcmu_low_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT,
-               "dbx500_temp_low", pzone);
+               "dbx500_temp_low", th);
        if (ret < 0) {
-               dev_err(&pdev->dev, "Failed to allocate temp low irq.\n");
-               goto out_unlock;
+               dev_err(dev, "failed to allocate temp low irq\n");
+               return ret;
        }
 
        high_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_HIGH");
        if (high_irq < 0) {
-               dev_err(&pdev->dev, "Get IRQ_HOTMON_HIGH failed.\n");
-               ret = high_irq;
-               goto out_unlock;
+               dev_err(dev, "Get IRQ_HOTMON_HIGH failed\n");
+               return high_irq;
        }
 
-       ret = devm_request_threaded_irq(&pdev->dev, high_irq, NULL,
+       ret = devm_request_threaded_irq(dev, high_irq, NULL,
                prcmu_high_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT,
-               "dbx500_temp_high", pzone);
+               "dbx500_temp_high", th);
        if (ret < 0) {
-               dev_err(&pdev->dev, "Failed to allocate temp high irq.\n");
-               goto out_unlock;
+               dev_err(dev, "failed to allocate temp high irq\n");
+               return ret;
        }
 
-       pzone->therm_dev = thermal_zone_device_register("db8500_thermal_zone",
-               ptrips->num_trips, 0, pzone, &thdev_ops, NULL, 0, 0);
-
-       if (IS_ERR(pzone->therm_dev)) {
-               dev_err(&pdev->dev, "Register thermal zone device failed.\n");
-               ret = PTR_ERR(pzone->therm_dev);
-               goto out_unlock;
+       /* Register the thermal sensor and get info from the DT */
+       th->tz = devm_thermal_zone_of_sensor_register(dev, 0, th, &thdev_ops);
+       if (IS_ERR(th->tz)) {
+               dev_err(dev, "register thermal zone sensor failed\n");
+               return PTR_ERR(th->tz);
        }
-       dev_info(&pdev->dev, "Thermal zone device registered.\n");
-
-       dft_low = PRCMU_DEFAULT_LOW_TEMP;
-       dft_high = ptrips->trip_points[0].temp;
-
-       db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE,
-               dft_low, dft_high);
-
-       platform_set_drvdata(pdev, pzone);
-       pzone->mode = THERMAL_DEVICE_ENABLED;
+       dev_info(dev, "thermal zone sensor registered\n");
 
-out_unlock:
-       mutex_unlock(&pzone->th_lock);
+       /* Start measuring at the lowest point */
+       db8500_thermal_update_config(th, 0, THERMAL_TREND_STABLE,
+                                    PRCMU_DEFAULT_LOW_TEMP,
+                                    db8500_thermal_points[0]);
 
-       return ret;
-}
-
-static int db8500_thermal_remove(struct platform_device *pdev)
-{
-       struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev);
-
-       thermal_zone_device_unregister(pzone->therm_dev);
-       cancel_work_sync(&pzone->therm_work);
-       mutex_destroy(&pzone->th_lock);
+       platform_set_drvdata(pdev, th);
 
        return 0;
 }
@@ -475,9 +222,6 @@ static int db8500_thermal_remove(struct platform_device *pdev)
 static int db8500_thermal_suspend(struct platform_device *pdev,
                pm_message_t state)
 {
-       struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev);
-
-       flush_work(&pzone->therm_work);
        prcmu_stop_temp_sense();
 
        return 0;
@@ -485,26 +229,21 @@ static int db8500_thermal_suspend(struct platform_device *pdev,
 
 static int db8500_thermal_resume(struct platform_device *pdev)
 {
-       struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev);
-       struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
-       unsigned long dft_low, dft_high;
-
-       dft_low = PRCMU_DEFAULT_LOW_TEMP;
-       dft_high = ptrips->trip_points[0].temp;
+       struct db8500_thermal_zone *th = platform_get_drvdata(pdev);
 
-       db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE,
-               dft_low, dft_high);
+       /* Resume and start measuring at the lowest point */
+       db8500_thermal_update_config(th, 0, THERMAL_TREND_STABLE,
+                                    PRCMU_DEFAULT_LOW_TEMP,
+                                    db8500_thermal_points[0]);
 
        return 0;
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id db8500_thermal_match[] = {
        { .compatible = "stericsson,db8500-thermal" },
        {},
 };
 MODULE_DEVICE_TABLE(of, db8500_thermal_match);
-#endif
 
 static struct platform_driver db8500_thermal_driver = {
        .driver = {
@@ -514,7 +253,6 @@ static struct platform_driver db8500_thermal_driver = {
        .probe = db8500_thermal_probe,
        .suspend = db8500_thermal_suspend,
        .resume = db8500_thermal_resume,
-       .remove = db8500_thermal_remove,
 };
 
 module_platform_driver(db8500_thermal_driver);
index 9716bc3..7130e90 100644 (file)
@@ -77,9 +77,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
        struct acpi_buffer element = { 0, NULL };
        struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" };
 
-       if (!acpi_has_method(handle, "_TRT"))
-               return -ENODEV;
-
        status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer);
        if (ACPI_FAILURE(status))
                return -ENODEV;
@@ -158,9 +155,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
        struct acpi_buffer art_format = {
                sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" };
 
-       if (!acpi_has_method(handle, "_ART"))
-               return -ENODEV;
-
        status = acpi_evaluate_object(handle, "_ART", NULL, &buffer);
        if (ACPI_FAILURE(status))
                return -ENODEV;
index f5749d4..a7bbd85 100644 (file)
@@ -181,7 +181,7 @@ static int int3403_cdev_add(struct int3403_priv *priv)
 
        p = buf.pointer;
        if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
-               printk(KERN_WARNING "Invalid PPSS data\n");
+               pr_warn("Invalid PPSS data\n");
                kfree(buf.pointer);
                return -EFAULT;
        }
index d3446ac..89a0153 100644 (file)
@@ -39,6 +39,9 @@
 /* GeminiLake thermal reporting device */
 #define PCI_DEVICE_ID_PROC_GLK_THERMAL 0x318C
 
+/* IceLake thermal reporting device */
+#define PCI_DEVICE_ID_PROC_ICL_THERMAL 0x8a03
+
 #define DRV_NAME "proc_thermal"
 
 struct power_config {
@@ -137,6 +140,72 @@ static const struct attribute_group power_limit_attribute_group = {
        .name = "power_limits"
 };
 
+static ssize_t tcc_offset_degree_celsius_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       u64 val;
+       int err;
+
+       err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
+       if (err)
+               return err;
+
+       val = (val >> 24) & 0xff;
+       return sprintf(buf, "%d\n", (int)val);
+}
+
+static int tcc_offset_update(int tcc)
+{
+       u64 val;
+       int err;
+
+       if (!tcc)
+               return -EINVAL;
+
+       err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
+       if (err)
+               return err;
+
+       val &= ~GENMASK_ULL(31, 24);
+       val |= (tcc & 0xff) << 24;
+
+       err = wrmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, val);
+       if (err)
+               return err;
+
+       return 0;
+}
+
+static int tcc_offset_save;
+
+static ssize_t tcc_offset_degree_celsius_store(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t count)
+{
+       u64 val;
+       int tcc, err;
+
+       err = rdmsrl_safe(MSR_PLATFORM_INFO, &val);
+       if (err)
+               return err;
+
+       if (!(val & BIT(30)))
+               return -EACCES;
+
+       if (kstrtoint(buf, 0, &tcc))
+               return -EINVAL;
+
+       err = tcc_offset_update(tcc);
+       if (err)
+               return err;
+
+       tcc_offset_save = tcc;
+
+       return count;
+}
+
+static DEVICE_ATTR_RW(tcc_offset_degree_celsius);
+
 static int stored_tjmax; /* since it is fixed, we can have local storage */
 
 static int get_tjmax(void)
@@ -332,6 +401,7 @@ static void proc_thermal_remove(struct proc_thermal_device *proc_priv)
        acpi_remove_notify_handler(proc_priv->adev->handle,
                                   ACPI_DEVICE_NOTIFY, proc_thermal_notify);
        int340x_thermal_zone_remove(proc_priv->int340x_zone);
+       sysfs_remove_file(&proc_priv->dev->kobj, &dev_attr_tcc_offset_degree_celsius.attr);
        sysfs_remove_group(&proc_priv->dev->kobj,
                           &power_limit_attribute_group);
 }
@@ -355,8 +425,15 @@ static int int3401_add(struct platform_device *pdev)
 
        dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PLATFORM_DEV\n");
 
-       return sysfs_create_group(&pdev->dev.kobj,
-                                        &power_limit_attribute_group);
+       ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+       if (ret)
+               return ret;
+
+       ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group);
+       if (ret)
+               sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+
+       return ret;
 }
 
 static int int3401_remove(struct platform_device *pdev)
@@ -588,8 +665,15 @@ static int  proc_thermal_pci_probe(struct pci_dev *pdev,
 
        dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PCI\n");
 
-       return sysfs_create_group(&pdev->dev.kobj,
-                                        &power_limit_attribute_group);
+       ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+       if (ret)
+               return ret;
+
+       ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group);
+       if (ret)
+               sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+
+       return ret;
 }
 
 static void  proc_thermal_pci_remove(struct pci_dev *pdev)
@@ -615,6 +699,8 @@ static int proc_thermal_resume(struct device *dev)
        proc_dev = dev_get_drvdata(dev);
        proc_thermal_read_ppcc(proc_dev);
 
+       tcc_offset_update(tcc_offset_save);
+
        return 0;
 }
 #else
@@ -636,6 +722,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CNL_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CFL_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_GLK_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_ICL_THERMAL),
+               .driver_data = (kernel_ulong_t)&rapl_mmio_hsw, },
        { 0, },
 };
 
index 99f8b25..4f0bb8f 100644 (file)
@@ -371,16 +371,14 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev)
 
 static int intel_pch_thermal_suspend(struct device *device)
 {
-       struct pci_dev *pdev = to_pci_dev(device);
-       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+       struct pch_thermal_device *ptd = dev_get_drvdata(device);
 
        return ptd->ops->suspend(ptd);
 }
 
 static int intel_pch_thermal_resume(struct device *device)
 {
-       struct pci_dev *pdev = to_pci_dev(device);
-       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+       struct pch_thermal_device *ptd = dev_get_drvdata(device);
 
        return ptd->ops->resume(ptd);
 }
index 8d9b721..e46a4e3 100644 (file)
@@ -229,6 +229,8 @@ static int calibrate_8960(struct tsens_priv *priv)
        for (i = 0; i < num_read; i++, s++)
                s->offset = data[i];
 
+       kfree(data);
+
        return 0;
 }
 
index 6f26fad..055647b 100644 (file)
@@ -145,8 +145,10 @@ static int calibrate_8916(struct tsens_priv *priv)
                return PTR_ERR(qfprom_cdata);
 
        qfprom_csel = (u32 *)qfprom_read(priv->dev, "calib_sel");
-       if (IS_ERR(qfprom_csel))
+       if (IS_ERR(qfprom_csel)) {
+               kfree(qfprom_cdata);
                return PTR_ERR(qfprom_csel);
+       }
 
        mode = (qfprom_csel[0] & MSM8916_CAL_SEL_MASK) >> MSM8916_CAL_SEL_SHIFT;
        dev_dbg(priv->dev, "calibration mode is %d\n", mode);
@@ -181,6 +183,8 @@ static int calibrate_8916(struct tsens_priv *priv)
        }
 
        compute_intercept_slope(priv, p1, p2, mode);
+       kfree(qfprom_cdata);
+       kfree(qfprom_csel);
 
        return 0;
 }
@@ -198,8 +202,10 @@ static int calibrate_8974(struct tsens_priv *priv)
                return PTR_ERR(calib);
 
        bkp = (u32 *)qfprom_read(priv->dev, "calib_backup");
-       if (IS_ERR(bkp))
+       if (IS_ERR(bkp)) {
+               kfree(calib);
                return PTR_ERR(bkp);
+       }
 
        calib_redun_sel =  bkp[1] & BKP_REDUN_SEL;
        calib_redun_sel >>= BKP_REDUN_SHIFT;
@@ -313,6 +319,8 @@ static int calibrate_8974(struct tsens_priv *priv)
        }
 
        compute_intercept_slope(priv, p1, p2, mode);
+       kfree(calib);
+       kfree(bkp);
 
        return 0;
 }
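
All four tsens calibrate fixes above close the same leak: qfprom_read() returns a buffer the caller owns, and the early-return paths dropped it. The required pairing, sketched with the names from the hunks:

    calib = (u32 *)qfprom_read(priv->dev, "calib");
    if (IS_ERR(calib))
            return PTR_ERR(calib);

    bkp = (u32 *)qfprom_read(priv->dev, "calib_backup");
    if (IS_ERR(bkp)) {
            kfree(calib);           /* don't leak the first buffer */
            return PTR_ERR(bkp);
    }

    /* ... compute slopes/intercepts from calib and bkp ... */

    kfree(calib);
    kfree(bkp);
    return 0;
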
index 10b595d..870f502 100644 (file)
@@ -138,6 +138,7 @@ static int calibrate_v1(struct tsens_priv *priv)
        }
 
        compute_intercept_slope(priv, p1, p2, mode);
+       kfree(qfprom_cdata);
 
        return 0;
 }
index 2fd9499..b89083b 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/thermal.h>
 #include <linux/regmap.h>
+#include <linux/slab.h>
 
 struct tsens_priv;
 
index 7b36493..39542c6 100644 (file)
@@ -2,6 +2,7 @@
 //
 // Copyright 2016 Freescale Semiconductor, Inc.
 
+#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/err.h>
@@ -72,6 +73,7 @@ struct qoriq_sensor {
 
 struct qoriq_tmu_data {
        struct qoriq_tmu_regs __iomem *regs;
+       struct clk *clk;
        bool little_endian;
        struct qoriq_sensor     *sensor[SITES_MAX];
 };
@@ -202,32 +204,39 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
 
        data->little_endian = of_property_read_bool(np, "little-endian");
 
-       data->regs = of_iomap(np, 0);
-       if (!data->regs) {
+       data->regs = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(data->regs)) {
                dev_err(&pdev->dev, "Failed to get memory region\n");
-               ret = -ENODEV;
-               goto err_iomap;
+               return PTR_ERR(data->regs);
+       }
+
+       data->clk = devm_clk_get_optional(&pdev->dev, NULL);
+       if (IS_ERR(data->clk))
+               return PTR_ERR(data->clk);
+
+       ret = clk_prepare_enable(data->clk);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to enable clock\n");
+               return ret;
        }
 
        qoriq_tmu_init_device(data);    /* TMU initialization */
 
        ret = qoriq_tmu_calibration(pdev);      /* TMU calibration */
        if (ret < 0)
-               goto err_tmu;
+               goto err;
 
        ret = qoriq_tmu_register_tmu_zone(pdev);
        if (ret < 0) {
                dev_err(&pdev->dev, "Failed to register sensors\n");
                ret = -ENODEV;
-               goto err_iomap;
+               goto err;
        }
 
        return 0;
 
-err_tmu:
-       iounmap(data->regs);
-
-err_iomap:
+err:
+       clk_disable_unprepare(data->clk);
        platform_set_drvdata(pdev, NULL);
 
        return ret;
@@ -240,14 +249,14 @@ static int qoriq_tmu_remove(struct platform_device *pdev)
        /* Disable monitoring */
        tmu_write(data, TMR_DISABLE, &data->regs->tmr);
 
-       iounmap(data->regs);
+       clk_disable_unprepare(data->clk);
+
        platform_set_drvdata(pdev, NULL);
 
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int qoriq_tmu_suspend(struct device *dev)
+static int __maybe_unused qoriq_tmu_suspend(struct device *dev)
 {
        u32 tmr;
        struct qoriq_tmu_data *data = dev_get_drvdata(dev);
@@ -257,14 +266,21 @@ static int qoriq_tmu_suspend(struct device *dev)
        tmr &= ~TMR_ME;
        tmu_write(data, tmr, &data->regs->tmr);
 
+       clk_disable_unprepare(data->clk);
+
        return 0;
 }
 
-static int qoriq_tmu_resume(struct device *dev)
+static int __maybe_unused qoriq_tmu_resume(struct device *dev)
 {
        u32 tmr;
+       int ret;
        struct qoriq_tmu_data *data = dev_get_drvdata(dev);
 
+       ret = clk_prepare_enable(data->clk);
+       if (ret)
+               return ret;
+
        /* Enable monitoring */
        tmr = tmu_read(data, &data->regs->tmr);
        tmr |= TMR_ME;
@@ -272,7 +288,6 @@ static int qoriq_tmu_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(qoriq_tmu_pm_ops,
                         qoriq_tmu_suspend, qoriq_tmu_resume);
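
Dropping the #ifdef CONFIG_PM_SLEEP guards in favor of __maybe_unused is the usual idiom here: SIMPLE_DEV_PM_OPS() only references the callbacks when CONFIG_PM_SLEEP is set, and the attribute silences the resulting unused-function warning without duplicating the config logic. A sketch with illustrative names:

	static int __maybe_unused foo_suspend(struct device *dev)
	{
		return 0;	/* quiesce the hardware */
	}

	static int __maybe_unused foo_resume(struct device *dev)
	{
		return 0;	/* bring the hardware back up */
	}

	static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);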
index a564633..755d2b5 100644 (file)
@@ -443,9 +443,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
                if (ret)
                        goto error_unregister;
 
-               ret = devm_add_action(dev, rcar_gen3_hwmon_action, zone);
+               ret = devm_add_action_or_reset(dev, rcar_gen3_hwmon_action, zone);
                if (ret) {
-                       rcar_gen3_hwmon_action(zone);
                        goto error_unregister;
                }
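
devm_add_action_or_reset() invokes the action itself when registration fails, which is why the manual rcar_gen3_hwmon_action() call disappears from the error path (keeping it would have run the cleanup twice). The helper's documented behavior, sketched:

	static inline int add_action_or_reset_sketch(struct device *dev,
						     void (*action)(void *),
						     void *data)
	{
		int ret = devm_add_action(dev, action, data);

		if (ret)
			action(data);	/* undo now; caller just propagates ret */
		return ret;
	}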
 
index 43941eb..5acaad3 100644 (file)
 /* get dividend from the depth */
 #define THROT_DEPTH_DIVIDEND(depth)    ((256 * (100 - (depth)) / 100) - 1)
 
-/* gk20a nv_therm interface N:3 Mapping. Levels defined in tegra124-sochterm.h
+/* gk20a nv_therm interface N:3 Mapping. Levels defined in tegra124-soctherm.h
  * level       vector
  * NONE                3'b000
  * LOW         3'b001
index 6bab66e..d4481cc 100644 (file)
@@ -304,7 +304,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
                                 &tz->poll_queue,
                                 msecs_to_jiffies(delay));
        else
-               cancel_delayed_work(&tz->poll_queue);
+               cancel_delayed_work_sync(&tz->poll_queue);
 }
 
 static void monitor_thermal_zone(struct thermal_zone_device *tz)
@@ -985,7 +985,7 @@ __thermal_cooling_device_register(struct device_node *np,
        result = device_register(&cdev->device);
        if (result) {
                ida_simple_remove(&thermal_cdev_ida, cdev->id);
-               kfree(cdev);
+               put_device(&cdev->device);
                return ERR_PTR(result);
        }
 
@@ -1240,21 +1240,31 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        struct thermal_zone_device *tz;
        enum thermal_trip_type trip_type;
        int trip_temp;
+       int id;
        int result;
        int count;
        struct thermal_governor *governor;
 
-       if (!type || strlen(type) == 0)
+       if (!type || strlen(type) == 0) {
+               pr_err("Error: No thermal zone type defined\n");
                return ERR_PTR(-EINVAL);
+       }
 
-       if (type && strlen(type) >= THERMAL_NAME_LENGTH)
+       if (type && strlen(type) >= THERMAL_NAME_LENGTH) {
+               pr_err("Error: Thermal zone name (%s) too long, should be under %d chars\n",
+                      type, THERMAL_NAME_LENGTH);
                return ERR_PTR(-EINVAL);
+       }
 
-       if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips)
+       if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) {
+               pr_err("Error: Incorrect number of thermal trips\n");
                return ERR_PTR(-EINVAL);
+       }
 
-       if (!ops)
+       if (!ops) {
+               pr_err("Error: Thermal zone device ops not defined\n");
                return ERR_PTR(-EINVAL);
+       }
 
        if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
                return ERR_PTR(-EINVAL);
@@ -1266,11 +1276,13 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        INIT_LIST_HEAD(&tz->thermal_instances);
        ida_init(&tz->ida);
        mutex_init(&tz->lock);
-       result = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL);
-       if (result < 0)
+       id = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL);
+       if (id < 0) {
+               result = id;
                goto free_tz;
+       }
 
-       tz->id = result;
+       tz->id = id;
        strlcpy(tz->type, type, sizeof(tz->type));
        tz->ops = ops;
        tz->tzp = tzp;
@@ -1292,7 +1304,7 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        dev_set_name(&tz->device, "thermal_zone%d", tz->id);
        result = device_register(&tz->device);
        if (result)
-               goto remove_device_groups;
+               goto release_device;
 
        for (count = 0; count < trips; count++) {
                if (tz->ops->get_trip_type(tz, count, &trip_type))
@@ -1343,14 +1355,12 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        return tz;
 
 unregister:
-       ida_simple_remove(&thermal_tz_ida, tz->id);
-       device_unregister(&tz->device);
-       return ERR_PTR(result);
-
-remove_device_groups:
-       thermal_zone_destroy_device_groups(tz);
+       device_del(&tz->device);
+release_device:
+       put_device(&tz->device);
+       tz = NULL;
 remove_id:
-       ida_simple_remove(&thermal_tz_ida, tz->id);
+       ida_simple_remove(&thermal_tz_ida, id);
 free_tz:
        kfree(tz);
        return ERR_PTR(result);
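
The reworked unwinding follows the driver-core rule that once device_register() has been called, the embedded kobject owns the allocation: failure paths must drop the reference with put_device() (whose release callback does the freeing) rather than kfree() the containing object. The two legal teardown shapes, sketched:

	err = device_register(&tz->device);
	if (err) {
		put_device(&tz->device);	/* never kfree(tz) after this point */
		return ERR_PTR(err);
	}
	/* ... later, for a successfully registered device ... */
	device_del(&tz->device);
	put_device(&tz->device);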
index 40c69a5..dd5d8ee 100644 (file)
@@ -87,13 +87,17 @@ static struct thermal_hwmon_device *
 thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
 {
        struct thermal_hwmon_device *hwmon;
+       char type[THERMAL_NAME_LENGTH];
 
        mutex_lock(&thermal_hwmon_list_lock);
-       list_for_each_entry(hwmon, &thermal_hwmon_list, node)
-               if (!strcmp(hwmon->type, tz->type)) {
+       list_for_each_entry(hwmon, &thermal_hwmon_list, node) {
+               strcpy(type, tz->type);
+               strreplace(type, '-', '_');
+               if (!strcmp(hwmon->type, type)) {
                        mutex_unlock(&thermal_hwmon_list_lock);
                        return hwmon;
                }
+       }
        mutex_unlock(&thermal_hwmon_list_lock);
 
        return NULL;
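
The lookup now normalizes tz->type the same way the hwmon registration side does (hwmon device names may not contain '-'); without this, a second zone whose type contains a dash would never match an existing entry. The shared normalization, sketched on the assumption that the create path applies the identical replacement:

	static void hwmon_normalize_sketch(char *dst, const char *src, size_t len)
	{
		strscpy(dst, src, len);
		strreplace(dst, '-', '_');	/* '-' is invalid in hwmon names */
	}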
index de3ccee..40524fa 100644 (file)
@@ -53,13 +53,6 @@ static int thermal_mmio_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (IS_ERR(resource)) {
-               dev_err(&pdev->dev,
-                       "fail to get platform memory resource (%ld)\n",
-                       PTR_ERR(resource));
-               return PTR_ERR(resource);
-       }
-
        sensor->mmio_base = devm_ioremap_resource(&pdev->dev, resource);
        if (IS_ERR(sensor->mmio_base)) {
                dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
index e55c79e..98361ac 100644 (file)
@@ -968,6 +968,11 @@ static int __init n_hdlc_init(void)
        
 }      /* end of init_module() */
 
+#ifdef CONFIG_SPARC
+#undef __exitdata
+#define __exitdata
+#endif
+
 static const char hdlc_unregister_ok[] __exitdata =
        KERN_INFO "N_HDLC: line discipline unregistered\n";
 static const char hdlc_unregister_fail[] __exitdata =
index 02c5aff..8df89e9 100644 (file)
@@ -72,8 +72,8 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev,
 {
        struct serial_8250_men_mcb_data *data;
        struct resource *mem;
-       unsigned int num_ports;
-       unsigned int i;
+       int num_ports;
+       int i;
        void __iomem *membase;
 
        mem = mcb_get_resource(mdev, IORESOURCE_MEM);
@@ -88,7 +88,7 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev,
        dev_dbg(&mdev->dev, "found a 16z%03u with %u ports\n",
                mdev->id, num_ports);
 
-       if (num_ports == 0 || num_ports > 4) {
+       if (num_ports <= 0 || num_ports > 4) {
                dev_err(&mdev->dev, "unexpected number of ports: %d\n",
REPLACEBODY
                        num_ports);
                return -ENODEV;
@@ -133,7 +133,7 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev,
 
 static void serial_8250_men_mcb_remove(struct mcb_device *mdev)
 {
-       unsigned int num_ports, i;
+       int num_ports, i;
        struct serial_8250_men_mcb_data *data = mcb_get_drvdata(mdev);
 
        if (!data)
index c68e2b3..836e736 100644 (file)
@@ -141,7 +141,7 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
 
        serial8250_do_set_mctrl(port, mctrl);
 
-       if (!up->gpios) {
+       if (!mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS)) {
                /*
                 * Turn off autoRTS if RTS is lowered and restore autoRTS
                 * setting if RTS is raised
@@ -456,7 +456,8 @@ static void omap_8250_set_termios(struct uart_port *port,
        up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF);
 
        if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW &&
-           !up->gpios) {
+           !mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS) &&
+           !mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_CTS)) {
                /* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */
                up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS;
                priv->efr |= UART_EFR_CTS;
index 4789b5d..67a9eb3 100644 (file)
@@ -1032,6 +1032,7 @@ config SERIAL_SIFIVE_CONSOLE
        bool "Console on SiFive UART"
        depends on SERIAL_SIFIVE=y
        select SERIAL_CORE_CONSOLE
+       select SERIAL_EARLYCON
        help
          Select this option if you would like to use a SiFive UART as the
          system console.
index 68d74f2..a32f0d2 100644 (file)
@@ -3,7 +3,7 @@
  * Freescale linflexuart serial port driver
  *
  * Copyright 2012-2016 Freescale Semiconductor, Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2019 NXP
  */
 
 #if defined(CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE) && \
@@ -246,12 +246,14 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id)
        struct tty_port *port = &sport->state->port;
        unsigned long flags, status;
        unsigned char rx;
+       bool brk;
 
        spin_lock_irqsave(&sport->lock, flags);
 
        status = readl(sport->membase + UARTSR);
        while (status & LINFLEXD_UARTSR_RMB) {
                rx = readb(sport->membase + BDRM);
+               brk = false;
                flg = TTY_NORMAL;
                sport->icount.rx++;
 
@@ -261,8 +263,11 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id)
                                status |= LINFLEXD_UARTSR_SZF;
                        if (status & LINFLEXD_UARTSR_BOF)
                                status |= LINFLEXD_UARTSR_BOF;
-                       if (status & LINFLEXD_UARTSR_FEF)
+                       if (status & LINFLEXD_UARTSR_FEF) {
+                               if (!rx)
+                                       brk = true;
                                status |= LINFLEXD_UARTSR_FEF;
+                       }
                        if (status & LINFLEXD_UARTSR_PE)
                                status |=  LINFLEXD_UARTSR_PE;
                }
@@ -271,13 +276,15 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id)
                       sport->membase + UARTSR);
                status = readl(sport->membase + UARTSR);
 
-               if (uart_handle_sysrq_char(sport, (unsigned char)rx))
-                       continue;
-
+               if (brk) {
+                       uart_handle_break(sport);
+               } else {
 #ifdef SUPPORT_SYSRQ
-                       sport->sysrq = 0;
+                       if (uart_handle_sysrq_char(sport, (unsigned char)rx))
+                               continue;
 #endif
-               tty_insert_flip_char(port, rx, flg);
+                       tty_insert_flip_char(port, rx, flg);
+               }
        }
 
        spin_unlock_irqrestore(&sport->lock, flags);
index 3e17bb8..537896c 100644 (file)
@@ -548,7 +548,7 @@ static void lpuart_flush_buffer(struct uart_port *port)
                val |= UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH;
                lpuart32_write(&sport->port, val, UARTFIFO);
        } else {
-               val = readb(sport->port.membase + UARTPFIFO);
+               val = readb(sport->port.membase + UARTCFIFO);
                val |= UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH;
                writeb(val, sport->port.membase + UARTCFIFO);
        }
index 87c58f9..5e08f26 100644 (file)
@@ -2222,8 +2222,8 @@ static int imx_uart_probe(struct platform_device *pdev)
                return PTR_ERR(base);
 
        rxirq = platform_get_irq(pdev, 0);
-       txirq = platform_get_irq(pdev, 1);
-       rtsirq = platform_get_irq(pdev, 2);
+       txirq = platform_get_irq_optional(pdev, 1);
+       rtsirq = platform_get_irq_optional(pdev, 2);
 
        sport->port.dev = &pdev->dev;
        sport->port.mapbase = res->start;
index 03963af..d2d8b34 100644 (file)
@@ -740,7 +740,7 @@ static int __init owl_uart_init(void)
        return ret;
 }
 
-static void __init owl_uart_exit(void)
+static void __exit owl_uart_exit(void)
 {
        platform_driver_unregister(&owl_uart_platform_driver);
        uart_unregister_driver(&owl_uart_driver);
index c1b0d76..ff9a27d 100644 (file)
@@ -815,7 +815,7 @@ static int __init rda_uart_init(void)
        return ret;
 }
 
-static void __init rda_uart_exit(void)
+static void __exit rda_uart_exit(void)
 {
        platform_driver_unregister(&rda_uart_platform_driver);
        uart_unregister_driver(&rda_uart_driver);
index 4223cb4..c4a414a 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/serial_core.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 
 #include <linux/irq.h>
 #include <linux/uaccess.h>
@@ -862,6 +863,10 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
                goto check_and_exit;
        }
 
+       retval = security_locked_down(LOCKDOWN_TIOCSSERIAL);
+       if (retval && (change_irq || change_port))
+               goto exit;
+
        /*
         * Ask the low level driver to verify the settings.
         */
@@ -1959,8 +1964,10 @@ uart_get_console(struct uart_port *ports, int nr, struct console *co)
  *        console=<name>,io|mmio|mmio16|mmio32|mmio32be|mmio32native,<addr>,<options>
  *
  *     The optional form
+ *
  *        earlycon=<name>,0x<addr>,<options>
  *        console=<name>,0x<addr>,<options>
+ *
  *     is also accepted; the returned @iotype will be UPIO_MEM.
  *
  *     Returns 0 on success or -EINVAL on failure
index d907430..fb47812 100644 (file)
@@ -66,6 +66,9 @@ EXPORT_SYMBOL_GPL(mctrl_gpio_set);
 struct gpio_desc *mctrl_gpio_to_gpiod(struct mctrl_gpios *gpios,
                                      enum mctrl_gpio_idx gidx)
 {
+       if (gpios == NULL)
+               return NULL;
+
        return gpios->gpio[gidx];
 }
 EXPORT_SYMBOL_GPL(mctrl_gpio_to_gpiod);
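
With the NULL check in the accessor, "no mctrl-gpio table at all" and "this particular line not wired up" collapse into the same NULL answer, so callers such as the omap8250 hunks above need only a single test. The caller-side shape, sketched:

	static bool uart_has_rts_gpio(struct mctrl_gpios *gpios)
	{
		/* safe even when the driver registered no gpios at all */
		return mctrl_gpio_to_gpiod(gpios, UART_GPIO_RTS) != NULL;
	}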
index 4e754a4..22e5d4e 100644 (file)
@@ -2894,8 +2894,12 @@ static int sci_init_single(struct platform_device *dev,
        port->mapbase = res->start;
        sci_port->reg_size = resource_size(res);
 
-       for (i = 0; i < ARRAY_SIZE(sci_port->irqs); ++i)
-               sci_port->irqs[i] = platform_get_irq(dev, i);
+       for (i = 0; i < ARRAY_SIZE(sci_port->irqs); ++i) {
+               if (i)
+                       sci_port->irqs[i] = platform_get_irq_optional(dev, i);
+               else
+                       sci_port->irqs[i] = platform_get_irq(dev, i);
+       }
 
        /* The SCI generates several interrupts. They can be muxed together or
         * connected to different interrupt lines. In the muxed case only one
index b8b912b..06e79c1 100644 (file)
@@ -897,7 +897,8 @@ static int __init ulite_init(void)
 static void __exit ulite_exit(void)
 {
        platform_driver_unregister(&ulite_platform_driver);
-       uart_unregister_driver(&ulite_uart_driver);
+       if (ulite_uart_driver.state)
+               uart_unregister_driver(&ulite_uart_driver);
 }
 
 module_init(ulite_init);
index da4563a..4e55bc3 100644 (file)
@@ -1550,7 +1550,6 @@ static int cdns_uart_probe(struct platform_device *pdev)
                goto err_out_id;
        }
 
-       uartps_major = cdns_uart_uart_driver->tty_driver->major;
        cdns_uart_data->cdns_uart_driver = cdns_uart_uart_driver;
 
        /*
@@ -1680,6 +1679,7 @@ static int cdns_uart_probe(struct platform_device *pdev)
                console_port = NULL;
 #endif
 
+       uartps_major = cdns_uart_uart_driver->tty_driver->major;
        cdns_uart_data->cts_override = of_property_read_bool(pdev->dev.of_node,
                                                             "cts-override");
        return 0;
@@ -1741,6 +1741,12 @@ static int cdns_uart_remove(struct platform_device *pdev)
                console_port = NULL;
 #endif
 
+       /* If this is the last instance, the major number should be reset */
+       mutex_lock(&bitmap_lock);
+       if (bitmap_empty(bitmap, MAX_UART_INSTANCES))
+               uartps_major = 0;
+       mutex_unlock(&bitmap_lock);
+
        uart_unregister_driver(cdns_uart_data->cdns_uart_driver);
        return rc;
 }
index c41ddb6..b0a29ef 100644 (file)
@@ -159,8 +159,9 @@ static int cdns3_pci_probe(struct pci_dev *pdev,
                wrap->plat_dev = platform_device_register_full(&plat_info);
                if (IS_ERR(wrap->plat_dev)) {
                        pci_disable_device(pdev);
+                       err = PTR_ERR(wrap->plat_dev);
                        kfree(wrap);
-                       return PTR_ERR(wrap->plat_dev);
+                       return err;
                }
        }
 
index 06f1e10..c2123ef 100644 (file)
@@ -160,10 +160,30 @@ static int cdns3_core_init_role(struct cdns3 *cdns)
        if (ret)
                goto err;
 
-       if (cdns->dr_mode != USB_DR_MODE_OTG) {
+       /* Initialize idle role to start with */
+       ret = cdns3_role_start(cdns, USB_ROLE_NONE);
+       if (ret)
+               goto err;
+
+       switch (cdns->dr_mode) {
+       case USB_DR_MODE_OTG:
                ret = cdns3_hw_role_switch(cdns);
                if (ret)
                        goto err;
+               break;
+       case USB_DR_MODE_PERIPHERAL:
+               ret = cdns3_role_start(cdns, USB_ROLE_DEVICE);
+               if (ret)
+                       goto err;
+               break;
+       case USB_DR_MODE_HOST:
+               ret = cdns3_role_start(cdns, USB_ROLE_HOST);
+               if (ret)
+                       goto err;
+               break;
+       default:
+               ret = -EINVAL;
+               goto err;
        }
 
        return ret;
index 44f652e..e71240b 100644 (file)
@@ -234,9 +234,11 @@ static int cdns3_req_ep0_set_address(struct cdns3_device *priv_dev,
 static int cdns3_req_ep0_get_status(struct cdns3_device *priv_dev,
                                    struct usb_ctrlrequest *ctrl)
 {
+       struct cdns3_endpoint *priv_ep;
        __le16 *response_pkt;
        u16 usb_status = 0;
        u32 recip;
+       u8 index;
 
        recip = ctrl->bRequestType & USB_RECIP_MASK;
 
@@ -262,9 +264,13 @@ static int cdns3_req_ep0_get_status(struct cdns3_device *priv_dev,
        case USB_RECIP_INTERFACE:
                return cdns3_ep0_delegate_req(priv_dev, ctrl);
        case USB_RECIP_ENDPOINT:
-               /* check if endpoint is stalled */
+               index = cdns3_ep_addr_to_index(ctrl->wIndex);
+               priv_ep = priv_dev->eps[index];
+
+               /* check if endpoint is stalled or stall is pending */
                cdns3_select_ep(priv_dev, ctrl->wIndex);
-               if (EP_STS_STALL(readl(&priv_dev->regs->ep_sts)))
+               if (EP_STS_STALL(readl(&priv_dev->regs->ep_sts)) ||
+                   (priv_ep->flags & EP_STALL_PENDING))
                        usb_status =  BIT(USB_ENDPOINT_HALT);
                break;
        default:
@@ -332,7 +338,7 @@ static int cdns3_ep0_feature_handle_device(struct cdns3_device *priv_dev,
                         * for sending status stage.
                         * This time should be less than 3ms.
                         */
-                       usleep_range(1000, 2000);
+                       mdelay(1);
                        cdns3_set_register_bit(&priv_dev->regs->usb_cmd,
                                               USB_CMD_STMODE |
                                               USB_STS_TMODE_SEL(tmode - 1));
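
The switch from usleep_range() to mdelay() suggests this ep0 path can be reached in atomic context (under the controller spinlock), where sleeping delay primitives are not allowed and mdelay() busy-waits instead. The general rule, sketched under that assumption:

	spin_lock_irqsave(&priv_dev->lock, flags);
	/* ... */
	mdelay(1);			/* busy-wait: may not sleep here */
	/* usleep_range(1000, 2000)	   would be a sleeping call */
	spin_unlock_irqrestore(&priv_dev->lock, flags);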
index 228cdc4..4c1e755 100644 (file)
@@ -1145,6 +1145,14 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev,
                request = cdns3_next_request(&priv_ep->pending_req_list);
                priv_req = to_cdns3_request(request);
 
+               trb = priv_ep->trb_pool + priv_ep->dequeue;
+
+               /* Request was dequeued and TRB was changed to TRB_LINK. */
+               if (TRB_FIELD_TO_TYPE(trb->control) == TRB_LINK) {
+                       trace_cdns3_complete_trb(priv_ep, trb);
+                       cdns3_move_deq_to_next_trb(priv_req);
+               }
+
                /* Re-select endpoint. It could be changed by another CPU
                 * while handling usb_gadget_giveback_request.
                 */
@@ -2067,6 +2075,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
        struct usb_request *req, *req_temp;
        struct cdns3_request *priv_req;
        struct cdns3_trb *link_trb;
+       u8 req_on_hw_ring = 0;
        unsigned long flags;
        int ret = 0;
 
@@ -2083,8 +2092,10 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
 
        list_for_each_entry_safe(req, req_temp, &priv_ep->pending_req_list,
                                 list) {
-               if (request == req)
+               if (request == req) {
+                       req_on_hw_ring = 1;
                        goto found;
+               }
        }
 
        list_for_each_entry_safe(req, req_temp, &priv_ep->deferred_req_list,
@@ -2096,27 +2107,21 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
        goto not_found;
 
 found:
-
-       if (priv_ep->wa1_trb == priv_req->trb)
-               cdns3_wa1_restore_cycle_bit(priv_ep);
-
        link_trb = priv_req->trb;
-       cdns3_move_deq_to_next_trb(priv_req);
-       cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET);
-
-       /* Update ring */
-       request = cdns3_next_request(&priv_ep->deferred_req_list);
-       if (request) {
-               priv_req = to_cdns3_request(request);
 
+       /* Update ring only if the removed request is on the pending_req_list */
+       if (req_on_hw_ring) {
                link_trb->buffer = TRB_BUFFER(priv_ep->trb_pool_dma +
                                              (priv_req->start_trb * TRB_SIZE));
                link_trb->control = (link_trb->control & TRB_CYCLE) |
-                                   TRB_TYPE(TRB_LINK) | TRB_CHAIN | TRB_TOGGLE;
-       } else {
-               priv_ep->flags |= EP_UPDATE_EP_TRBADDR;
+                                   TRB_TYPE(TRB_LINK) | TRB_CHAIN;
+
+               if (priv_ep->wa1_trb == priv_req->trb)
+                       cdns3_wa1_restore_cycle_bit(priv_ep);
        }
 
+       cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET);
+
 not_found:
        spin_unlock_irqrestore(&priv_dev->lock, flags);
        return ret;
@@ -2324,8 +2329,6 @@ static void cdns3_gadget_config(struct cdns3_device *priv_dev)
        writel(USB_CONF_CLK2OFFDS | USB_CONF_L1DS, &regs->usb_conf);
 
        cdns3_configure_dmult(priv_dev, NULL);
-
-       cdns3_gadget_pullup(&priv_dev->gadget, 1);
 }
 
 /**
@@ -2340,9 +2343,35 @@ static int cdns3_gadget_udc_start(struct usb_gadget *gadget,
 {
        struct cdns3_device *priv_dev = gadget_to_cdns3_device(gadget);
        unsigned long flags;
+       enum usb_device_speed max_speed = driver->max_speed;
 
        spin_lock_irqsave(&priv_dev->lock, flags);
        priv_dev->gadget_driver = driver;
+
+       /* limit speed if necessary */
+       max_speed = min(driver->max_speed, gadget->max_speed);
+
+       switch (max_speed) {
+       case USB_SPEED_FULL:
+               writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf);
+               writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf);
+               break;
+       case USB_SPEED_HIGH:
+               writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf);
+               break;
+       case USB_SPEED_SUPER:
+               break;
+       default:
+               dev_err(priv_dev->dev,
+                       "invalid maximum_speed parameter %d\n",
+                       max_speed);
+               /* fall through */
+       case USB_SPEED_UNKNOWN:
+               /* default to superspeed */
+               max_speed = USB_SPEED_SUPER;
+               break;
+       }
+
        cdns3_gadget_config(priv_dev);
        spin_unlock_irqrestore(&priv_dev->lock, flags);
        return 0;
@@ -2376,6 +2405,8 @@ static int cdns3_gadget_udc_stop(struct usb_gadget *gadget)
                writel(EP_CMD_EPRST, &priv_dev->regs->ep_cmd);
                readl_poll_timeout_atomic(&priv_dev->regs->ep_cmd, val,
                                          !(val & EP_CMD_EPRST), 1, 100);
+
+               priv_ep->flags &= ~EP_CLAIMED;
        }
 
        /* disable interrupt for device */
@@ -2570,11 +2601,7 @@ static int cdns3_gadget_start(struct cdns3 *cdns)
        /* Check the maximum_speed parameter */
        switch (max_speed) {
        case USB_SPEED_FULL:
-               writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf);
-               break;
        case USB_SPEED_HIGH:
-               writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf);
-               break;
        case USB_SPEED_SUPER:
                break;
        default:
@@ -2662,6 +2689,13 @@ static int __cdns3_gadget_init(struct cdns3 *cdns)
 {
        int ret = 0;
 
+       /* Ensure 32-bit DMA Mask in case we switched back from Host mode */
+       ret = dma_set_mask_and_coherent(cdns->dev, DMA_BIT_MASK(32));
+       if (ret) {
+               dev_err(cdns->dev, "Failed to set dma mask: %d\n", ret);
+               return ret;
+       }
+
        cdns3_drd_switch_gadget(cdns, 1);
        pm_runtime_get_sync(cdns->dev);
 
@@ -2700,8 +2734,6 @@ static int cdns3_gadget_suspend(struct cdns3 *cdns, bool do_wakeup)
        /* disable interrupt for device */
        writel(0, &priv_dev->regs->usb_ien);
 
-       cdns3_gadget_pullup(&priv_dev->gadget, 0);
-
        return 0;
 }
 
index b498a17..ae11810 100644 (file)
@@ -12,7 +12,6 @@
 #ifdef CONFIG_USB_CDNS3_HOST
 
 int cdns3_host_init(struct cdns3 *cdns);
-void cdns3_host_exit(struct cdns3 *cdns);
 
 #else
 
index 2733a8f..ad788bf 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include "core.h"
 #include "drd.h"
+#include "host-export.h"
 
 static int __cdns3_host_init(struct cdns3 *cdns)
 {
index 7fea499..0d8e3f3 100644 (file)
@@ -445,6 +445,7 @@ static void usblp_cleanup(struct usblp *usblp)
        kfree(usblp->readbuf);
        kfree(usblp->device_id_string);
        kfree(usblp->statusbuf);
+       usb_put_intf(usblp->intf);
        kfree(usblp);
 }
 
@@ -461,10 +462,12 @@ static int usblp_release(struct inode *inode, struct file *file)
 
        mutex_lock(&usblp_mutex);
        usblp->used = 0;
-       if (usblp->present) {
+       if (usblp->present)
                usblp_unlink_urbs(usblp);
-               usb_autopm_put_interface(usblp->intf);
-       } else          /* finish cleanup from disconnect */
+
+       usb_autopm_put_interface(usblp->intf);
+
+       if (!usblp->present)            /* finish cleanup from disconnect */
                usblp_cleanup(usblp);
        mutex_unlock(&usblp_mutex);
        return 0;
@@ -1111,7 +1114,7 @@ static int usblp_probe(struct usb_interface *intf,
        init_waitqueue_head(&usblp->wwait);
        init_usb_anchor(&usblp->urbs);
        usblp->ifnum = intf->cur_altsetting->desc.bInterfaceNumber;
-       usblp->intf = intf;
+       usblp->intf = usb_get_intf(intf);
 
        /* Malloc device ID string buffer to the largest expected length,
         * since we can re-query it on an ioctl and a dynamic string
@@ -1196,6 +1199,7 @@ abort:
        kfree(usblp->readbuf);
        kfree(usblp->statusbuf);
        kfree(usblp->device_id_string);
+       usb_put_intf(usblp->intf);
        kfree(usblp);
 abort_ret:
        return retval;
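
The usblp hunks pin the interface with a reference so that ->intf stays valid until the last opener is gone, even after disconnect; the autopm put is likewise done unconditionally now. The ownership pattern, condensed:

	/* probe: take a reference alongside storing the pointer */
	usblp->intf = usb_get_intf(intf);

	/* final cleanup (from release or disconnect, whichever runs last) */
	usb_put_intf(usblp->intf);
	kfree(usblp);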
index 151a74a..1ac1095 100644 (file)
@@ -348,6 +348,11 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 
        /* Validate the wMaxPacketSize field */
        maxp = usb_endpoint_maxp(&endpoint->desc);
+       if (maxp == 0) {
+               dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has wMaxPacketSize 0, skipping\n",
+                   cfgno, inum, asnum, d->bEndpointAddress);
+               goto skip_to_next_endpoint_or_interface_descriptor;
+       }
 
        /* Find the highest legal maxpacket size for this endpoint */
        i = 0;          /* additional transactions per microframe */
index 89abc60..556a876 100644 (file)
@@ -102,6 +102,7 @@ config USB_DWC3_MESON_G12A
        depends on ARCH_MESON || COMPILE_TEST
        default USB_DWC3
        select USB_ROLE_SWITCH
+       select REGMAP_MMIO
        help
          Support USB2/3 functionality in Amlogic G12A platforms.
         Say 'Y' or 'M' if you have one such device.
index 999ce5e..97d6ae3 100644 (file)
@@ -312,8 +312,7 @@ static void dwc3_frame_length_adjustment(struct dwc3 *dwc)
 
        reg = dwc3_readl(dwc->regs, DWC3_GFLADJ);
        dft = reg & DWC3_GFLADJ_30MHZ_MASK;
-       if (!dev_WARN_ONCE(dwc->dev, dft == dwc->fladj,
-           "request value same as default, ignoring\n")) {
+       if (dft != dwc->fladj) {
                reg &= ~DWC3_GFLADJ_30MHZ_MASK;
                reg |= DWC3_GFLADJ_30MHZ_SDBND_SEL | dwc->fladj;
                dwc3_writel(dwc->regs, DWC3_GFLADJ, reg);
index 726100d..c946d64 100644 (file)
@@ -139,14 +139,14 @@ static int dwc3_otg_get_irq(struct dwc3 *dwc)
        struct platform_device *dwc3_pdev = to_platform_device(dwc->dev);
        int irq;
 
-       irq = platform_get_irq_byname(dwc3_pdev, "otg");
+       irq = platform_get_irq_byname_optional(dwc3_pdev, "otg");
        if (irq > 0)
                goto out;
 
        if (irq == -EPROBE_DEFER)
                goto out;
 
-       irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3");
+       irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3");
        if (irq > 0)
                goto out;
 
@@ -157,9 +157,6 @@ static int dwc3_otg_get_irq(struct dwc3 *dwc)
        if (irq > 0)
                goto out;
 
-       if (irq != -EPROBE_DEFER)
-               dev_err(dwc->dev, "missing OTG IRQ\n");
-
        if (!irq)
                irq = -EINVAL;
 
index 5e8e182..023f035 100644 (file)
@@ -258,7 +258,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
 
        ret = platform_device_add_properties(dwc->dwc3, p);
        if (ret < 0)
-               return ret;
+               goto err;
 
        ret = dwc3_pci_quirks(dwc);
        if (ret)
index 8adb59f..a9aba71 100644 (file)
@@ -707,6 +707,12 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
 
                dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
        }
+
+       while (!list_empty(&dep->cancelled_list)) {
+               req = next_request(&dep->cancelled_list);
+
+               dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
+       }
 }
 
 /**
@@ -3264,14 +3270,14 @@ static int dwc3_gadget_get_irq(struct dwc3 *dwc)
        struct platform_device *dwc3_pdev = to_platform_device(dwc->dev);
        int irq;
 
-       irq = platform_get_irq_byname(dwc3_pdev, "peripheral");
+       irq = platform_get_irq_byname_optional(dwc3_pdev, "peripheral");
        if (irq > 0)
                goto out;
 
        if (irq == -EPROBE_DEFER)
                goto out;
 
-       irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3");
+       irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3");
        if (irq > 0)
                goto out;
 
@@ -3282,9 +3288,6 @@ static int dwc3_gadget_get_irq(struct dwc3 *dwc)
        if (irq > 0)
                goto out;
 
-       if (irq != -EPROBE_DEFER)
-               dev_err(dwc->dev, "missing peripheral IRQ\n");
-
        if (!irq)
                irq = -EINVAL;
 
index 8deea8c..5567ed2 100644 (file)
@@ -16,14 +16,14 @@ static int dwc3_host_get_irq(struct dwc3 *dwc)
        struct platform_device  *dwc3_pdev = to_platform_device(dwc->dev);
        int irq;
 
-       irq = platform_get_irq_byname(dwc3_pdev, "host");
+       irq = platform_get_irq_byname_optional(dwc3_pdev, "host");
        if (irq > 0)
                goto out;
 
        if (irq == -EPROBE_DEFER)
                goto out;
 
-       irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3");
+       irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3");
        if (irq > 0)
                goto out;
 
@@ -34,9 +34,6 @@ static int dwc3_host_get_irq(struct dwc3 *dwc)
        if (irq > 0)
                goto out;
 
-       if (irq != -EPROBE_DEFER)
-               dev_err(dwc->dev, "missing host IRQ\n");
-
        if (!irq)
                irq = -EINVAL;
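
All three dwc3 hunks (otg, gadget, host) share the same fallback chain; the _optional() lookups keep dmesg quiet when an earlier interrupt name simply isn't wired up, which is why the "missing ... IRQ" prints can go. The chain, condensed into one sketch:

	static int dwc3_get_irq_sketch(struct platform_device *pdev,
				       const char *name)
	{
		int irq = platform_get_irq_byname_optional(pdev, name);

		if (irq > 0 || irq == -EPROBE_DEFER)
			return irq;
		irq = platform_get_irq_byname_optional(pdev, "dwc_usb3");
		if (irq > 0 || irq == -EPROBE_DEFER)
			return irq;
		irq = platform_get_irq(pdev, 0);	/* last resort: by index */
		return irq ? irq : -EINVAL;
	}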
 
index d516e8d..5ec54b6 100644 (file)
@@ -2170,14 +2170,18 @@ void composite_dev_cleanup(struct usb_composite_dev *cdev)
                        usb_ep_dequeue(cdev->gadget->ep0, cdev->os_desc_req);
 
                kfree(cdev->os_desc_req->buf);
+               cdev->os_desc_req->buf = NULL;
                usb_ep_free_request(cdev->gadget->ep0, cdev->os_desc_req);
+               cdev->os_desc_req = NULL;
        }
        if (cdev->req) {
                if (cdev->setup_pending)
                        usb_ep_dequeue(cdev->gadget->ep0, cdev->req);
 
                kfree(cdev->req->buf);
+               cdev->req->buf = NULL;
                usb_ep_free_request(cdev->gadget->ep0, cdev->req);
+               cdev->req = NULL;
        }
        cdev->next_string_id = 0;
        device_remove_file(&cdev->gadget->dev, &dev_attr_suspended);
index 0251299..33852c2 100644 (file)
@@ -61,6 +61,8 @@ struct gadget_info {
        bool use_os_desc;
        char b_vendor_code;
        char qw_sign[OS_STRING_QW_SIGN_LEN];
+       spinlock_t spinlock;
+       bool unbind;
 };
 
 static inline struct gadget_info *to_gadget_info(struct config_item *item)
@@ -1244,6 +1246,7 @@ static int configfs_composite_bind(struct usb_gadget *gadget,
        int                             ret;
 
        /* the gi->lock is held by the caller */
+       gi->unbind = 0;
        cdev->gadget = gadget;
        set_gadget_data(gadget, cdev);
        ret = composite_dev_prepare(composite, cdev);
@@ -1376,31 +1379,128 @@ static void configfs_composite_unbind(struct usb_gadget *gadget)
 {
        struct usb_composite_dev        *cdev;
        struct gadget_info              *gi;
+       unsigned long flags;
 
        /* the gi->lock is held by the caller */
 
        cdev = get_gadget_data(gadget);
        gi = container_of(cdev, struct gadget_info, cdev);
+       spin_lock_irqsave(&gi->spinlock, flags);
+       gi->unbind = 1;
+       spin_unlock_irqrestore(&gi->spinlock, flags);
 
        kfree(otg_desc[0]);
        otg_desc[0] = NULL;
        purge_configs_funcs(gi);
        composite_dev_cleanup(cdev);
        usb_ep_autoconfig_reset(cdev->gadget);
+       spin_lock_irqsave(&gi->spinlock, flags);
        cdev->gadget = NULL;
        set_gadget_data(gadget, NULL);
+       spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static int configfs_composite_setup(struct usb_gadget *gadget,
+               const struct usb_ctrlrequest *ctrl)
+{
+       struct usb_composite_dev *cdev;
+       struct gadget_info *gi;
+       unsigned long flags;
+       int ret;
+
+       cdev = get_gadget_data(gadget);
+       if (!cdev)
+               return 0;
+
+       gi = container_of(cdev, struct gadget_info, cdev);
+       spin_lock_irqsave(&gi->spinlock, flags);
+       cdev = get_gadget_data(gadget);
+       if (!cdev || gi->unbind) {
+               spin_unlock_irqrestore(&gi->spinlock, flags);
+               return 0;
+       }
+
+       ret = composite_setup(gadget, ctrl);
+       spin_unlock_irqrestore(&gi->spinlock, flags);
+       return ret;
+}
+
+static void configfs_composite_disconnect(struct usb_gadget *gadget)
+{
+       struct usb_composite_dev *cdev;
+       struct gadget_info *gi;
+       unsigned long flags;
+
+       cdev = get_gadget_data(gadget);
+       if (!cdev)
+               return;
+
+       gi = container_of(cdev, struct gadget_info, cdev);
+       spin_lock_irqsave(&gi->spinlock, flags);
+       cdev = get_gadget_data(gadget);
+       if (!cdev || gi->unbind) {
+               spin_unlock_irqrestore(&gi->spinlock, flags);
+               return;
+       }
+
+       composite_disconnect(gadget);
+       spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static void configfs_composite_suspend(struct usb_gadget *gadget)
+{
+       struct usb_composite_dev *cdev;
+       struct gadget_info *gi;
+       unsigned long flags;
+
+       cdev = get_gadget_data(gadget);
+       if (!cdev)
+               return;
+
+       gi = container_of(cdev, struct gadget_info, cdev);
+       spin_lock_irqsave(&gi->spinlock, flags);
+       cdev = get_gadget_data(gadget);
+       if (!cdev || gi->unbind) {
+               spin_unlock_irqrestore(&gi->spinlock, flags);
+               return;
+       }
+
+       composite_suspend(gadget);
+       spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static void configfs_composite_resume(struct usb_gadget *gadget)
+{
+       struct usb_composite_dev *cdev;
+       struct gadget_info *gi;
+       unsigned long flags;
+
+       cdev = get_gadget_data(gadget);
+       if (!cdev)
+               return;
+
+       gi = container_of(cdev, struct gadget_info, cdev);
+       spin_lock_irqsave(&gi->spinlock, flags);
+       cdev = get_gadget_data(gadget);
+       if (!cdev || gi->unbind) {
+               spin_unlock_irqrestore(&gi->spinlock, flags);
+               return;
+       }
+
+       composite_resume(gadget);
+       spin_unlock_irqrestore(&gi->spinlock, flags);
 }
 
 static const struct usb_gadget_driver configfs_driver_template = {
        .bind           = configfs_composite_bind,
        .unbind         = configfs_composite_unbind,
 
-       .setup          = composite_setup,
-       .reset          = composite_disconnect,
-       .disconnect     = composite_disconnect,
+       .setup          = configfs_composite_setup,
+       .reset          = configfs_composite_disconnect,
+       .disconnect     = configfs_composite_disconnect,
 
-       .suspend        = composite_suspend,
-       .resume         = composite_resume,
+       .suspend        = configfs_composite_suspend,
+       .resume         = configfs_composite_resume,
 
        .max_speed      = USB_SPEED_SUPER,
        .driver = {
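
The four new wrappers share one shape: re-read the gadget data under gi->spinlock and bail out if an unbind has started, so a UDC callback racing with configfs_composite_unbind() never touches a half-torn-down cdev. Condensed into a single guard, as a sketch:

	static bool composite_guard(struct usb_gadget *g,
				    struct gadget_info **gip,
				    unsigned long *flags)
	{
		struct usb_composite_dev *cdev = get_gadget_data(g);
		struct gadget_info *gi;

		if (!cdev)
			return false;
		gi = container_of(cdev, struct gadget_info, cdev);
		spin_lock_irqsave(&gi->spinlock, *flags);
		if (!get_gadget_data(g) || gi->unbind) {
			spin_unlock_irqrestore(&gi->spinlock, *flags);
			return false;
		}
		*gip = gi;
		return true;	/* caller runs the composite op, then unlocks */
	}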
index 213ff03..59d9d51 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/blkdev.h>
 #include <linux/pagemap.h>
 #include <linux/export.h>
+#include <linux/fs_parser.h>
 #include <linux/hid.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -1451,9 +1452,9 @@ struct ffs_sb_fill_data {
        struct ffs_data *ffs_data;
 };
 
-static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
+static int ffs_sb_fill(struct super_block *sb, struct fs_context *fc)
 {
-       struct ffs_sb_fill_data *data = _data;
+       struct ffs_sb_fill_data *data = fc->fs_private;
        struct inode    *inode;
        struct ffs_data *ffs = data->ffs_data;
 
@@ -1486,147 +1487,152 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
        return 0;
 }
 
-static int ffs_fs_parse_opts(struct ffs_sb_fill_data *data, char *opts)
-{
-       ENTER();
+enum {
+       Opt_no_disconnect,
+       Opt_rmode,
+       Opt_fmode,
+       Opt_mode,
+       Opt_uid,
+       Opt_gid,
+};
 
-       if (!opts || !*opts)
-               return 0;
+static const struct fs_parameter_spec ffs_fs_param_specs[] = {
+       fsparam_bool    ("no_disconnect",       Opt_no_disconnect),
+       fsparam_u32     ("rmode",               Opt_rmode),
+       fsparam_u32     ("fmode",               Opt_fmode),
+       fsparam_u32     ("mode",                Opt_mode),
+       fsparam_u32     ("uid",                 Opt_uid),
+       fsparam_u32     ("gid",                 Opt_gid),
+       {}
+};
 
-       for (;;) {
-               unsigned long value;
-               char *eq, *comma;
-
-               /* Option limit */
-               comma = strchr(opts, ',');
-               if (comma)
-                       *comma = 0;
-
-               /* Value limit */
-               eq = strchr(opts, '=');
-               if (unlikely(!eq)) {
-                       pr_err("'=' missing in %s\n", opts);
-                       return -EINVAL;
-               }
-               *eq = 0;
+static const struct fs_parameter_description ffs_fs_fs_parameters = {
+       .name           = "functionfs",
+       .specs          = ffs_fs_param_specs,
+};
 
-               /* Parse value */
-               if (kstrtoul(eq + 1, 0, &value)) {
-                       pr_err("%s: invalid value: %s\n", opts, eq + 1);
-                       return -EINVAL;
-               }
+static int ffs_fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+       struct ffs_sb_fill_data *data = fc->fs_private;
+       struct fs_parse_result result;
+       int opt;
 
-               /* Interpret option */
-               switch (eq - opts) {
-               case 13:
-                       if (!memcmp(opts, "no_disconnect", 13))
-                               data->no_disconnect = !!value;
-                       else
-                               goto invalid;
-                       break;
-               case 5:
-                       if (!memcmp(opts, "rmode", 5))
-                               data->root_mode  = (value & 0555) | S_IFDIR;
-                       else if (!memcmp(opts, "fmode", 5))
-                               data->perms.mode = (value & 0666) | S_IFREG;
-                       else
-                               goto invalid;
-                       break;
+       ENTER();
 
-               case 4:
-                       if (!memcmp(opts, "mode", 4)) {
-                               data->root_mode  = (value & 0555) | S_IFDIR;
-                               data->perms.mode = (value & 0666) | S_IFREG;
-                       } else {
-                               goto invalid;
-                       }
-                       break;
+       opt = fs_parse(fc, &ffs_fs_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
 
-               case 3:
-                       if (!memcmp(opts, "uid", 3)) {
-                               data->perms.uid = make_kuid(current_user_ns(), value);
-                               if (!uid_valid(data->perms.uid)) {
-                                       pr_err("%s: unmapped value: %lu\n", opts, value);
-                                       return -EINVAL;
-                               }
-                       } else if (!memcmp(opts, "gid", 3)) {
-                               data->perms.gid = make_kgid(current_user_ns(), value);
-                               if (!gid_valid(data->perms.gid)) {
-                                       pr_err("%s: unmapped value: %lu\n", opts, value);
-                                       return -EINVAL;
-                               }
-                       } else {
-                               goto invalid;
-                       }
-                       break;
+       switch (opt) {
+       case Opt_no_disconnect:
+               data->no_disconnect = result.boolean;
+               break;
+       case Opt_rmode:
+               data->root_mode  = (result.uint_32 & 0555) | S_IFDIR;
+               break;
+       case Opt_fmode:
+               data->perms.mode = (result.uint_32 & 0666) | S_IFREG;
+               break;
+       case Opt_mode:
+               data->root_mode  = (result.uint_32 & 0555) | S_IFDIR;
+               data->perms.mode = (result.uint_32 & 0666) | S_IFREG;
+               break;
 
-               default:
-invalid:
-                       pr_err("%s: invalid option\n", opts);
-                       return -EINVAL;
-               }
+       case Opt_uid:
+               data->perms.uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(data->perms.uid))
+                       goto unmapped_value;
+               break;
+       case Opt_gid:
+               data->perms.gid = make_kgid(current_user_ns(), result.uint_32);
+               if (!gid_valid(data->perms.gid))
+                       goto unmapped_value;
+               break;
 
-               /* Next iteration */
-               if (!comma)
-                       break;
-               opts = comma + 1;
+       default:
+               return -ENOPARAM;
        }
 
        return 0;
-}
 
-/* "mount -t functionfs dev_name /dev/function" ends up here */
+unmapped_value:
+       return invalf(fc, "%s: unmapped value: %u", param->key, result.uint_32);
+}
 
-static struct dentry *
-ffs_fs_mount(struct file_system_type *t, int flags,
-             const char *dev_name, void *opts)
-{
-       struct ffs_sb_fill_data data = {
-               .perms = {
-                       .mode = S_IFREG | 0600,
-                       .uid = GLOBAL_ROOT_UID,
-                       .gid = GLOBAL_ROOT_GID,
-               },
-               .root_mode = S_IFDIR | 0500,
-               .no_disconnect = false,
-       };
-       struct dentry *rv;
-       int ret;
+/*
+ * Set up the superblock for a mount.
+ */
+static int ffs_fs_get_tree(struct fs_context *fc)
+{
+       struct ffs_sb_fill_data *ctx = fc->fs_private;
        void *ffs_dev;
        struct ffs_data *ffs;
 
        ENTER();
 
-       ret = ffs_fs_parse_opts(&data, opts);
-       if (unlikely(ret < 0))
-               return ERR_PTR(ret);
+       if (!fc->source)
+               return invalf(fc, "No source specified");
 
-       ffs = ffs_data_new(dev_name);
+       ffs = ffs_data_new(fc->source);
        if (unlikely(!ffs))
-               return ERR_PTR(-ENOMEM);
-       ffs->file_perms = data.perms;
-       ffs->no_disconnect = data.no_disconnect;
+               return -ENOMEM;
+       ffs->file_perms = ctx->perms;
+       ffs->no_disconnect = ctx->no_disconnect;
 
-       ffs->dev_name = kstrdup(dev_name, GFP_KERNEL);
+       ffs->dev_name = kstrdup(fc->source, GFP_KERNEL);
        if (unlikely(!ffs->dev_name)) {
                ffs_data_put(ffs);
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        }
 
-       ffs_dev = ffs_acquire_dev(dev_name);
+       ffs_dev = ffs_acquire_dev(ffs->dev_name);
        if (IS_ERR(ffs_dev)) {
                ffs_data_put(ffs);
-               return ERR_CAST(ffs_dev);
+               return PTR_ERR(ffs_dev);
        }
+
        ffs->private_data = ffs_dev;
-       data.ffs_data = ffs;
+       ctx->ffs_data = ffs;
+       return get_tree_nodev(fc, ffs_sb_fill);
+}
+
+static void ffs_fs_free_fc(struct fs_context *fc)
+{
+       struct ffs_sb_fill_data *ctx = fc->fs_private;
+
+       if (ctx) {
+               if (ctx->ffs_data) {
+                       ffs_release_dev(ctx->ffs_data);
+                       ffs_data_put(ctx->ffs_data);
+               }
 
-       rv = mount_nodev(t, flags, &data, ffs_sb_fill);
-       if (IS_ERR(rv) && data.ffs_data) {
-               ffs_release_dev(data.ffs_data);
-               ffs_data_put(data.ffs_data);
+               kfree(ctx);
        }
-       return rv;
+}
+
+static const struct fs_context_operations ffs_fs_context_ops = {
+       .free           = ffs_fs_free_fc,
+       .parse_param    = ffs_fs_parse_param,
+       .get_tree       = ffs_fs_get_tree,
+};
+
+static int ffs_fs_init_fs_context(struct fs_context *fc)
+{
+       struct ffs_sb_fill_data *ctx;
+
+       ctx = kzalloc(sizeof(struct ffs_sb_fill_data), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->perms.mode = S_IFREG | 0600;
+       ctx->perms.uid = GLOBAL_ROOT_UID;
+       ctx->perms.gid = GLOBAL_ROOT_GID;
+       ctx->root_mode = S_IFDIR | 0500;
+       ctx->no_disconnect = false;
+
+       fc->fs_private = ctx;
+       fc->ops = &ffs_fs_context_ops;
+       return 0;
 }
 
 static void
@@ -1644,7 +1650,8 @@ ffs_fs_kill_sb(struct super_block *sb)
 static struct file_system_type ffs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "functionfs",
-       .mount          = ffs_fs_mount,
+       .init_fs_context = ffs_fs_init_fs_context,
+       .parameters     = &ffs_fs_fs_parameters,
        .kill_sb        = ffs_fs_kill_sb,
 };
 MODULE_ALIAS_FS("functionfs");
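
For users nothing changes: the same mount options are now matched by fs_parse() against ffs_fs_param_specs instead of the hand-rolled strchr()/kstrtoul() loop. An illustrative invocation (path and ids are examples only):

	/*
	 * e.g.:
	 *   mount -t functionfs -o uid=1000,gid=1000,no_disconnect=1 \
	 *         adb /dev/usb-ffs/adb
	 */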
index d7e6116..d354036 100644 (file)
@@ -45,7 +45,7 @@ config USB_AT91
 
 config USB_LPC32XX
        tristate "LPC32XX USB Peripheral Controller"
-       depends on ARCH_LPC32XX
+       depends on ARCH_LPC32XX || COMPILE_TEST
        depends on I2C
        select USB_ISP1301
        help
index 86ffc83..1d0d895 100644 (file)
@@ -449,9 +449,11 @@ static void submit_request(struct usba_ep *ep, struct usba_request *req)
                next_fifo_transaction(ep, req);
                if (req->last_transaction) {
                        usba_ep_writel(ep, CTL_DIS, USBA_TX_PK_RDY);
-                       usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE);
+                       if (ep_is_control(ep))
+                               usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE);
                } else {
-                       usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE);
+                       if (ep_is_control(ep))
+                               usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE);
                        usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY);
                }
        }
index 92af8dc..51fa614 100644 (file)
@@ -98,6 +98,17 @@ int usb_ep_enable(struct usb_ep *ep)
        if (ep->enabled)
                goto out;
 
+       /* UDC drivers can't handle endpoints with maxpacket size 0 */
+       if (usb_endpoint_maxp(ep->desc) == 0) {
+               /*
+                * We should log an error message here, but we can't call
+                * dev_err() because there's no way to find the gadget
+                * given only ep.
+                */
+               ret = -EINVAL;
+               goto out;
+       }
+
        ret = ep->ops->enable(ep, ep->desc);
        if (ret)
                goto out;
index 8414fac..3d499d9 100644 (file)
@@ -48,6 +48,7 @@
 #define DRIVER_VERSION "02 May 2005"
 
 #define POWER_BUDGET   500     /* in mA; use 8 for low-power port testing */
+#define POWER_BUDGET_3 900     /* in mA */
 
 static const char      driver_name[] = "dummy_hcd";
 static const char      driver_desc[] = "USB Host+Gadget Emulator";
@@ -2432,7 +2433,7 @@ static int dummy_start_ss(struct dummy_hcd *dum_hcd)
        dum_hcd->rh_state = DUMMY_RH_RUNNING;
        dum_hcd->stream_en_ep = 0;
        INIT_LIST_HEAD(&dum_hcd->urbp_list);
-       dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET;
+       dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET_3;
        dummy_hcd_to_hcd(dum_hcd)->state = HC_STATE_RUNNING;
        dummy_hcd_to_hcd(dum_hcd)->uses_new_polling = 1;
 #ifdef CONFIG_USB_OTG
index 20141c3..9a05863 100644 (file)
@@ -2576,7 +2576,7 @@ static int fsl_udc_remove(struct platform_device *pdev)
        dma_pool_destroy(udc_controller->td_pool);
        free_irq(udc_controller->irq, udc_controller);
        iounmap(dr_regs);
-       if (pdata->operating_mode == FSL_USB2_DR_DEVICE)
+       if (res && (pdata->operating_mode == FSL_USB2_DR_DEVICE))
                release_mem_region(res->start, resource_size(res));
 
        /* free udc --wait for the release() finished */
index b3e073f..bf6c81e 100644 (file)
@@ -1151,7 +1151,7 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes)
        u32 *p32, tmp, cbytes;
 
        /* Use optimal data transfer method based on source address and size */
-       switch (((u32) data) & 0x3) {
+       switch (((uintptr_t) data) & 0x3) {
        case 0: /* 32-bit aligned */
                p32 = (u32 *) data;
                cbytes = (bytes & ~0x3);
@@ -1177,11 +1177,11 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes)
                        tmp = readl(USBD_RXDATA(udc->udp_baseaddr));
 
                        bl = bytes - n;
-                       if (bl > 3)
-                               bl = 3;
+                       if (bl > 4)
+                               bl = 4;
 
                        for (i = 0; i < bl; i++)
-                               data[n + i] = (u8) ((tmp >> (n * 8)) & 0xFF);
+                               data[n + i] = (u8) ((tmp >> (i * 8)) & 0xFF);
                }
                break;
 
@@ -1252,7 +1252,7 @@ static void udc_stuff_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes)
        u32 *p32, tmp, cbytes;
 
        /* Use optimal data transfer method based on source address and size */
-       switch (((u32) data) & 0x3) {
+       switch (((uintptr_t) data) & 0x3) {
        case 0: /* 32-bit aligned */
                p32 = (u32 *) data;
                cbytes = (bytes & ~0x3);
index e098f16..3370314 100644 (file)
@@ -1544,10 +1544,10 @@ static void usb3_set_device_address(struct renesas_usb3 *usb3, u16 addr)
 static bool usb3_std_req_set_address(struct renesas_usb3 *usb3,
                                     struct usb_ctrlrequest *ctrl)
 {
-       if (ctrl->wValue >= 128)
+       if (le16_to_cpu(ctrl->wValue) >= 128)
                return true;    /* stall */
 
-       usb3_set_device_address(usb3, ctrl->wValue);
+       usb3_set_device_address(usb3, le16_to_cpu(ctrl->wValue));
        usb3_set_p0_con_for_no_data(usb3);
 
        return false;
@@ -1582,6 +1582,7 @@ static bool usb3_std_req_get_status(struct renesas_usb3 *usb3,
        struct renesas_usb3_ep *usb3_ep;
        int num;
        u16 status = 0;
+       __le16 tx_data;
 
        switch (ctrl->bRequestType & USB_RECIP_MASK) {
        case USB_RECIP_DEVICE:
@@ -1604,10 +1605,10 @@ static bool usb3_std_req_get_status(struct renesas_usb3 *usb3,
        }
 
        if (!stall) {
-               status = cpu_to_le16(status);
+               tx_data = cpu_to_le16(status);
                dev_dbg(usb3_to_dev(usb3), "get_status: req = %p\n",
                        usb_req_to_usb3_req(usb3->ep0_req));
-               usb3_pipe0_internal_xfer(usb3, &status, sizeof(status),
+               usb3_pipe0_internal_xfer(usb3, &tx_data, sizeof(tx_data),
                                         usb3_pipe0_get_status_completion);
        }
 
@@ -1772,7 +1773,7 @@ static bool usb3_std_req_set_sel(struct renesas_usb3 *usb3,
 static bool usb3_std_req_set_configuration(struct renesas_usb3 *usb3,
                                           struct usb_ctrlrequest *ctrl)
 {
-       if (ctrl->wValue > 0)
+       if (le16_to_cpu(ctrl->wValue) > 0)
                usb3_set_bit(usb3, USB_COM_CON_CONF, USB3_USB_COM_CON);
        else
                usb3_clear_bit(usb3, USB_COM_CON_CONF, USB3_USB_COM_CON);
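
wValue/wIndex/wLength in struct usb_ctrlrequest are __le16 (wire order), so on big-endian hosts they must be converted before any comparison or arithmetic; that is all these hunks change. Sketch:

	static bool set_address_is_valid(const struct usb_ctrlrequest *ctrl)
	{
		/* USB device addresses are 7-bit */
		return le16_to_cpu(ctrl->wValue) < 128;
	}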
index 7ba6afc..76c3f29 100644 (file)
@@ -202,10 +202,10 @@ static void xhci_ring_dump_segment(struct seq_file *s,
                trb = &seg->trbs[i];
                dma = seg->dma + i * sizeof(*trb);
                seq_printf(s, "%pad: %s\n", &dma,
-                          xhci_decode_trb(trb->generic.field[0],
-                                          trb->generic.field[1],
-                                          trb->generic.field[2],
-                                          trb->generic.field[3]));
+                          xhci_decode_trb(le32_to_cpu(trb->generic.field[0]),
+                                          le32_to_cpu(trb->generic.field[1]),
+                                          le32_to_cpu(trb->generic.field[2]),
+                                          le32_to_cpu(trb->generic.field[3])));
        }
 }
 
@@ -263,10 +263,10 @@ static int xhci_slot_context_show(struct seq_file *s, void *unused)
        xhci = hcd_to_xhci(bus_to_hcd(dev->udev->bus));
        slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx);
        seq_printf(s, "%pad: %s\n", &dev->out_ctx->dma,
-                  xhci_decode_slot_context(slot_ctx->dev_info,
-                                           slot_ctx->dev_info2,
-                                           slot_ctx->tt_info,
-                                           slot_ctx->dev_state));
+                  xhci_decode_slot_context(le32_to_cpu(slot_ctx->dev_info),
+                                           le32_to_cpu(slot_ctx->dev_info2),
+                                           le32_to_cpu(slot_ctx->tt_info),
+                                           le32_to_cpu(slot_ctx->dev_state)));
 
        return 0;
 }
@@ -286,10 +286,10 @@ static int xhci_endpoint_context_show(struct seq_file *s, void *unused)
                ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, dci);
                dma = dev->out_ctx->dma + dci * CTX_SIZE(xhci->hcc_params);
                seq_printf(s, "%pad: %s\n", &dma,
-                          xhci_decode_ep_context(ep_ctx->ep_info,
-                                                 ep_ctx->ep_info2,
-                                                 ep_ctx->deq,
-                                                 ep_ctx->tx_info));
+                          xhci_decode_ep_context(le32_to_cpu(ep_ctx->ep_info),
+                                                 le32_to_cpu(ep_ctx->ep_info2),
+                                                 le64_to_cpu(ep_ctx->deq),
+                                                 le32_to_cpu(ep_ctx->tx_info)));
        }
 
        return 0;
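
The debugfs hunks above all apply the same rule: xHCI TRBs and contexts sit in controller-owned little-endian memory, so every field is converted at the point of use, and the 64-bit dequeue pointer needs le64_to_cpu() rather than the 32-bit variant. A loose sketch, with a struct that only approximates the real context layout:

#include <linux/types.h>
#include <asm/byteorder.h>

/* Sketch: fields read out of controller-owned memory are __le32 or
 * __le64 and must be converted before being decoded or printed.
 */
struct ep_ctx_like {
	__le32 ep_info;
	__le32 ep_info2;
	__le64 deq;
	__le32 tx_info;
};

static u64 ep_ctx_dequeue(const struct ep_ctx_like *ctx)
{
	return le64_to_cpu(ctx->deq); /* le32_to_cpu would truncate it */
}
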
index f498160..3351d07 100644 (file)
@@ -57,6 +57,7 @@ static int xhci_create_intel_xhci_sw_pdev(struct xhci_hcd *xhci, u32 cap_offset)
                ret = platform_device_add_properties(pdev, role_switch_props);
                if (ret) {
                        dev_err(dev, "failed to register device properties\n");
+                       platform_device_put(pdev);
                        return ret;
                }
        }
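
The added platform_device_put() closes a reference leak on the error path. A sketch of the underlying refcount rule, with illustrative names: the reference taken by platform_device_alloc() must be dropped explicitly on any failure that occurs before the device is registered.

#include <linux/platform_device.h>

/* Sketch: "example-child" is a hypothetical device name. */
static int create_child_pdev(void)
{
	struct platform_device *pdev;
	int ret;

	pdev = platform_device_alloc("example-child", PLATFORM_DEVID_NONE);
	if (!pdev)
		return -ENOMEM;

	ret = platform_device_add(pdev);
	if (ret) {
		platform_device_put(pdev); /* releases the alloc reference */
		return ret;
	}
	return 0;
}
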
index 9741cde..e7aab31 100644 (file)
@@ -3202,10 +3202,10 @@ static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len,
        if (usb_urb_dir_out(urb)) {
                len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
                                   seg->bounce_buf, new_buff_len, enqd_len);
-               if (len != seg->bounce_len)
+               if (len != new_buff_len)
                        xhci_warn(xhci,
                                "WARN Wrong bounce buffer write length: %zu != %d\n",
-                               len, seg->bounce_len);
+                               len, new_buff_len);
                seg->bounce_dma = dma_map_single(dev, seg->bounce_buf,
                                                 max_pkt, DMA_TO_DEVICE);
        } else {
@@ -3330,6 +3330,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                        if (xhci_urb_suitable_for_idt(urb)) {
                                memcpy(&send_addr, urb->transfer_buffer,
                                       trb_buff_len);
+                               le64_to_cpus(&send_addr);
                                field |= TRB_IDT;
                        }
                }
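
With Immediate Data Transfer (IDT) the payload bytes are carried in the TRB's 64-bit parameter, which is later written out with cpu_to_le64(); swapping the copied bytes first, as the added le64_to_cpus() does, is a no-op on little-endian hosts and keeps the payload in wire order on big-endian ones. A hedged sketch with a hypothetical helper:

#include <linux/types.h>
#include <linux/string.h>
#include <linux/minmax.h>
#include <asm/byteorder.h>

/* Sketch: pack up to 8 payload bytes into the value that will be
 * stored with cpu_to_le64(); the two swaps cancel on big-endian.
 */
static u64 pack_idt_payload(const void *buf, size_t len)
{
	u64 data = 0;

	memcpy(&data, buf, min_t(size_t, len, sizeof(data)));
	le64_to_cpus(&data);
	return data;
}
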
@@ -3475,6 +3476,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                if (xhci_urb_suitable_for_idt(urb)) {
                        memcpy(&addr, urb->transfer_buffer,
                               urb->transfer_buffer_length);
+                       le64_to_cpus(&addr);
                        field |= TRB_IDT;
                } else {
                        addr = (u64) urb->transfer_dma;
index 5008659..6c17e3f 100644 (file)
@@ -1032,7 +1032,7 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
        writel(command, &xhci->op_regs->command);
        xhci->broken_suspend = 0;
        if (xhci_handshake(&xhci->op_regs->status,
-                               STS_SAVE, 0, 10 * 1000)) {
+                               STS_SAVE, 0, 20 * 1000)) {
        /*
         * AMD SNPS xHC 3.0 occasionally does not clear the
         * SSS bit of USBSTS and when driver tries to poll
@@ -1108,6 +1108,18 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
                hibernated = true;
 
        if (!hibernated) {
+               /*
+                * Some controllers might lose power during suspend, so wait
+                * for controller not ready bit to clear, just as in xHC init.
+                */
+               retval = xhci_handshake(&xhci->op_regs->status,
+                                       STS_CNR, 0, 10 * 1000 * 1000);
+               if (retval) {
+                       xhci_warn(xhci, "Controller not ready at resume %d\n",
+                                 retval);
+                       spin_unlock_irq(&xhci->lock);
+                       return retval;
+               }
                /* step 1: restore register */
                xhci_restore_registers(xhci);
                /* step 2: initialize command ring buffer */
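
The resume path reuses the same handshake as init: poll the status register until the CNR (controller not ready) bit clears or a microsecond timeout expires. A sketch of what such a handshake amounts to, assuming the readl_poll_timeout_atomic() helper from linux/iopoll.h and a hypothetical wrapper name:

#include <linux/iopoll.h>

/* Sketch: spin on an MMIO register until (reg & mask) == done,
 * returning 0 on success or -ETIMEDOUT after usec microseconds.
 */
static int mmio_handshake(void __iomem *reg, u32 mask, u32 done, u64 usec)
{
	u32 val;

	return readl_poll_timeout_atomic(reg, val, (val & mask) == done,
					 1, usec);
}
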
@@ -3059,6 +3071,48 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, unsigned int ep_index,
        }
 }
 
+static void xhci_endpoint_disable(struct usb_hcd *hcd,
+                                 struct usb_host_endpoint *host_ep)
+{
+       struct xhci_hcd         *xhci;
+       struct xhci_virt_device *vdev;
+       struct xhci_virt_ep     *ep;
+       struct usb_device       *udev;
+       unsigned long           flags;
+       unsigned int            ep_index;
+
+       xhci = hcd_to_xhci(hcd);
+rescan:
+       spin_lock_irqsave(&xhci->lock, flags);
+
+       udev = (struct usb_device *)host_ep->hcpriv;
+       if (!udev || !udev->slot_id)
+               goto done;
+
+       vdev = xhci->devs[udev->slot_id];
+       if (!vdev)
+               goto done;
+
+       ep_index = xhci_get_endpoint_index(&host_ep->desc);
+       ep = &vdev->eps[ep_index];
+       if (!ep)
+               goto done;
+
+       /* wait for hub_tt_work to finish clearing hub TT */
+       if (ep->ep_state & EP_CLEARING_TT) {
+               spin_unlock_irqrestore(&xhci->lock, flags);
+               schedule_timeout_uninterruptible(1);
+               goto rescan;
+       }
+
+       if (ep->ep_state)
+               xhci_dbg(xhci, "endpoint disable with ep_state 0x%x\n",
+                        ep->ep_state);
+done:
+       host_ep->hcpriv = NULL;
+       spin_unlock_irqrestore(&xhci->lock, flags);
+}
+
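
xhci_endpoint_disable() above uses the drop-lock-and-rescan idiom: a spinlock cannot be held across a sleep, so the code releases it, sleeps a jiffy, and restarts the whole check from the top. A stripped-down sketch with illustrative names:

#include <linux/spinlock.h>
#include <linux/sched.h>

/* Sketch: the flag must be rechecked from scratch after every
 * sleep, since the state may have changed while unlocked.
 */
static void wait_for_state_clear(spinlock_t *lock, unsigned int *state,
				 unsigned int flag)
{
	unsigned long flags;

rescan:
	spin_lock_irqsave(lock, flags);
	if (*state & flag) {
		spin_unlock_irqrestore(lock, flags);
		schedule_timeout_uninterruptible(1);
		goto rescan;
	}
	spin_unlock_irqrestore(lock, flags);
}
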
 /*
  * Called after usb core issues a clear halt control message.
  * The host side of the halt should already be cleared by a reset endpoint
@@ -3083,6 +3137,7 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
        unsigned int ep_index;
        unsigned long flags;
        u32 ep_flag;
+       int err;
 
        xhci = hcd_to_xhci(hcd);
        if (!host_ep->hcpriv)
@@ -3142,7 +3197,17 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
                xhci_free_command(xhci, cfg_cmd);
                goto cleanup;
        }
-       xhci_queue_stop_endpoint(xhci, stop_cmd, udev->slot_id, ep_index, 0);
+
+       err = xhci_queue_stop_endpoint(xhci, stop_cmd, udev->slot_id,
+                                       ep_index, 0);
+       if (err < 0) {
+               spin_unlock_irqrestore(&xhci->lock, flags);
+               xhci_free_command(xhci, cfg_cmd);
+               xhci_dbg(xhci, "%s: Failed to queue stop ep command, %d\n",
+                               __func__, err);
+               goto cleanup;
+       }
+
        xhci_ring_cmd_db(xhci);
        spin_unlock_irqrestore(&xhci->lock, flags);
 
@@ -3156,8 +3221,16 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
                                           ctrl_ctx, ep_flag, ep_flag);
        xhci_endpoint_copy(xhci, cfg_cmd->in_ctx, vdev->out_ctx, ep_index);
 
-       xhci_queue_configure_endpoint(xhci, cfg_cmd, cfg_cmd->in_ctx->dma,
+       err = xhci_queue_configure_endpoint(xhci, cfg_cmd, cfg_cmd->in_ctx->dma,
                                      udev->slot_id, false);
+       if (err < 0) {
+               spin_unlock_irqrestore(&xhci->lock, flags);
+               xhci_free_command(xhci, cfg_cmd);
+               xhci_dbg(xhci, "%s: Failed to queue config ep command, %d\n",
+                               __func__, err);
+               goto cleanup;
+       }
+
        xhci_ring_cmd_db(xhci);
        spin_unlock_irqrestore(&xhci->lock, flags);
 
@@ -4674,12 +4747,12 @@ static int xhci_update_timeout_for_endpoint(struct xhci_hcd *xhci,
        alt_timeout = xhci_call_host_update_timeout_for_endpoint(xhci, udev,
                desc, state, timeout);
 
-       /* If we found we can't enable hub-initiated LPM, or
+       /* If we found we can't enable hub-initiated LPM, and
         * the U1 or U2 exit latency was too high to allow
-        * device-initiated LPM as well, just stop searching.
+        * device-initiated LPM as well, then we will disable LPM
+        * for this device, so stop searching any further.
         */
-       if (alt_timeout == USB3_LPM_DISABLED ||
-                       alt_timeout == USB3_LPM_DEVICE_INITIATED) {
+       if (alt_timeout == USB3_LPM_DISABLED) {
                *timeout = alt_timeout;
                return -E2BIG;
        }
@@ -4790,10 +4863,12 @@ static u16 xhci_calculate_lpm_timeout(struct usb_hcd *hcd,
                if (intf->dev.driver) {
                        driver = to_usb_driver(intf->dev.driver);
                        if (driver && driver->disable_hub_initiated_lpm) {
-                               dev_dbg(&udev->dev, "Hub-initiated %s disabled "
-                                               "at request of driver %s\n",
-                                               state_name, driver->name);
-                               return xhci_get_timeout_no_hub_lpm(udev, state);
+                               dev_dbg(&udev->dev, "Hub-initiated %s disabled at request of driver %s\n",
+                                       state_name, driver->name);
+                               timeout = xhci_get_timeout_no_hub_lpm(udev,
+                                                                     state);
+                               if (timeout == USB3_LPM_DISABLED)
+                                       return timeout;
                        }
                }
 
@@ -5077,11 +5152,18 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
                hcd->has_tt = 1;
        } else {
                /*
-                * Some 3.1 hosts return sbrn 0x30, use xhci supported protocol
-                * minor revision instead of sbrn. Minor revision is a two digit
-                * BCD containing minor and sub-minor numbers, only show minor.
+                * Early xHCI 1.1 spec did not mention USB 3.1 capable hosts
+                * should return 0x31 for sbrn, or that the minor revision
+                * is a two digit BCD containing minor and sub-minor numbers.
+                * This was later clarified in xHCI 1.2.
+                *
+                * Some USB 3.1 capable hosts therefore have sbrn 0x30, and
+                * minor revision set to 0x1 instead of 0x10.
                 */
-               minor_rev = xhci->usb3_rhub.min_rev / 0x10;
+               if (xhci->usb3_rhub.min_rev == 0x1)
+                       minor_rev = 1;
+               else
+                       minor_rev = xhci->usb3_rhub.min_rev / 0x10;
 
                switch (minor_rev) {
                case 2:
@@ -5199,11 +5281,12 @@ static void xhci_clear_tt_buffer_complete(struct usb_hcd *hcd,
        unsigned long flags;
 
        xhci = hcd_to_xhci(hcd);
+
+       spin_lock_irqsave(&xhci->lock, flags);
        udev = (struct usb_device *)ep->hcpriv;
        slot_id = udev->slot_id;
        ep_index = xhci_get_endpoint_index(&ep->desc);
 
-       spin_lock_irqsave(&xhci->lock, flags);
        xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_CLEARING_TT;
        xhci_ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
        spin_unlock_irqrestore(&xhci->lock, flags);
@@ -5240,6 +5323,7 @@ static const struct hc_driver xhci_hc_driver = {
        .free_streams =         xhci_free_streams,
        .add_endpoint =         xhci_add_endpoint,
        .drop_endpoint =        xhci_drop_endpoint,
+       .endpoint_disable =     xhci_endpoint_disable,
        .endpoint_reset =       xhci_endpoint_reset,
        .check_bandwidth =      xhci_check_bandwidth,
        .reset_bandwidth =      xhci_reset_bandwidth,
index 0a57c2c..7a6b122 100644 (file)
@@ -716,6 +716,10 @@ static int mts_usb_probe(struct usb_interface *intf,
 
        }
 
+       if (ep_in_current != &ep_in_set[2]) {
+               MTS_WARNING("couldn't find two input bulk endpoints. Bailing out.\n");
+               return -ENODEV;
+       }
 
        if ( ep_out == -1 ) {
                MTS_WARNING( "couldn't find an output bulk endpoint. Bailing out.\n" );
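
The added check rejects interfaces that lack the expected endpoints at probe time instead of faulting later. A sketch of the same idea using the generic USB core helper (note the real fix above needs two bulk-in endpoints, which this generic helper does not express):

#include <linux/usb.h>

/* Sketch: usb_find_common_endpoints() returns 0 only if at least
 * one bulk-in and one bulk-out endpoint exist in the altsetting,
 * so later I/O can trust the descriptors.
 */
static int validate_bulk_endpoints(struct usb_interface *intf)
{
	struct usb_endpoint_descriptor *bulk_in, *bulk_out;

	return usb_find_common_endpoints(intf->cur_altsetting,
					 &bulk_in, &bulk_out, NULL, NULL);
}
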
index bdae62b..9bce583 100644 (file)
@@ -47,16 +47,6 @@ config USB_SEVSEG
          To compile this driver as a module, choose M here: the
          module will be called usbsevseg.
 
-config USB_RIO500
-       tristate "USB Diamond Rio500 support"
-       help
-         Say Y here if you want to connect a USB Rio500 mp3 player to your
-         computer's USB port. Please read <file:Documentation/usb/rio.rst>
-         for more information.
-
-         To compile this driver as a module, choose M here: the
-         module will be called rio500.
-
 config USB_LEGOTOWER
        tristate "USB Lego Infrared Tower support"
        help
index 109f54f..0d416eb 100644 (file)
@@ -17,7 +17,6 @@ obj-$(CONFIG_USB_ISIGHTFW)            += isight_firmware.o
 obj-$(CONFIG_USB_LCD)                  += usblcd.o
 obj-$(CONFIG_USB_LD)                   += ldusb.o
 obj-$(CONFIG_USB_LEGOTOWER)            += legousbtower.o
-obj-$(CONFIG_USB_RIO500)               += rio500.o
 obj-$(CONFIG_USB_TEST)                 += usbtest.o
 obj-$(CONFIG_USB_EHSET_TEST_FIXTURE)    += ehset.o
 obj-$(CONFIG_USB_TRANCEVIBRATOR)       += trancevibrator.o
index 344d523..6f5edb9 100644 (file)
@@ -75,6 +75,7 @@ struct adu_device {
        char                    serial_number[8];
 
        int                     open_count; /* number of times this port has been opened */
+       unsigned long           disconnected:1;
 
        char            *read_buffer_primary;
        int                     read_buffer_length;
@@ -116,7 +117,7 @@ static void adu_abort_transfers(struct adu_device *dev)
 {
        unsigned long flags;
 
-       if (dev->udev == NULL)
+       if (dev->disconnected)
                return;
 
        /* shutdown transfer */
@@ -148,6 +149,7 @@ static void adu_delete(struct adu_device *dev)
        kfree(dev->read_buffer_secondary);
        kfree(dev->interrupt_in_buffer);
        kfree(dev->interrupt_out_buffer);
+       usb_put_dev(dev->udev);
        kfree(dev);
 }
 
@@ -243,7 +245,7 @@ static int adu_open(struct inode *inode, struct file *file)
        }
 
        dev = usb_get_intfdata(interface);
-       if (!dev || !dev->udev) {
+       if (!dev) {
                retval = -ENODEV;
                goto exit_no_device;
        }
@@ -326,7 +328,7 @@ static int adu_release(struct inode *inode, struct file *file)
        }
 
        adu_release_internal(dev);
-       if (dev->udev == NULL) {
+       if (dev->disconnected) {
                /* the device was unplugged before the file was released */
                if (!dev->open_count)   /* ... and we're the last user */
                        adu_delete(dev);
@@ -354,7 +356,7 @@ static ssize_t adu_read(struct file *file, __user char *buffer, size_t count,
                return -ERESTARTSYS;
 
        /* verify that the device wasn't unplugged */
-       if (dev->udev == NULL) {
+       if (dev->disconnected) {
                retval = -ENODEV;
                pr_err("No device or device unplugged %d\n", retval);
                goto exit;
@@ -518,7 +520,7 @@ static ssize_t adu_write(struct file *file, const __user char *buffer,
                goto exit_nolock;
 
        /* verify that the device wasn't unplugged */
-       if (dev->udev == NULL) {
+       if (dev->disconnected) {
                retval = -ENODEV;
                pr_err("No device or device unplugged %d\n", retval);
                goto exit;
@@ -663,7 +665,7 @@ static int adu_probe(struct usb_interface *interface,
 
        mutex_init(&dev->mtx);
        spin_lock_init(&dev->buflock);
-       dev->udev = udev;
+       dev->udev = usb_get_dev(udev);
        init_waitqueue_head(&dev->read_wait);
        init_waitqueue_head(&dev->write_wait);
 
@@ -762,14 +764,18 @@ static void adu_disconnect(struct usb_interface *interface)
 
        dev = usb_get_intfdata(interface);
 
-       mutex_lock(&dev->mtx);  /* not interruptible */
-       dev->udev = NULL;       /* poison */
        usb_deregister_dev(interface, &adu_class);
-       mutex_unlock(&dev->mtx);
+
+       usb_poison_urb(dev->interrupt_in_urb);
+       usb_poison_urb(dev->interrupt_out_urb);
 
        mutex_lock(&adutux_mutex);
        usb_set_intfdata(interface, NULL);
 
+       mutex_lock(&dev->mtx);  /* not interruptible */
+       dev->disconnected = 1;
+       mutex_unlock(&dev->mtx);
+
        /* if the device is not opened, then we clean up right now */
        if (!dev->open_count)
                adu_delete(dev);
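
The adutux rework replaces NULL-poisoning of dev->udev with a dedicated disconnected flag, pins the usb_device with usb_get_dev() for as long as the private data lives, and poisons the URBs so in-flight I/O fails fast. A sketch of the lifetime half, with illustrative names:

#include <linux/slab.h>
#include <linux/usb.h>

/* Sketch: hold a usb_device reference from probe until delete, so
 * dev->udev stays valid for files that remain open past disconnect.
 */
struct example_dev {
	struct usb_device *udev;
	unsigned long disconnected:1;
};

static void example_bind(struct example_dev *dev, struct usb_device *udev)
{
	dev->udev = usb_get_dev(udev); /* reference held until delete */
}

static void example_delete(struct example_dev *dev)
{
	usb_put_dev(dev->udev); /* safe even after disconnect */
	kfree(dev);
}
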
index cf5828c..34e6cd6 100644 (file)
@@ -98,6 +98,7 @@ static void chaoskey_free(struct chaoskey *dev)
                usb_free_urb(dev->urb);
                kfree(dev->name);
                kfree(dev->buf);
+               usb_put_intf(dev->interface);
                kfree(dev);
        }
 }
@@ -145,6 +146,8 @@ static int chaoskey_probe(struct usb_interface *interface,
        if (dev == NULL)
                goto out;
 
+       dev->interface = usb_get_intf(interface);
+
        dev->buf = kmalloc(size, GFP_KERNEL);
 
        if (dev->buf == NULL)
@@ -174,8 +177,6 @@ static int chaoskey_probe(struct usb_interface *interface,
                        goto out;
        }
 
-       dev->interface = interface;
-
        dev->in_ep = in_ep;
 
        if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID)
index f5bed9f..dce44fb 100644 (file)
@@ -54,11 +54,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-/* Module parameters */
-static DEFINE_MUTEX(iowarrior_mutex);
-
 static struct usb_driver iowarrior_driver;
-static DEFINE_MUTEX(iowarrior_open_disc_lock);
 
 /*--------------*/
 /*     data     */
@@ -87,6 +83,7 @@ struct iowarrior {
        char chip_serial[9];            /* the serial number string of the chip connected */
        int report_size;                /* number of bytes in a report */
        u16 product_id;
+       struct usb_anchor submitted;
 };
 
 /*--------------*/
@@ -243,6 +240,7 @@ static inline void iowarrior_delete(struct iowarrior *dev)
        kfree(dev->int_in_buffer);
        usb_free_urb(dev->int_in_urb);
        kfree(dev->read_queue);
+       usb_put_intf(dev->interface);
        kfree(dev);
 }
 
@@ -424,11 +422,13 @@ static ssize_t iowarrior_write(struct file *file,
                        retval = -EFAULT;
                        goto error;
                }
+               usb_anchor_urb(int_out_urb, &dev->submitted);
                retval = usb_submit_urb(int_out_urb, GFP_KERNEL);
                if (retval) {
                        dev_dbg(&dev->interface->dev,
                                "submit error %d for urb nr.%d\n",
                                retval, atomic_read(&dev->write_busy));
+                       usb_unanchor_urb(int_out_urb);
                        goto error;
                }
                /* submit was ok */
@@ -477,8 +477,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
        if (!buffer)
                return -ENOMEM;
 
-       /* lock this object */
-       mutex_lock(&iowarrior_mutex);
        mutex_lock(&dev->mutex);
 
        /* verify that the device wasn't unplugged */
@@ -571,7 +569,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
 error_out:
        /* unlock the device */
        mutex_unlock(&dev->mutex);
-       mutex_unlock(&iowarrior_mutex);
        kfree(buffer);
        return retval;
 }
@@ -586,27 +583,20 @@ static int iowarrior_open(struct inode *inode, struct file *file)
        int subminor;
        int retval = 0;
 
-       mutex_lock(&iowarrior_mutex);
        subminor = iminor(inode);
 
        interface = usb_find_interface(&iowarrior_driver, subminor);
        if (!interface) {
-               mutex_unlock(&iowarrior_mutex);
-               printk(KERN_ERR "%s - error, can't find device for minor %d\n",
+               pr_err("%s - error, can't find device for minor %d\n",
                       __func__, subminor);
                return -ENODEV;
        }
 
-       mutex_lock(&iowarrior_open_disc_lock);
        dev = usb_get_intfdata(interface);
-       if (!dev) {
-               mutex_unlock(&iowarrior_open_disc_lock);
-               mutex_unlock(&iowarrior_mutex);
+       if (!dev)
                return -ENODEV;
-       }
 
        mutex_lock(&dev->mutex);
-       mutex_unlock(&iowarrior_open_disc_lock);
 
        /* Only one process can open each device, no sharing. */
        if (dev->opened) {
@@ -628,7 +618,6 @@ static int iowarrior_open(struct inode *inode, struct file *file)
 
 out:
        mutex_unlock(&dev->mutex);
-       mutex_unlock(&iowarrior_mutex);
        return retval;
 }
 
@@ -764,11 +753,13 @@ static int iowarrior_probe(struct usb_interface *interface,
        init_waitqueue_head(&dev->write_wait);
 
        dev->udev = udev;
-       dev->interface = interface;
+       dev->interface = usb_get_intf(interface);
 
        iface_desc = interface->cur_altsetting;
        dev->product_id = le16_to_cpu(udev->descriptor.idProduct);
 
+       init_usb_anchor(&dev->submitted);
+
        res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint);
        if (res) {
                dev_err(&interface->dev, "no interrupt-in endpoint found\n");
@@ -836,7 +827,6 @@ static int iowarrior_probe(struct usb_interface *interface,
        if (retval) {
                /* something prevented us from registering this driver */
                dev_err(&interface->dev, "Not able to get a minor for this device.\n");
-               usb_set_intfdata(interface, NULL);
                goto error;
        }
 
@@ -860,26 +850,15 @@ error:
  */
 static void iowarrior_disconnect(struct usb_interface *interface)
 {
-       struct iowarrior *dev;
-       int minor;
-
-       dev = usb_get_intfdata(interface);
-       mutex_lock(&iowarrior_open_disc_lock);
-       usb_set_intfdata(interface, NULL);
-       /* prevent device read, write and ioctl */
-       dev->present = 0;
-
-       minor = dev->minor;
-       mutex_unlock(&iowarrior_open_disc_lock);
-       /* give back our minor - this will call close() locks need to be dropped at this point*/
+       struct iowarrior *dev = usb_get_intfdata(interface);
+       int minor = dev->minor;
 
        usb_deregister_dev(interface, &iowarrior_class);
 
        mutex_lock(&dev->mutex);
 
        /* prevent device read, write and ioctl */
-
-       mutex_unlock(&dev->mutex);
+       dev->present = 0;
 
        if (dev->opened) {
                /* There is a process that holds a filedescriptor to the device ,
@@ -887,10 +866,13 @@ static void iowarrior_disconnect(struct usb_interface *interface)
                   Deleting the device is postponed until close() was called.
                 */
                usb_kill_urb(dev->int_in_urb);
+               usb_kill_anchored_urbs(&dev->submitted);
                wake_up_interruptible(&dev->read_wait);
                wake_up_interruptible(&dev->write_wait);
+               mutex_unlock(&dev->mutex);
        } else {
                /* no process is using the device, cleanup now */
+               mutex_unlock(&dev->mutex);
                iowarrior_delete(dev);
        }
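
iowarrior now anchors its write URBs, so disconnect can cancel every outstanding one with a single usb_kill_anchored_urbs() call instead of tracking them by hand. A minimal sketch of the anchor idiom:

#include <linux/usb.h>

/* Sketch: anchor before submit, unanchor on submit failure so the
 * anchor list stays accurate.
 */
static int submit_anchored(struct usb_anchor *anchor, struct urb *urb)
{
	int ret;

	usb_anchor_urb(urb, anchor);
	ret = usb_submit_urb(urb, GFP_KERNEL);
	if (ret)
		usb_unanchor_urb(urb);
	return ret;
}

/* at disconnect time: usb_kill_anchored_urbs(anchor); */
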
 
index 6581774..8f86b4e 100644 (file)
@@ -153,6 +153,7 @@ MODULE_PARM_DESC(min_interrupt_out_interval, "Minimum interrupt out interval in
 struct ld_usb {
        struct mutex            mutex;          /* locks this structure */
        struct usb_interface    *intf;          /* save off the usb interface pointer */
+       unsigned long           disconnected:1;
 
        int                     open_count;     /* number of times this port has been opened */
 
@@ -192,12 +193,10 @@ static void ld_usb_abort_transfers(struct ld_usb *dev)
        /* shutdown transfer */
        if (dev->interrupt_in_running) {
                dev->interrupt_in_running = 0;
-               if (dev->intf)
-                       usb_kill_urb(dev->interrupt_in_urb);
+               usb_kill_urb(dev->interrupt_in_urb);
        }
        if (dev->interrupt_out_busy)
-               if (dev->intf)
-                       usb_kill_urb(dev->interrupt_out_urb);
+               usb_kill_urb(dev->interrupt_out_urb);
 }
 
 /**
@@ -205,8 +204,6 @@ static void ld_usb_abort_transfers(struct ld_usb *dev)
  */
 static void ld_usb_delete(struct ld_usb *dev)
 {
-       ld_usb_abort_transfers(dev);
-
        /* free data structures */
        usb_free_urb(dev->interrupt_in_urb);
        usb_free_urb(dev->interrupt_out_urb);
@@ -263,7 +260,7 @@ static void ld_usb_interrupt_in_callback(struct urb *urb)
 
 resubmit:
        /* resubmit if we're still running */
-       if (dev->interrupt_in_running && !dev->buffer_overflow && dev->intf) {
+       if (dev->interrupt_in_running && !dev->buffer_overflow) {
                retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC);
                if (retval) {
                        dev_err(&dev->intf->dev,
@@ -383,16 +380,13 @@ static int ld_usb_release(struct inode *inode, struct file *file)
                goto exit;
        }
 
-       if (mutex_lock_interruptible(&dev->mutex)) {
-               retval = -ERESTARTSYS;
-               goto exit;
-       }
+       mutex_lock(&dev->mutex);
 
        if (dev->open_count != 1) {
                retval = -ENODEV;
                goto unlock_exit;
        }
-       if (dev->intf == NULL) {
+       if (dev->disconnected) {
                /* the device was unplugged before the file was released */
                mutex_unlock(&dev->mutex);
                /* unlock here as ld_usb_delete frees dev */
@@ -423,7 +417,7 @@ static __poll_t ld_usb_poll(struct file *file, poll_table *wait)
 
        dev = file->private_data;
 
-       if (!dev->intf)
+       if (dev->disconnected)
                return EPOLLERR | EPOLLHUP;
 
        poll_wait(file, &dev->read_wait, wait);
@@ -462,7 +456,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
        }
 
        /* verify that the device wasn't unplugged */
-       if (dev->intf == NULL) {
+       if (dev->disconnected) {
                retval = -ENODEV;
                printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval);
                goto unlock_exit;
@@ -470,7 +464,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
 
        /* wait for data */
        spin_lock_irq(&dev->rbsl);
-       if (dev->ring_head == dev->ring_tail) {
+       while (dev->ring_head == dev->ring_tail) {
                dev->interrupt_in_done = 0;
                spin_unlock_irq(&dev->rbsl);
                if (file->f_flags & O_NONBLOCK) {
@@ -480,15 +474,20 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
                retval = wait_event_interruptible(dev->read_wait, dev->interrupt_in_done);
                if (retval < 0)
                        goto unlock_exit;
-       } else {
-               spin_unlock_irq(&dev->rbsl);
+
+               spin_lock_irq(&dev->rbsl);
        }
+       spin_unlock_irq(&dev->rbsl);
 
        /* actual_buffer contains actual_length + interrupt_in_buffer */
        actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size));
+       if (*actual_buffer > dev->interrupt_in_endpoint_size) {
+               retval = -EIO;
+               goto unlock_exit;
+       }
        bytes_to_read = min(count, *actual_buffer);
        if (bytes_to_read < *actual_buffer)
-               dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n",
+               dev_warn(&dev->intf->dev, "Read buffer overflow, %zu bytes dropped\n",
                         *actual_buffer-bytes_to_read);
 
        /* copy one interrupt_in_buffer from ring_buffer into userspace */
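
Turning the emptiness test into a while loop matters because a wakeup only means "recheck", not "data is available": the condition must be re-evaluated under the lock after every wait. A sketch of the corrected wait, with illustrative parameters:

#include <linux/spinlock.h>
#include <linux/wait.h>

/* Sketch: loop on the empty test, dropping the lock only for the
 * sleep and retaking it before the recheck.
 */
static int wait_for_ring_data(spinlock_t *lock, wait_queue_head_t *wq,
			      int *head, int *tail, int *done)
{
	int ret;

	spin_lock_irq(lock);
	while (*head == *tail) {
		*done = 0;
		spin_unlock_irq(lock);
		ret = wait_event_interruptible(*wq, *done);
		if (ret)
			return ret; /* interrupted by a signal */
		spin_lock_irq(lock);
	}
	spin_unlock_irq(lock);
	return 0;
}
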
@@ -496,11 +495,11 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
                retval = -EFAULT;
                goto unlock_exit;
        }
-       dev->ring_tail = (dev->ring_tail+1) % ring_buffer_size;
-
        retval = bytes_to_read;
 
        spin_lock_irq(&dev->rbsl);
+       dev->ring_tail = (dev->ring_tail + 1) % ring_buffer_size;
+
        if (dev->buffer_overflow) {
                dev->buffer_overflow = 0;
                spin_unlock_irq(&dev->rbsl);
@@ -542,7 +541,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer,
        }
 
        /* verify that the device wasn't unplugged */
-       if (dev->intf == NULL) {
+       if (dev->disconnected) {
                retval = -ENODEV;
                printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval);
                goto unlock_exit;
@@ -563,8 +562,9 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer,
        /* write the data into interrupt_out_buffer from userspace */
        bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size);
        if (bytes_to_write < count)
-               dev_warn(&dev->intf->dev, "Write buffer overflow, %zd bytes dropped\n", count-bytes_to_write);
-       dev_dbg(&dev->intf->dev, "%s: count = %zd, bytes_to_write = %zd\n",
+               dev_warn(&dev->intf->dev, "Write buffer overflow, %zu bytes dropped\n",
+                       count - bytes_to_write);
+       dev_dbg(&dev->intf->dev, "%s: count = %zu, bytes_to_write = %zu\n",
                __func__, count, bytes_to_write);
 
        if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) {
@@ -581,7 +581,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer,
                                         1 << 8, 0,
                                         dev->interrupt_out_buffer,
                                         bytes_to_write,
-                                        USB_CTRL_SET_TIMEOUT * HZ);
+                                        USB_CTRL_SET_TIMEOUT);
                if (retval < 0)
                        dev_err(&dev->intf->dev,
                                "Couldn't submit HID_REQ_SET_REPORT %d\n",
@@ -696,10 +696,9 @@ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id *
                dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n");
 
        dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint);
-       dev->ring_buffer =
-               kmalloc_array(ring_buffer_size,
-                             sizeof(size_t) + dev->interrupt_in_endpoint_size,
-                             GFP_KERNEL);
+       dev->ring_buffer = kcalloc(ring_buffer_size,
+                       sizeof(size_t) + dev->interrupt_in_endpoint_size,
+                       GFP_KERNEL);
        if (!dev->ring_buffer)
                goto error;
        dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL);
@@ -764,6 +763,9 @@ static void ld_usb_disconnect(struct usb_interface *intf)
        /* give back our minor */
        usb_deregister_dev(intf, &ld_usb_class);
 
+       usb_poison_urb(dev->interrupt_in_urb);
+       usb_poison_urb(dev->interrupt_out_urb);
+
        mutex_lock(&dev->mutex);
 
        /* if the device is not opened, then we clean up right now */
@@ -771,7 +773,7 @@ static void ld_usb_disconnect(struct usb_interface *intf)
                mutex_unlock(&dev->mutex);
                ld_usb_delete(dev);
        } else {
-               dev->intf = NULL;
+               dev->disconnected = 1;
                /* wake up pollers */
                wake_up_interruptible_all(&dev->read_wait);
                wake_up_interruptible_all(&dev->write_wait);
index 006cf13..23061f1 100644 (file)
@@ -179,7 +179,6 @@ static const struct usb_device_id tower_table[] = {
 };
 
 MODULE_DEVICE_TABLE (usb, tower_table);
-static DEFINE_MUTEX(open_disc_mutex);
 
 #define LEGO_USB_TOWER_MINOR_BASE      160
 
@@ -191,6 +190,7 @@ struct lego_usb_tower {
        unsigned char           minor;          /* the starting minor number for this device */
 
        int                     open_count;     /* number of times this port has been opened */
+       unsigned long           disconnected:1;
 
        char*                   read_buffer;
        size_t                  read_buffer_length; /* this much came in */
@@ -290,14 +290,13 @@ static inline void lego_usb_tower_debug_data(struct device *dev,
  */
 static inline void tower_delete (struct lego_usb_tower *dev)
 {
-       tower_abort_transfers (dev);
-
        /* free data structures */
        usb_free_urb(dev->interrupt_in_urb);
        usb_free_urb(dev->interrupt_out_urb);
        kfree (dev->read_buffer);
        kfree (dev->interrupt_in_buffer);
        kfree (dev->interrupt_out_buffer);
+       usb_put_dev(dev->udev);
        kfree (dev);
 }
 
@@ -332,18 +331,14 @@ static int tower_open (struct inode *inode, struct file *file)
                goto exit;
        }
 
-       mutex_lock(&open_disc_mutex);
        dev = usb_get_intfdata(interface);
-
        if (!dev) {
-               mutex_unlock(&open_disc_mutex);
                retval = -ENODEV;
                goto exit;
        }
 
        /* lock this device */
        if (mutex_lock_interruptible(&dev->lock)) {
-               mutex_unlock(&open_disc_mutex);
                retval = -ERESTARTSYS;
                goto exit;
        }
@@ -351,12 +346,9 @@ static int tower_open (struct inode *inode, struct file *file)
 
        /* allow opening only once */
        if (dev->open_count) {
-               mutex_unlock(&open_disc_mutex);
                retval = -EBUSY;
                goto unlock_exit;
        }
-       dev->open_count = 1;
-       mutex_unlock(&open_disc_mutex);
 
        /* reset the tower */
        result = usb_control_msg (dev->udev,
@@ -396,13 +388,14 @@ static int tower_open (struct inode *inode, struct file *file)
                dev_err(&dev->udev->dev,
                        "Couldn't submit interrupt_in_urb %d\n", retval);
                dev->interrupt_in_running = 0;
-               dev->open_count = 0;
                goto unlock_exit;
        }
 
        /* save device in the file's private structure */
        file->private_data = dev;
 
+       dev->open_count = 1;
+
 unlock_exit:
        mutex_unlock(&dev->lock);
 
@@ -423,22 +416,19 @@ static int tower_release (struct inode *inode, struct file *file)
 
        if (dev == NULL) {
                retval = -ENODEV;
-               goto exit_nolock;
-       }
-
-       mutex_lock(&open_disc_mutex);
-       if (mutex_lock_interruptible(&dev->lock)) {
-               retval = -ERESTARTSYS;
                goto exit;
        }
 
+       mutex_lock(&dev->lock);
+
        if (dev->open_count != 1) {
                dev_dbg(&dev->udev->dev, "%s: device not opened exactly once\n",
                        __func__);
                retval = -ENODEV;
                goto unlock_exit;
        }
-       if (dev->udev == NULL) {
+
+       if (dev->disconnected) {
                /* the device was unplugged before the file was released */
 
                /* unlock here as tower_delete frees dev */
@@ -456,10 +446,7 @@ static int tower_release (struct inode *inode, struct file *file)
 
 unlock_exit:
        mutex_unlock(&dev->lock);
-
 exit:
-       mutex_unlock(&open_disc_mutex);
-exit_nolock:
        return retval;
 }
 
@@ -477,10 +464,9 @@ static void tower_abort_transfers (struct lego_usb_tower *dev)
        if (dev->interrupt_in_running) {
                dev->interrupt_in_running = 0;
                mb();
-               if (dev->udev)
-                       usb_kill_urb (dev->interrupt_in_urb);
+               usb_kill_urb(dev->interrupt_in_urb);
        }
-       if (dev->interrupt_out_busy && dev->udev)
+       if (dev->interrupt_out_busy)
                usb_kill_urb(dev->interrupt_out_urb);
 }
 
@@ -516,7 +502,7 @@ static __poll_t tower_poll (struct file *file, poll_table *wait)
 
        dev = file->private_data;
 
-       if (!dev->udev)
+       if (dev->disconnected)
                return EPOLLERR | EPOLLHUP;
 
        poll_wait(file, &dev->read_wait, wait);
@@ -563,7 +549,7 @@ static ssize_t tower_read (struct file *file, char __user *buffer, size_t count,
        }
 
        /* verify that the device wasn't unplugged */
-       if (dev->udev == NULL) {
+       if (dev->disconnected) {
                retval = -ENODEV;
                pr_err("No device or device unplugged %d\n", retval);
                goto unlock_exit;
@@ -649,7 +635,7 @@ static ssize_t tower_write (struct file *file, const char __user *buffer, size_t
        }
 
        /* verify that the device wasn't unplugged */
-       if (dev->udev == NULL) {
+       if (dev->disconnected) {
                retval = -ENODEV;
                pr_err("No device or device unplugged %d\n", retval);
                goto unlock_exit;
@@ -759,7 +745,7 @@ static void tower_interrupt_in_callback (struct urb *urb)
 
 resubmit:
        /* resubmit if we're still running */
-       if (dev->interrupt_in_running && dev->udev) {
+       if (dev->interrupt_in_running) {
                retval = usb_submit_urb (dev->interrupt_in_urb, GFP_ATOMIC);
                if (retval)
                        dev_err(&dev->udev->dev,
@@ -822,8 +808,9 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
 
        mutex_init(&dev->lock);
 
-       dev->udev = udev;
+       dev->udev = usb_get_dev(udev);
        dev->open_count = 0;
+       dev->disconnected = 0;
 
        dev->read_buffer = NULL;
        dev->read_buffer_length = 0;
@@ -891,8 +878,10 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
                                  get_version_reply,
                                  sizeof(*get_version_reply),
                                  1000);
-       if (result < 0) {
-               dev_err(idev, "LEGO USB Tower get version control request failed\n");
+       if (result != sizeof(*get_version_reply)) {
+               if (result >= 0)
+                       result = -EIO;
+               dev_err(idev, "get version request failed: %d\n", result);
                retval = result;
                goto error;
        }
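
usb_control_msg() can return a short positive byte count, which the old check treated as success; the probe now insists on the full reply length. A sketch of that check, assuming a hypothetical vendor request 0x01:

#include <linux/usb.h>

/* Sketch: a short control-IN transfer is mapped to -EIO, since the
 * return code alone does not guarantee a complete reply.
 */
static int vendor_get_checked(struct usb_device *udev, void *reply, u16 len)
{
	int ret;

	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x01,
			      USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			      0, 0, reply, len, 1000);
	if (ret != len)
		return ret < 0 ? ret : -EIO;
	return 0;
}
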
@@ -910,7 +899,6 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
        if (retval) {
                /* something prevented us from registering this driver */
                dev_err(idev, "Not able to get a minor for this device.\n");
-               usb_set_intfdata (interface, NULL);
                goto error;
        }
        dev->minor = interface->minor;
@@ -942,23 +930,24 @@ static void tower_disconnect (struct usb_interface *interface)
        int minor;
 
        dev = usb_get_intfdata (interface);
-       mutex_lock(&open_disc_mutex);
-       usb_set_intfdata (interface, NULL);
 
        minor = dev->minor;
 
-       /* give back our minor */
+       /* give back our minor and prevent further open() */
        usb_deregister_dev (interface, &tower_class);
 
+       /* stop I/O */
+       usb_poison_urb(dev->interrupt_in_urb);
+       usb_poison_urb(dev->interrupt_out_urb);
+
        mutex_lock(&dev->lock);
-       mutex_unlock(&open_disc_mutex);
 
        /* if the device is not opened, then we clean up right now */
        if (!dev->open_count) {
                mutex_unlock(&dev->lock);
                tower_delete (dev);
        } else {
-               dev->udev = NULL;
+               dev->disconnected = 1;
                /* wake up pollers */
                wake_up_interruptible_all(&dev->read_wait);
                wake_up_interruptible_all(&dev->write_wait);
diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c
deleted file mode 100644 (file)
index 30cae5e..0000000
+++ /dev/null
@@ -1,554 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/* -*- linux-c -*- */
-
-/* 
- * Driver for USB Rio 500
- *
- * Cesar Miquel (miquel@df.uba.ar)
- * 
- * based on hp_scanner.c by David E. Nelson (dnelson@jump.net)
- *
- * Based upon mouse.c (Brad Keryan) and printer.c (Michael Gee).
- *
- * Changelog:
- * 30/05/2003  replaced lock/unlock kernel with up/down
- *             Daniele Bellucci  bellucda@tiscali.it
- * */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/sched/signal.h>
-#include <linux/mutex.h>
-#include <linux/errno.h>
-#include <linux/random.h>
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/usb.h>
-#include <linux/wait.h>
-
-#include "rio500_usb.h"
-
-#define DRIVER_AUTHOR "Cesar Miquel <miquel@df.uba.ar>"
-#define DRIVER_DESC "USB Rio 500 driver"
-
-#define RIO_MINOR      64
-
-/* stall/wait timeout for rio */
-#define NAK_TIMEOUT (HZ)
-
-#define IBUF_SIZE 0x1000
-
-/* Size of the rio buffer */
-#define OBUF_SIZE 0x10000
-
-struct rio_usb_data {
-        struct usb_device *rio_dev;     /* init: probe_rio */
-        unsigned int ifnum;             /* Interface number of the USB device */
-        int isopen;                     /* nz if open */
-        int present;                    /* Device is present on the bus */
-        char *obuf, *ibuf;              /* transfer buffers */
-        char bulk_in_ep, bulk_out_ep;   /* Endpoint assignments */
-        wait_queue_head_t wait_q;       /* for timeouts */
-};
-
-static DEFINE_MUTEX(rio500_mutex);
-static struct rio_usb_data rio_instance;
-
-static int open_rio(struct inode *inode, struct file *file)
-{
-       struct rio_usb_data *rio = &rio_instance;
-
-       /* against disconnect() */
-       mutex_lock(&rio500_mutex);
-
-       if (rio->isopen || !rio->present) {
-               mutex_unlock(&rio500_mutex);
-               return -EBUSY;
-       }
-       rio->isopen = 1;
-
-       init_waitqueue_head(&rio->wait_q);
-
-
-       dev_info(&rio->rio_dev->dev, "Rio opened.\n");
-       mutex_unlock(&rio500_mutex);
-
-       return 0;
-}
-
-static int close_rio(struct inode *inode, struct file *file)
-{
-       struct rio_usb_data *rio = &rio_instance;
-
-       /* against disconnect() */
-       mutex_lock(&rio500_mutex);
-
-       rio->isopen = 0;
-       if (!rio->present) {
-               /* cleanup has been delayed */
-               kfree(rio->ibuf);
-               kfree(rio->obuf);
-               rio->ibuf = NULL;
-               rio->obuf = NULL;
-       } else {
-               dev_info(&rio->rio_dev->dev, "Rio closed.\n");
-       }
-       mutex_unlock(&rio500_mutex);
-       return 0;
-}
-
-static long ioctl_rio(struct file *file, unsigned int cmd, unsigned long arg)
-{
-       struct RioCommand rio_cmd;
-       struct rio_usb_data *rio = &rio_instance;
-       void __user *data;
-       unsigned char *buffer;
-       int result, requesttype;
-       int retries;
-       int retval=0;
-
-       mutex_lock(&rio500_mutex);
-        /* Sanity check to make sure rio is connected, powered, etc */
-        if (rio->present == 0 || rio->rio_dev == NULL) {
-               retval = -ENODEV;
-               goto err_out;
-       }
-
-       switch (cmd) {
-       case RIO_RECV_COMMAND:
-               data = (void __user *) arg;
-               if (data == NULL)
-                       break;
-               if (copy_from_user(&rio_cmd, data, sizeof(struct RioCommand))) {
-                       retval = -EFAULT;
-                       goto err_out;
-               }
-               if (rio_cmd.length < 0 || rio_cmd.length > PAGE_SIZE) {
-                       retval = -EINVAL;
-                       goto err_out;
-               }
-               buffer = (unsigned char *) __get_free_page(GFP_KERNEL);
-               if (buffer == NULL) {
-                       retval = -ENOMEM;
-                       goto err_out;
-               }
-               if (copy_from_user(buffer, rio_cmd.buffer, rio_cmd.length)) {
-                       retval = -EFAULT;
-                       free_page((unsigned long) buffer);
-                       goto err_out;
-               }
-
-               requesttype = rio_cmd.requesttype | USB_DIR_IN |
-                   USB_TYPE_VENDOR | USB_RECIP_DEVICE;
-               dev_dbg(&rio->rio_dev->dev,
-                       "sending command:reqtype=%0x req=%0x value=%0x index=%0x len=%0x\n",
-                       requesttype, rio_cmd.request, rio_cmd.value,
-                       rio_cmd.index, rio_cmd.length);
-               /* Send rio control message */
-               retries = 3;
-               while (retries) {
-                       result = usb_control_msg(rio->rio_dev,
-                                                usb_rcvctrlpipe(rio-> rio_dev, 0),
-                                                rio_cmd.request,
-                                                requesttype,
-                                                rio_cmd.value,
-                                                rio_cmd.index, buffer,
-                                                rio_cmd.length,
-                                                jiffies_to_msecs(rio_cmd.timeout));
-                       if (result == -ETIMEDOUT)
-                               retries--;
-                       else if (result < 0) {
-                               dev_err(&rio->rio_dev->dev,
-                                       "Error executing ioctrl. code = %d\n",
-                                       result);
-                               retries = 0;
-                       } else {
-                               dev_dbg(&rio->rio_dev->dev,
-                                       "Executed ioctl. Result = %d (data=%02x)\n",
-                                       result, buffer[0]);
-                               if (copy_to_user(rio_cmd.buffer, buffer,
-                                                rio_cmd.length)) {
-                                       free_page((unsigned long) buffer);
-                                       retval = -EFAULT;
-                                       goto err_out;
-                               }
-                               retries = 0;
-                       }
-
-                       /* rio_cmd.buffer contains a raw stream of single byte
-                          data which has been returned from rio.  Data is
-                          interpreted at application level.  For data that
-                          will be cast to data types longer than 1 byte, data
-                          will be little_endian and will potentially need to
-                          be swapped at the app level */
-
-               }
-               free_page((unsigned long) buffer);
-               break;
-
-       case RIO_SEND_COMMAND:
-               data = (void __user *) arg;
-               if (data == NULL)
-                       break;
-               if (copy_from_user(&rio_cmd, data, sizeof(struct RioCommand))) {
-                       retval = -EFAULT;
-                       goto err_out;
-               }
-               if (rio_cmd.length < 0 || rio_cmd.length > PAGE_SIZE) {
-                       retval = -EINVAL;
-                       goto err_out;
-               }
-               buffer = (unsigned char *) __get_free_page(GFP_KERNEL);
-               if (buffer == NULL) {
-                       retval = -ENOMEM;
-                       goto err_out;
-               }
-               if (copy_from_user(buffer, rio_cmd.buffer, rio_cmd.length)) {
-                       free_page((unsigned long)buffer);
-                       retval = -EFAULT;
-                       goto err_out;
-               }
-
-               requesttype = rio_cmd.requesttype | USB_DIR_OUT |
-                   USB_TYPE_VENDOR | USB_RECIP_DEVICE;
-               dev_dbg(&rio->rio_dev->dev,
-                       "sending command: reqtype=%0x req=%0x value=%0x index=%0x len=%0x\n",
-                       requesttype, rio_cmd.request, rio_cmd.value,
-                       rio_cmd.index, rio_cmd.length);
-               /* Send rio control message */
-               retries = 3;
-               while (retries) {
-                       result = usb_control_msg(rio->rio_dev,
-                                                usb_sndctrlpipe(rio-> rio_dev, 0),
-                                                rio_cmd.request,
-                                                requesttype,
-                                                rio_cmd.value,
-                                                rio_cmd.index, buffer,
-                                                rio_cmd.length,
-                                                jiffies_to_msecs(rio_cmd.timeout));
-                       if (result == -ETIMEDOUT)
-                               retries--;
-                       else if (result < 0) {
-                               dev_err(&rio->rio_dev->dev,
-                                       "Error executing ioctrl. code = %d\n",
-                                       result);
-                               retries = 0;
-                       } else {
-                               dev_dbg(&rio->rio_dev->dev,
-                                       "Executed ioctl. Result = %d\n", result);
-                               retries = 0;
-
-                       }
-
-               }
-               free_page((unsigned long) buffer);
-               break;
-
-       default:
-               retval = -ENOTTY;
-               break;
-       }
-
-
-err_out:
-       mutex_unlock(&rio500_mutex);
-       return retval;
-}
-
-static ssize_t
-write_rio(struct file *file, const char __user *buffer,
-         size_t count, loff_t * ppos)
-{
-       DEFINE_WAIT(wait);
-       struct rio_usb_data *rio = &rio_instance;
-
-       unsigned long copy_size;
-       unsigned long bytes_written = 0;
-       unsigned int partial;
-
-       int result = 0;
-       int maxretry;
-       int errn = 0;
-       int intr;
-
-       intr = mutex_lock_interruptible(&rio500_mutex);
-       if (intr)
-               return -EINTR;
-        /* Sanity check to make sure rio is connected, powered, etc */
-        if (rio->present == 0 || rio->rio_dev == NULL) {
-               mutex_unlock(&rio500_mutex);
-               return -ENODEV;
-       }
-
-
-
-       do {
-               unsigned long thistime;
-               char *obuf = rio->obuf;
-
-               thistime = copy_size =
-                   (count >= OBUF_SIZE) ? OBUF_SIZE : count;
-               if (copy_from_user(rio->obuf, buffer, copy_size)) {
-                       errn = -EFAULT;
-                       goto error;
-               }
-               maxretry = 5;
-               while (thistime) {
-                       if (!rio->rio_dev) {
-                               errn = -ENODEV;
-                               goto error;
-                       }
-                       if (signal_pending(current)) {
-                               mutex_unlock(&rio500_mutex);
-                               return bytes_written ? bytes_written : -EINTR;
-                       }
-
-                       result = usb_bulk_msg(rio->rio_dev,
-                                        usb_sndbulkpipe(rio->rio_dev, 2),
-                                        obuf, thistime, &partial, 5000);
-
-                       dev_dbg(&rio->rio_dev->dev,
-                               "write stats: result:%d thistime:%lu partial:%u\n",
-                               result, thistime, partial);
-
-                       if (result == -ETIMEDOUT) {     /* NAK - so hold for a while */
-                               if (!maxretry--) {
-                                       errn = -ETIME;
-                                       goto error;
-                               }
-                               prepare_to_wait(&rio->wait_q, &wait, TASK_INTERRUPTIBLE);
-                               schedule_timeout(NAK_TIMEOUT);
-                               finish_wait(&rio->wait_q, &wait);
-                               continue;
-                       } else if (!result && partial) {
-                               obuf += partial;
-                               thistime -= partial;
-                       } else
-                               break;
-               }
-               if (result) {
-                       dev_err(&rio->rio_dev->dev, "Write Whoops - %x\n",
-                               result);
-                       errn = -EIO;
-                       goto error;
-               }
-               bytes_written += copy_size;
-               count -= copy_size;
-               buffer += copy_size;
-       } while (count > 0);
-
-       mutex_unlock(&rio500_mutex);
-
-       return bytes_written ? bytes_written : -EIO;
-
-error:
-       mutex_unlock(&rio500_mutex);
-       return errn;
-}
-
-static ssize_t
-read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos)
-{
-       DEFINE_WAIT(wait);
-       struct rio_usb_data *rio = &rio_instance;
-       ssize_t read_count;
-       unsigned int partial;
-       int this_read;
-       int result;
-       int maxretry = 10;
-       char *ibuf;
-       int intr;
-
-       intr = mutex_lock_interruptible(&rio500_mutex);
-       if (intr)
-               return -EINTR;
-       /* Sanity check to make sure rio is connected, powered, etc */
-        if (rio->present == 0 || rio->rio_dev == NULL) {
-               mutex_unlock(&rio500_mutex);
-               return -ENODEV;
-       }
-
-       ibuf = rio->ibuf;
-
-       read_count = 0;
-
-
-       while (count > 0) {
-               if (signal_pending(current)) {
-                       mutex_unlock(&rio500_mutex);
-                       return read_count ? read_count : -EINTR;
-               }
-               if (!rio->rio_dev) {
-                       mutex_unlock(&rio500_mutex);
-                       return -ENODEV;
-               }
-               this_read = (count >= IBUF_SIZE) ? IBUF_SIZE : count;
-
-               result = usb_bulk_msg(rio->rio_dev,
-                                     usb_rcvbulkpipe(rio->rio_dev, 1),
-                                     ibuf, this_read, &partial,
-                                     8000);
-
-               dev_dbg(&rio->rio_dev->dev,
-                       "read stats: result:%d this_read:%u partial:%u\n",
-                       result, this_read, partial);
-
-               if (partial) {
-                       count = this_read = partial;
-               } else if (result == -ETIMEDOUT || result == 15) {      /* FIXME: 15 ??? */
-                       if (!maxretry--) {
-                               mutex_unlock(&rio500_mutex);
-                               dev_err(&rio->rio_dev->dev,
-                                       "read_rio: maxretry timeout\n");
-                               return -ETIME;
-                       }
-                       prepare_to_wait(&rio->wait_q, &wait, TASK_INTERRUPTIBLE);
-                       schedule_timeout(NAK_TIMEOUT);
-                       finish_wait(&rio->wait_q, &wait);
-                       continue;
-               } else if (result != -EREMOTEIO) {
-                       mutex_unlock(&rio500_mutex);
-                       dev_err(&rio->rio_dev->dev,
-                               "Read Whoops - result:%d partial:%u this_read:%u\n",
-                               result, partial, this_read);
-                       return -EIO;
-               } else {
-                       mutex_unlock(&rio500_mutex);
-                       return (0);
-               }
-
-               if (this_read) {
-                       if (copy_to_user(buffer, ibuf, this_read)) {
-                               mutex_unlock(&rio500_mutex);
-                               return -EFAULT;
-                       }
-                       count -= this_read;
-                       read_count += this_read;
-                       buffer += this_read;
-               }
-       }
-       mutex_unlock(&rio500_mutex);
-       return read_count;
-}
-
-static const struct file_operations usb_rio_fops = {
-       .owner =        THIS_MODULE,
-       .read =         read_rio,
-       .write =        write_rio,
-       .unlocked_ioctl = ioctl_rio,
-       .open =         open_rio,
-       .release =      close_rio,
-       .llseek =       noop_llseek,
-};
-
-static struct usb_class_driver usb_rio_class = {
-       .name =         "rio500%d",
-       .fops =         &usb_rio_fops,
-       .minor_base =   RIO_MINOR,
-};
-
-static int probe_rio(struct usb_interface *intf,
-                    const struct usb_device_id *id)
-{
-       struct usb_device *dev = interface_to_usbdev(intf);
-       struct rio_usb_data *rio = &rio_instance;
-       int retval = -ENOMEM;
-       char *ibuf, *obuf;
-
-       if (rio->present) {
-               dev_info(&intf->dev, "Second USB Rio at address %d refused\n", dev->devnum);
-               return -EBUSY;
-       }
-       dev_info(&intf->dev, "USB Rio found at address %d\n", dev->devnum);
-
-       obuf = kmalloc(OBUF_SIZE, GFP_KERNEL);
-       if (!obuf) {
-               dev_err(&dev->dev,
-                       "probe_rio: Not enough memory for the output buffer\n");
-               goto err_obuf;
-       }
-       dev_dbg(&intf->dev, "obuf address: %p\n", obuf);
-
-       ibuf = kmalloc(IBUF_SIZE, GFP_KERNEL);
-       if (!ibuf) {
-               dev_err(&dev->dev,
-                       "probe_rio: Not enough memory for the input buffer\n");
-               goto err_ibuf;
-       }
-       dev_dbg(&intf->dev, "ibuf address: %p\n", ibuf);
-
-       mutex_lock(&rio500_mutex);
-       rio->rio_dev = dev;
-       rio->ibuf = ibuf;
-       rio->obuf = obuf;
-       rio->present = 1;
-       mutex_unlock(&rio500_mutex);
-
-       retval = usb_register_dev(intf, &usb_rio_class);
-       if (retval) {
-               dev_err(&dev->dev,
-                       "Not able to get a minor for this device.\n");
-               goto err_register;
-       }
-
-       usb_set_intfdata(intf, rio);
-       return retval;
-
- err_register:
-       mutex_lock(&rio500_mutex);
-       rio->present = 0;
-       mutex_unlock(&rio500_mutex);
- err_ibuf:
-       kfree(obuf);
- err_obuf:
-       return retval;
-}
-
-static void disconnect_rio(struct usb_interface *intf)
-{
-       struct rio_usb_data *rio = usb_get_intfdata (intf);
-
-       usb_set_intfdata (intf, NULL);
-       if (rio) {
-               usb_deregister_dev(intf, &usb_rio_class);
-
-               mutex_lock(&rio500_mutex);
-               if (rio->isopen) {
-                       rio->isopen = 0;
-                       /* better let it finish - the release will do what's needed */
-                       rio->rio_dev = NULL;
-                       mutex_unlock(&rio500_mutex);
-                       return;
-               }
-               kfree(rio->ibuf);
-               kfree(rio->obuf);
-
-               dev_info(&intf->dev, "USB Rio disconnected.\n");
-
-               rio->present = 0;
-               mutex_unlock(&rio500_mutex);
-       }
-}
-
-static const struct usb_device_id rio_table[] = {
-       { USB_DEVICE(0x0841, 1) },              /* Rio 500 */
-       { }                                     /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE (usb, rio_table);
-
-static struct usb_driver rio_driver = {
-       .name =         "rio500",
-       .probe =        probe_rio,
-       .disconnect =   disconnect_rio,
-       .id_table =     rio_table,
-};
-
-module_usb_driver(rio_driver);
-
-MODULE_AUTHOR( DRIVER_AUTHOR );
-MODULE_DESCRIPTION( DRIVER_DESC );
-MODULE_LICENSE("GPL");
-
diff --git a/drivers/usb/misc/rio500_usb.h b/drivers/usb/misc/rio500_usb.h
deleted file mode 100644 (file)
index 6db7a58..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*  ----------------------------------------------------------------------
-    Copyright (C) 2000  Cesar Miquel  (miquel@df.uba.ar)
-    ---------------------------------------------------------------------- */
-
-#define RIO_SEND_COMMAND                       0x1
-#define RIO_RECV_COMMAND                       0x2
-
-#define RIO_DIR_OUT                            0x0
-#define RIO_DIR_IN                             0x1
-
-struct RioCommand {
-       short length;
-       int request;
-       int requesttype;
-       int value;
-       int index;
-       void __user *buffer;
-       int timeout;
-};
index 9ba4a4e..61e9e98 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/uaccess.h>
 #include <linux/usb.h>
 
 #define IOCTL_GET_DRV_VERSION  2
 
 
-static DEFINE_MUTEX(lcd_mutex);
 static const struct usb_device_id id_table[] = {
        { .idVendor = 0x10D2, .match_flags = USB_DEVICE_ID_MATCH_VENDOR, },
        { },
 };
 MODULE_DEVICE_TABLE(usb, id_table);
 
-static DEFINE_MUTEX(open_disc_mutex);
-
-
 struct usb_lcd {
        struct usb_device       *udev;                  /* init: probe_lcd */
        struct usb_interface    *interface;             /* the interface for
@@ -57,6 +54,8 @@ struct usb_lcd {
                                                           using up all RAM */
        struct usb_anchor       submitted;              /* URBs to wait for
                                                           before suspend */
+       struct rw_semaphore     io_rwsem;
+       unsigned long           disconnected:1;
 };
 #define to_lcd_dev(d) container_of(d, struct usb_lcd, kref)
 
@@ -81,40 +80,29 @@ static int lcd_open(struct inode *inode, struct file *file)
        struct usb_interface *interface;
        int subminor, r;
 
-       mutex_lock(&lcd_mutex);
        subminor = iminor(inode);
 
        interface = usb_find_interface(&lcd_driver, subminor);
        if (!interface) {
-               mutex_unlock(&lcd_mutex);
-               printk(KERN_ERR "USBLCD: %s - error, can't find device for minor %d\n",
+               pr_err("USBLCD: %s - error, can't find device for minor %d\n",
                       __func__, subminor);
                return -ENODEV;
        }
 
-       mutex_lock(&open_disc_mutex);
        dev = usb_get_intfdata(interface);
-       if (!dev) {
-               mutex_unlock(&open_disc_mutex);
-               mutex_unlock(&lcd_mutex);
-               return -ENODEV;
-       }
 
        /* increment our usage count for the device */
        kref_get(&dev->kref);
-       mutex_unlock(&open_disc_mutex);
 
        /* grab a power reference */
        r = usb_autopm_get_interface(interface);
        if (r < 0) {
                kref_put(&dev->kref, lcd_delete);
-               mutex_unlock(&lcd_mutex);
                return r;
        }
 
        /* save our object in the file's private structure */
        file->private_data = dev;
-       mutex_unlock(&lcd_mutex);
 
        return 0;
 }
@@ -142,6 +130,13 @@ static ssize_t lcd_read(struct file *file, char __user * buffer,
 
        dev = file->private_data;
 
+       down_read(&dev->io_rwsem);
+
+       if (dev->disconnected) {
+               retval = -ENODEV;
+               goto out_up_io;
+       }
+
        /* do a blocking bulk read to get data from the device */
        retval = usb_bulk_msg(dev->udev,
                              usb_rcvbulkpipe(dev->udev,
@@ -158,6 +153,9 @@ static ssize_t lcd_read(struct file *file, char __user * buffer,
                        retval = bytes_read;
        }
 
+out_up_io:
+       up_read(&dev->io_rwsem);
+
        return retval;
 }
 
@@ -173,14 +171,12 @@ static long lcd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
        switch (cmd) {
        case IOCTL_GET_HARD_VERSION:
-               mutex_lock(&lcd_mutex);
                bcdDevice = le16_to_cpu((dev->udev)->descriptor.bcdDevice);
                sprintf(buf, "%1d%1d.%1d%1d",
                        (bcdDevice & 0xF000)>>12,
                        (bcdDevice & 0xF00)>>8,
                        (bcdDevice & 0xF0)>>4,
                        (bcdDevice & 0xF));
-               mutex_unlock(&lcd_mutex);
                if (copy_to_user((void __user *)arg, buf, strlen(buf)) != 0)
                        return -EFAULT;
                break;
@@ -237,11 +233,18 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer,
        if (r < 0)
                return -EINTR;
 
+       down_read(&dev->io_rwsem);
+
+       if (dev->disconnected) {
+               retval = -ENODEV;
+               goto err_up_io;
+       }
+
        /* create a urb, and a buffer for it, and copy the data to the urb */
        urb = usb_alloc_urb(0, GFP_KERNEL);
        if (!urb) {
                retval = -ENOMEM;
-               goto err_no_buf;
+               goto err_up_io;
        }
 
        buf = usb_alloc_coherent(dev->udev, count, GFP_KERNEL,
@@ -278,6 +281,7 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer,
           the USB core will eventually free it entirely */
        usb_free_urb(urb);
 
+       up_read(&dev->io_rwsem);
 exit:
        return count;
 error_unanchor:
@@ -285,7 +289,8 @@ error_unanchor:
 error:
        usb_free_coherent(dev->udev, count, buf, urb->transfer_dma);
        usb_free_urb(urb);
-err_no_buf:
+err_up_io:
+       up_read(&dev->io_rwsem);
        up(&dev->limit_sem);
        return retval;
 }
@@ -325,6 +330,7 @@ static int lcd_probe(struct usb_interface *interface,
 
        kref_init(&dev->kref);
        sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES);
+       init_rwsem(&dev->io_rwsem);
        init_usb_anchor(&dev->submitted);
 
        dev->udev = usb_get_dev(interface_to_usbdev(interface));
@@ -365,7 +371,6 @@ static int lcd_probe(struct usb_interface *interface,
                /* something prevented us from registering this driver */
                dev_err(&interface->dev,
                        "Not able to get a minor for this device.\n");
-               usb_set_intfdata(interface, NULL);
                goto error;
        }
 
@@ -411,17 +416,18 @@ static int lcd_resume(struct usb_interface *intf)
 
 static void lcd_disconnect(struct usb_interface *interface)
 {
-       struct usb_lcd *dev;
+       struct usb_lcd *dev = usb_get_intfdata(interface);
        int minor = interface->minor;
 
-       mutex_lock(&open_disc_mutex);
-       dev = usb_get_intfdata(interface);
-       usb_set_intfdata(interface, NULL);
-       mutex_unlock(&open_disc_mutex);
-
        /* give back our minor */
        usb_deregister_dev(interface, &lcd_class);
 
+       down_write(&dev->io_rwsem);
+       dev->disconnected = 1;
+       up_write(&dev->io_rwsem);
+
+       usb_kill_anchored_urbs(&dev->submitted);
+
        /* decrement our usage count */
        kref_put(&dev->kref, lcd_delete);
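
The usblcd hunks above replace the two global mutexes with a per-device rw_semaphore plus a disconnected flag: every I/O path takes the semaphore for reading and bails out with -ENODEV once the flag is set, while disconnect takes it for writing so it cannot overlap in-flight transfers. A minimal userspace sketch of the same pattern, with a POSIX rwlock standing in for the kernel rw_semaphore (all names illustrative, not the driver's code):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t io_rwsem = PTHREAD_RWLOCK_INITIALIZER;
static int disconnected;

static int do_io(void)
{
	int ret = 0;

	pthread_rwlock_rdlock(&io_rwsem);	/* down_read(&dev->io_rwsem) */
	if (disconnected)
		ret = -ENODEV;			/* device already gone */
	/* else: perform the transfer while the device cannot vanish */
	pthread_rwlock_unlock(&io_rwsem);	/* up_read() */
	return ret;
}

static void disconnect(void)
{
	pthread_rwlock_wrlock(&io_rwsem);	/* down_write(): waits for readers */
	disconnected = 1;			/* no new I/O after this point */
	pthread_rwlock_unlock(&io_rwsem);
}

int main(void)
{
	printf("before: %d\n", do_io());	/* 0 */
	disconnect();
	printf("after:  %d\n", do_io());	/* -ENODEV */
	return 0;
}
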
 
index 6715a12..be0505b 100644 (file)
@@ -60,6 +60,7 @@ struct usb_yurex {
 
        struct kref             kref;
        struct mutex            io_mutex;
+       unsigned long           disconnected:1;
        struct fasync_struct    *async_queue;
        wait_queue_head_t       waitq;
 
@@ -107,6 +108,7 @@ static void yurex_delete(struct kref *kref)
                                dev->int_buffer, dev->urb->transfer_dma);
                usb_free_urb(dev->urb);
        }
+       usb_put_intf(dev->interface);
        usb_put_dev(dev->udev);
        kfree(dev);
 }
@@ -132,6 +134,7 @@ static void yurex_interrupt(struct urb *urb)
        switch (status) {
        case 0: /*success*/
                break;
+       /* The device is terminated or messed up, give up */
        case -EOVERFLOW:
                dev_err(&dev->interface->dev,
                        "%s - overflow with length %d, actual length is %d\n",
@@ -140,12 +143,13 @@ static void yurex_interrupt(struct urb *urb)
        case -ENOENT:
        case -ESHUTDOWN:
        case -EILSEQ:
-               /* The device is terminated, clean up */
+       case -EPROTO:
+       case -ETIME:
                return;
        default:
                dev_err(&dev->interface->dev,
                        "%s - unknown status received: %d\n", __func__, status);
-               goto exit;
+               return;
        }
 
        /* handle received message */
@@ -177,7 +181,6 @@ static void yurex_interrupt(struct urb *urb)
                break;
        }
 
-exit:
        retval = usb_submit_urb(dev->urb, GFP_ATOMIC);
        if (retval) {
                dev_err(&dev->interface->dev, "%s - usb_submit_urb failed: %d\n",
@@ -204,7 +207,7 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_
        init_waitqueue_head(&dev->waitq);
 
        dev->udev = usb_get_dev(interface_to_usbdev(interface));
-       dev->interface = interface;
+       dev->interface = usb_get_intf(interface);
 
        /* set up the endpoint information */
        iface_desc = interface->cur_altsetting;
@@ -315,8 +318,9 @@ static void yurex_disconnect(struct usb_interface *interface)
 
        /* prevent more I/O from starting */
        usb_poison_urb(dev->urb);
+       usb_poison_urb(dev->cntl_urb);
        mutex_lock(&dev->io_mutex);
-       dev->interface = NULL;
+       dev->disconnected = 1;
        mutex_unlock(&dev->io_mutex);
 
        /* wakeup waiters */
@@ -404,7 +408,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count,
        dev = file->private_data;
 
        mutex_lock(&dev->io_mutex);
-       if (!dev->interface) {          /* already disconnected */
+       if (dev->disconnected) {                /* already disconnected */
                mutex_unlock(&dev->io_mutex);
                return -ENODEV;
        }
@@ -439,7 +443,7 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer,
                goto error;
 
        mutex_lock(&dev->io_mutex);
-       if (!dev->interface) {          /* already disconnected */
+       if (dev->disconnected) {                /* already disconnected */
                mutex_unlock(&dev->io_mutex);
                retval = -ENODEV;
                goto error;
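
The yurex hunks pin the interface with usb_get_intf(), widen the interrupt handler's list of fatal URB statuses (-EPROTO, -ETIME, and now any unknown status) so a dead device is no longer resubmitted to, and poison the control URB as well on disconnect. usb_poison_urb() differs from usb_kill_urb() in that later submissions also fail, which matters for a completion handler that rearms itself. A userspace caricature of that difference (names illustrative, not the USB core API):

#include <stdio.h>

struct urb_model { int in_flight; int poisoned; };

static int submit(struct urb_model *u)
{
	if (u->poisoned)
		return -1;	/* usb_submit_urb() returns -EPERM here */
	u->in_flight = 1;
	return 0;
}

static void complete(struct urb_model *u)
{
	u->in_flight = 0;
	submit(u);		/* self-rearming handler, as in yurex_interrupt() */
}

int main(void)
{
	struct urb_model u = { 0, 0 };

	submit(&u);
	u.poisoned = 1;		/* usb_poison_urb(): wait, then block resubmission */
	complete(&u);
	printf("in flight after poison: %d\n", u.in_flight);	/* 0 */
	return 0;
}
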
index c3d5c12..9dd0216 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 
 #include "mtu3.h"
+#include "mtu3_dr.h"
 #include "mtu3_debug.h"
 #include "mtu3_trace.h"
 
index 4c3de77..a3c30b6 100644 (file)
@@ -162,17 +162,17 @@ void usbhs_usbreq_get_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req)
        req->bRequest           = (val >> 8) & 0xFF;
        req->bRequestType       = (val >> 0) & 0xFF;
 
-       req->wValue     = usbhs_read(priv, USBVAL);
-       req->wIndex     = usbhs_read(priv, USBINDX);
-       req->wLength    = usbhs_read(priv, USBLENG);
+       req->wValue     = cpu_to_le16(usbhs_read(priv, USBVAL));
+       req->wIndex     = cpu_to_le16(usbhs_read(priv, USBINDX));
+       req->wLength    = cpu_to_le16(usbhs_read(priv, USBLENG));
 }
 
 void usbhs_usbreq_set_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req)
 {
        usbhs_write(priv, USBREQ,  (req->bRequest << 8) | req->bRequestType);
-       usbhs_write(priv, USBVAL,  req->wValue);
-       usbhs_write(priv, USBINDX, req->wIndex);
-       usbhs_write(priv, USBLENG, req->wLength);
+       usbhs_write(priv, USBVAL,  le16_to_cpu(req->wValue));
+       usbhs_write(priv, USBINDX, le16_to_cpu(req->wIndex));
+       usbhs_write(priv, USBLENG, le16_to_cpu(req->wLength));
 
        usbhs_bset(priv, DCPCTR, SUREQ, SUREQ);
 }
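
The common.c hunk above fixes byte-order handling: struct usb_ctrlrequest stores wValue/wIndex/wLength as __le16, while usbhs_read()/usbhs_write() work in CPU byte order, so each field needs an explicit conversion. A standalone sketch of what cpu_to_le16()/le16_to_cpu() do, with hand-rolled stand-ins (on a little-endian CPU both are no-ops):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-ins for the kernel helpers: produce/consume a value whose
 * in-memory layout is little-endian regardless of the host CPU. */
static uint16_t my_cpu_to_le16(uint16_t v)
{
	uint8_t b[2] = { v & 0xff, v >> 8 };	/* low byte first */
	uint16_t le;

	memcpy(&le, b, sizeof(le));
	return le;
}

static uint16_t my_le16_to_cpu(uint16_t le)
{
	uint8_t b[2];

	memcpy(b, &le, sizeof(le));
	return (uint16_t)(b[0] | (b[1] << 8));
}

int main(void)
{
	uint16_t wvalue = my_cpu_to_le16(0x0102); /* as stored in the request */

	/* round-trips to the CPU-order value on any host */
	printf("0x%04x\n", my_le16_to_cpu(wvalue));
	return 0;
}
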
index d1a0a35..0824099 100644 (file)
@@ -211,6 +211,7 @@ struct usbhs_priv;
 /* DCPCTR */
 #define BSTS           (1 << 15)       /* Buffer Status */
 #define SUREQ          (1 << 14)       /* Sending SETUP Token */
+#define INBUFM         (1 << 14)       /* (PIPEnCTR) Transfer Buffer Monitor */
 #define CSSTS          (1 << 12)       /* CSSTS Status */
 #define        ACLRM           (1 << 9)        /* Buffer Auto-Clear Mode */
 #define SQCLR          (1 << 8)        /* Toggle Bit Clear */
index 2a01ceb..86637cd 100644 (file)
@@ -89,7 +89,7 @@ static void __usbhsf_pkt_del(struct usbhs_pkt *pkt)
        list_del_init(&pkt->node);
 }
 
-static struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe)
+struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe)
 {
        return list_first_entry_or_null(&pipe->list, struct usbhs_pkt, node);
 }
index 88d1816..c3d3cc3 100644 (file)
@@ -97,5 +97,6 @@ void usbhs_pkt_push(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt,
                    void *buf, int len, int zero, int sequence);
 struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt);
 void usbhs_pkt_start(struct usbhs_pipe *pipe);
+struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe);
 
 #endif /* RENESAS_USB_FIFO_H */
index 4d571a5..cd38d74 100644 (file)
@@ -265,7 +265,7 @@ static int usbhsg_recip_handler_std_set_device(struct usbhs_priv *priv,
        case USB_DEVICE_TEST_MODE:
                usbhsg_recip_handler_std_control_done(priv, uep, ctrl);
                udelay(100);
-               usbhs_sys_set_test_mode(priv, le16_to_cpu(ctrl->wIndex >> 8));
+               usbhs_sys_set_test_mode(priv, le16_to_cpu(ctrl->wIndex) >> 8);
                break;
        default:
                usbhsg_recip_handler_std_control_done(priv, uep, ctrl);
@@ -315,7 +315,7 @@ static void __usbhsg_recip_send_status(struct usbhsg_gpriv *gpriv,
        struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(dcp);
        struct device *dev = usbhsg_gpriv_to_dev(gpriv);
        struct usb_request *req;
-       unsigned short *buf;
+       __le16 *buf;
 
        /* alloc new usb_request for recip */
        req = usb_ep_alloc_request(&dcp->ep, GFP_ATOMIC);
@@ -722,8 +722,7 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge)
        struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
        struct device *dev = usbhsg_gpriv_to_dev(gpriv);
        unsigned long flags;
-
-       usbhsg_pipe_disable(uep);
+       int ret = 0;
 
        dev_dbg(dev, "set halt %d (pipe %d)\n",
                halt, usbhs_pipe_number(pipe));
@@ -731,6 +730,18 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge)
        /********************  spin lock ********************/
        usbhs_lock(priv, flags);
 
+       /*
+        * According to usb_ep_set_halt()'s description, this function should
+        * return -EAGAIN if the IN endpoint has any queue or data. Note
+        * that the usbhs_pipe_is_dir_in() returns false if the pipe is an
+        * IN endpoint in the gadget mode.
+        */
+       if (!usbhs_pipe_is_dir_in(pipe) && (__usbhsf_pkt_get(pipe) ||
+           usbhs_pipe_contains_transmittable_data(pipe))) {
+               ret = -EAGAIN;
+               goto out;
+       }
+
        if (halt)
                usbhs_pipe_stall(pipe);
        else
@@ -741,10 +752,11 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge)
        else
                usbhsg_status_clr(gpriv, USBHSG_STATUS_WEDGE);
 
+out:
        usbhs_unlock(priv, flags);
        /********************  spin unlock ******************/
 
-       return 0;
+       return ret;
 }
 
 static int usbhsg_ep_set_halt(struct usb_ep *ep, int value)
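
Two distinct fixes land in mod_gadget.c. The test-mode hunk reorders a conversion: le16_to_cpu(ctrl->wIndex) >> 8 converts first and then takes the high byte (the test selector), whereas the old le16_to_cpu(ctrl->wIndex >> 8) shifted the raw little-endian value and extracted the wrong byte on big-endian hosts. The set_halt/wedge hunks additionally honour usb_ep_set_halt()'s documented contract by returning -EAGAIN while the pipe still holds queued packets or transmittable data. A worked example of the shift-ordering bug, reusing the stand-in helper from the previous sketch and assuming wIndex = 0x0400:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint16_t my_le16_to_cpu(uint16_t le)
{
	uint8_t b[2];

	memcpy(b, &le, sizeof(le));
	return (uint16_t)(b[0] | (b[1] << 8));
}

int main(void)
{
	/* wIndex as it sits in memory: 0x0400 little-endian = bytes 00 04 */
	uint8_t raw[2] = { 0x00, 0x04 };
	uint16_t wIndex;

	memcpy(&wIndex, raw, sizeof(wIndex));

	/* correct order: convert, then shift -> 0x04 on every host */
	printf("fixed: 0x%02x\n", my_le16_to_cpu(wIndex) >> 8);

	/* buggy order: shifts the unconverted value; on a little-endian
	 * host both lines print 0x04, but on big-endian wIndex reads as
	 * 0x0004 and the buggy order yields 0x00 */
	printf("buggy: 0x%02x\n", my_le16_to_cpu(wIndex >> 8));
	return 0;
}
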
index c4922b9..9e5afdd 100644 (file)
@@ -277,6 +277,21 @@ int usbhs_pipe_is_accessible(struct usbhs_pipe *pipe)
        return -EBUSY;
 }
 
+bool usbhs_pipe_contains_transmittable_data(struct usbhs_pipe *pipe)
+{
+       u16 val;
+
+       /* Not supported for the DCP pipe */
+       if (usbhs_pipe_is_dcp(pipe))
+               return false;
+
+       val = usbhsp_pipectrl_get(pipe);
+       if (val & INBUFM)
+               return true;
+
+       return false;
+}
+
 /*
  *             PID ctrl
  */
index 3080423..3b13052 100644 (file)
@@ -83,6 +83,7 @@ void usbhs_pipe_clear(struct usbhs_pipe *pipe);
 void usbhs_pipe_clear_without_sequence(struct usbhs_pipe *pipe,
                                       int needs_bfre, int bfre_enable);
 int usbhs_pipe_is_accessible(struct usbhs_pipe *pipe);
+bool usbhs_pipe_contains_transmittable_data(struct usbhs_pipe *pipe);
 void usbhs_pipe_enable(struct usbhs_pipe *pipe);
 void usbhs_pipe_disable(struct usbhs_pipe *pipe);
 void usbhs_pipe_stall(struct usbhs_pipe *pipe);
index f0688c4..25e81fa 100644 (file)
@@ -1030,6 +1030,9 @@ static const struct usb_device_id id_table_combined[] = {
        /* EZPrototypes devices */
        { USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) },
        { USB_DEVICE_INTERFACE_NUMBER(UNJO_VID, UNJO_ISODEBUG_V1_PID, 1) },
+       /* Sienna devices */
+       { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) },
+       { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) },
        { }                                     /* Terminating entry */
 };
 
index f12d806..22d6621 100644 (file)
@@ -39,6 +39,9 @@
 
 #define FTDI_LUMEL_PD12_PID    0x6002
 
+/* Sienna Serial Interface by Secyourit GmbH */
+#define FTDI_SIENNA_PID                0x8348
+
 /* Cyber Cortex AV by Fabulous Silicon (http://fabuloussilicon.com) */
 #define CYBER_CORTEX_AV_PID    0x8698
 
 #define BANDB_TTL3USB9M_PID    0xAC50
 #define BANDB_ZZ_PROG1_USB_PID 0xBA02
 
+/*
+ * Echelon USB Serial Interface
+ */
+#define ECHELON_VID            0x0920
+#define ECHELON_U20_PID                0x7500
+
 /*
  * Intrepid Control Systems (http://www.intrepidcs.com/) ValueCAN and NeoVI
  */
index d34779f..e66a59e 100644 (file)
@@ -1741,8 +1741,8 @@ static struct urb *keyspan_setup_urb(struct usb_serial *serial, int endpoint,
 
        ep_desc = find_ep(serial, endpoint);
        if (!ep_desc) {
-               /* leak the urb, something's wrong and the callers don't care */
-               return urb;
+               usb_free_urb(urb);
+               return NULL;
        }
        if (usb_endpoint_xfer_int(ep_desc)) {
                ep_type_name = "INT";
index 38e920a..06ab016 100644 (file)
@@ -419,6 +419,7 @@ static void option_instat_callback(struct urb *urb);
 #define CINTERION_PRODUCT_PH8_AUDIO            0x0083
 #define CINTERION_PRODUCT_AHXX_2RMNET          0x0084
 #define CINTERION_PRODUCT_AHXX_AUDIO           0x0085
+#define CINTERION_PRODUCT_CLS8                 0x00b0
 
 /* Olivetti products */
 #define OLIVETTI_VENDOR_ID                     0x0b3c
@@ -1154,6 +1155,14 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) },
        { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff),
          .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1050, 0xff),    /* Telit FN980 (rmnet) */
+         .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1051, 0xff),    /* Telit FN980 (MBIM) */
+         .driver_info = NCTRL(0) | RSVD(1) },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1052, 0xff),    /* Telit FN980 (RNDIS) */
+         .driver_info = NCTRL(2) | RSVD(3) },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1053, 0xff),    /* Telit FN980 (ECM) */
+         .driver_info = NCTRL(0) | RSVD(1) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
          .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
@@ -1847,6 +1856,8 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = RSVD(4) },
        { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_2RMNET, 0xff) },
        { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_AUDIO, 0xff) },
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_CLS8, 0xff),
+         .driver_info = RSVD(0) | RSVD(4) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) },
        { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) },
index dd0ad67..ef23acc 100644 (file)
@@ -776,7 +776,6 @@ static void ti_close(struct usb_serial_port *port)
        struct ti_port *tport;
        int port_number;
        int status;
-       int do_unlock;
        unsigned long flags;
 
        tdev = usb_get_serial_data(port->serial);
@@ -800,16 +799,13 @@ static void ti_close(struct usb_serial_port *port)
                        "%s - cannot send close port command, %d\n"
                                                        , __func__, status);
 
-       /* if mutex_lock is interrupted, continue anyway */
-       do_unlock = !mutex_lock_interruptible(&tdev->td_open_close_lock);
-       --tport->tp_tdev->td_open_port_count;
-       if (tport->tp_tdev->td_open_port_count <= 0) {
+       mutex_lock(&tdev->td_open_close_lock);
+       --tdev->td_open_port_count;
+       if (tdev->td_open_port_count == 0) {
                /* last port is closed, shut down interrupt urb */
                usb_kill_urb(port->serial->port[0]->interrupt_in_urb);
-               tport->tp_tdev->td_open_port_count = 0;
        }
-       if (do_unlock)
-               mutex_unlock(&tdev->td_open_close_lock);
+       mutex_unlock(&tdev->td_open_close_lock);
 }
 
 
index a3179fe..8f066bb 100644 (file)
@@ -314,10 +314,7 @@ static void serial_cleanup(struct tty_struct *tty)
        serial = port->serial;
        owner = serial->type->driver.owner;
 
-       mutex_lock(&serial->disc_mutex);
-       if (!serial->disconnected)
-               usb_autopm_put_interface(serial->interface);
-       mutex_unlock(&serial->disc_mutex);
+       usb_autopm_put_interface(serial->interface);
 
        usb_serial_put(serial);
        module_put(owner);
index 79314d8..ca3bd58 100644 (file)
@@ -559,6 +559,10 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command,
 
        command_port = port->serial->port[COMMAND_PORT];
        command_info = usb_get_serial_port_data(command_port);
+
+       if (command_port->bulk_out_size < datasize + 1)
+               return -EIO;
+
        mutex_lock(&command_info->mutex);
        command_info->command_finished = false;
 
@@ -632,6 +636,7 @@ static void firm_setup_port(struct tty_struct *tty)
        struct device *dev = &port->dev;
        struct whiteheat_port_settings port_settings;
        unsigned int cflag = tty->termios.c_cflag;
+       speed_t baud;
 
        port_settings.port = port->port_number + 1;
 
@@ -692,11 +697,13 @@ static void firm_setup_port(struct tty_struct *tty)
        dev_dbg(dev, "%s - XON = %2x, XOFF = %2x\n", __func__, port_settings.xon, port_settings.xoff);
 
        /* get the baud rate wanted */
-       port_settings.baud = tty_get_baud_rate(tty);
-       dev_dbg(dev, "%s - baud rate = %d\n", __func__, port_settings.baud);
+       baud = tty_get_baud_rate(tty);
+       port_settings.baud = cpu_to_le32(baud);
+       dev_dbg(dev, "%s - baud rate = %u\n", __func__, baud);
 
        /* fixme: should set validated settings */
-       tty_encode_baud_rate(tty, port_settings.baud, port_settings.baud);
+       tty_encode_baud_rate(tty, baud, baud);
+
        /* handle any settings that aren't specified in the tty structure */
        port_settings.lloop = 0;
 
index 0039814..269e727 100644 (file)
@@ -87,7 +87,7 @@ struct whiteheat_simple {
 
 struct whiteheat_port_settings {
        __u8    port;           /* port number (1 to N) */
-       __u32   baud;           /* any value 7 - 460800, firmware calculates
+       __le32  baud;           /* any value 7 - 460800, firmware calculates
                                   best fit; arrives little endian */
        __u8    bits;           /* 5, 6, 7, or 8 */
        __u8    stop;           /* 1 or 2, default 1 (2 = 1.5 if bits = 5) */
index 6737fab..54a3c81 100644 (file)
@@ -68,7 +68,6 @@ static const char* host_info(struct Scsi_Host *host)
 static int slave_alloc (struct scsi_device *sdev)
 {
        struct us_data *us = host_to_us(sdev->host);
-       int maxp;
 
        /*
         * Set the INQUIRY transfer length to 36.  We don't use any of
@@ -77,15 +76,6 @@ static int slave_alloc (struct scsi_device *sdev)
         */
        sdev->inquiry_len = 36;
 
-       /*
-        * USB has unusual scatter-gather requirements: the length of each
-        * scatterlist element except the last must be divisible by the
-        * Bulk maxpacket value.  Fortunately this value is always a
-        * power of 2.  Inform the block layer about this requirement.
-        */
-       maxp = usb_maxpacket(us->pusb_dev, us->recv_bulk_pipe, 0);
-       blk_queue_virt_boundary(sdev->request_queue, maxp - 1);
-
        /*
         * Some host controllers may have alignment requirements.
         * We'll play it safe by requiring 512-byte alignment always.
index bf80d6f..3453825 100644 (file)
@@ -789,29 +789,9 @@ static int uas_slave_alloc(struct scsi_device *sdev)
 {
        struct uas_dev_info *devinfo =
                (struct uas_dev_info *)sdev->host->hostdata;
-       int maxp;
 
        sdev->hostdata = devinfo;
 
-       /*
-        * We have two requirements here. We must satisfy the requirements
-        * of the physical HC and the demands of the protocol, as we
-        * definitely want no additional memory allocation in this path
-        * ruling out using bounce buffers.
-        *
-        * For a transmission on USB to continue we must never send
-        * a package that is smaller than maxpacket. Hence the length of each
-         * scatterlist element except the last must be divisible by the
-         * Bulk maxpacket value.
-        * If the HC does not ensure that through SG,
-        * the upper layer must do that. We must assume nothing
-        * about the capabilities of the HC, so we use the most
-        * pessimistic requirement.
-        */
-
-       maxp = usb_maxpacket(devinfo->udev, devinfo->data_in_pipe, 0);
-       blk_queue_virt_boundary(sdev->request_queue, maxp - 1);
-
        /*
         * The protocol has no requirements on alignment in the strict sense.
         * Controllers may or may not have alignment restrictions.
index 9656274..5f61d99 100644 (file)
@@ -4409,18 +4409,20 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
        /* USB data support is optional */
        ret = fwnode_property_read_string(fwnode, "data-role", &cap_str);
        if (ret == 0) {
-               port->typec_caps.data = typec_find_port_data_role(cap_str);
-               if (port->typec_caps.data < 0)
-                       return -EINVAL;
+               ret = typec_find_port_data_role(cap_str);
+               if (ret < 0)
+                       return ret;
+               port->typec_caps.data = ret;
        }
 
        ret = fwnode_property_read_string(fwnode, "power-role", &cap_str);
        if (ret < 0)
                return ret;
 
-       port->typec_caps.type = typec_find_port_power_role(cap_str);
-       if (port->typec_caps.type < 0)
-               return -EINVAL;
+       ret = typec_find_port_power_role(cap_str);
+       if (ret < 0)
+               return ret;
+       port->typec_caps.type = ret;
        port->port_type = port->typec_caps.type;
 
        if (port->port_type == TYPEC_PORT_SNK)
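
The tcpm hunk above fixes a signedness bug: typec_caps.data and .type are enum-typed fields, which the compiler may treat as unsigned, so assigning the helper's return value first and testing < 0 afterwards can never catch the error. Testing the plain int before the assignment does. A minimal illustration of the bug class, with a hypothetical enum and lookup function:

#include <stdio.h>

enum role { ROLE_DEVICE, ROLE_HOST };	/* no negative enumerator ... */

struct caps { enum role data; };	/* ... so this field may be unsigned */

static int find_role(const char *s)	/* hypothetical: -1 means "not found" */
{
	(void)s;
	return -1;
}

int main(void)
{
	struct caps caps;
	int ret;

	caps.data = find_role("bogus");
	if (caps.data < 0)		/* may be always-false: error slips by */
		puts("old style: error caught");

	ret = find_role("bogus");	/* the fixed pattern */
	if (ret < 0)
		puts("new style: error caught");
	else
		caps.data = ret;
	return 0;
}
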
index 6c10369..d99700c 100644 (file)
@@ -75,6 +75,8 @@ static int ucsi_displayport_enter(struct typec_altmode *alt)
 
        if (cur != 0xff) {
                mutex_unlock(&dp->con->lock);
+               if (dp->con->port_altmode[cur] == alt)
+                       return 0;
                return -EBUSY;
        }
 
index 907e20e..d772fce 100644 (file)
@@ -195,7 +195,6 @@ struct ucsi_ccg {
 
        /* fw build with vendor information */
        u16 fw_build;
-       bool run_isr; /* flag to call ISR routine during resume */
        struct work_struct pm_work;
 };
 
@@ -224,18 +223,6 @@ static int ccg_read(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
        if (quirks && quirks->max_read_len)
                max_read_len = quirks->max_read_len;
 
-       if (uc->fw_build == CCG_FW_BUILD_NVIDIA &&
-           uc->fw_version <= CCG_OLD_FW_VERSION) {
-               mutex_lock(&uc->lock);
-               /*
-                * Do not schedule pm_work to run ISR in
-                * ucsi_ccg_runtime_resume() after pm_runtime_get_sync()
-                * since we are already in ISR path.
-                */
-               uc->run_isr = false;
-               mutex_unlock(&uc->lock);
-       }
-
        pm_runtime_get_sync(uc->dev);
        while (rem_len > 0) {
                msgs[1].buf = &data[len - rem_len];
@@ -278,18 +265,6 @@ static int ccg_write(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
        msgs[0].len = len + sizeof(rab);
        msgs[0].buf = buf;
 
-       if (uc->fw_build == CCG_FW_BUILD_NVIDIA &&
-           uc->fw_version <= CCG_OLD_FW_VERSION) {
-               mutex_lock(&uc->lock);
-               /*
-                * Do not schedule pm_work to run ISR in
-                * ucsi_ccg_runtime_resume() after pm_runtime_get_sync()
-                * since we are already in ISR path.
-                */
-               uc->run_isr = false;
-               mutex_unlock(&uc->lock);
-       }
-
        pm_runtime_get_sync(uc->dev);
        status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
        if (status < 0) {
@@ -1130,7 +1105,6 @@ static int ucsi_ccg_probe(struct i2c_client *client,
        uc->ppm.sync = ucsi_ccg_sync;
        uc->dev = dev;
        uc->client = client;
-       uc->run_isr = true;
        mutex_init(&uc->lock);
        INIT_WORK(&uc->work, ccg_update_firmware);
        INIT_WORK(&uc->pm_work, ccg_pm_workaround_work);
@@ -1188,6 +1162,8 @@ static int ucsi_ccg_probe(struct i2c_client *client,
 
        pm_runtime_set_active(uc->dev);
        pm_runtime_enable(uc->dev);
+       pm_runtime_use_autosuspend(uc->dev);
+       pm_runtime_set_autosuspend_delay(uc->dev, 5000);
        pm_runtime_idle(uc->dev);
 
        return 0;
@@ -1229,7 +1205,6 @@ static int ucsi_ccg_runtime_resume(struct device *dev)
 {
        struct i2c_client *client = to_i2c_client(dev);
        struct ucsi_ccg *uc = i2c_get_clientdata(client);
-       bool schedule = true;
 
        /*
         * Firmware version 3.1.10 or earlier, built for NVIDIA has known issue
@@ -1237,17 +1212,8 @@ static int ucsi_ccg_runtime_resume(struct device *dev)
         * Schedule a work to call ISR as a workaround.
         */
        if (uc->fw_build == CCG_FW_BUILD_NVIDIA &&
-           uc->fw_version <= CCG_OLD_FW_VERSION) {
-               mutex_lock(&uc->lock);
-               if (!uc->run_isr) {
-                       uc->run_isr = true;
-                       schedule = false;
-               }
-               mutex_unlock(&uc->lock);
-
-               if (schedule)
-                       schedule_work(&uc->pm_work);
-       }
+           uc->fw_version <= CCG_OLD_FW_VERSION)
+               schedule_work(&uc->pm_work);
 
        return 0;
 }
index c31d17d..2dc5876 100644 (file)
@@ -61,6 +61,7 @@ struct usb_skel {
        spinlock_t              err_lock;               /* lock for errors */
        struct kref             kref;
        struct mutex            io_mutex;               /* synchronize I/O with disconnect */
+       unsigned long           disconnected:1;
        wait_queue_head_t       bulk_in_wait;           /* to wait for an ongoing read */
 };
 #define to_skel_dev(d) container_of(d, struct usb_skel, kref)
@@ -73,6 +74,7 @@ static void skel_delete(struct kref *kref)
        struct usb_skel *dev = to_skel_dev(kref);
 
        usb_free_urb(dev->bulk_in_urb);
+       usb_put_intf(dev->interface);
        usb_put_dev(dev->udev);
        kfree(dev->bulk_in_buffer);
        kfree(dev);
@@ -124,10 +126,7 @@ static int skel_release(struct inode *inode, struct file *file)
                return -ENODEV;
 
        /* allow the device to be autosuspended */
-       mutex_lock(&dev->io_mutex);
-       if (dev->interface)
-               usb_autopm_put_interface(dev->interface);
-       mutex_unlock(&dev->io_mutex);
+       usb_autopm_put_interface(dev->interface);
 
        /* decrement the count on our device */
        kref_put(&dev->kref, skel_delete);
@@ -231,8 +230,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count,
 
        dev = file->private_data;
 
-       /* if we cannot read at all, return EOF */
-       if (!dev->bulk_in_urb || !count)
+       if (!count)
                return 0;
 
        /* no concurrent readers */
@@ -240,7 +238,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count,
        if (rv < 0)
                return rv;
 
-       if (!dev->interface) {          /* disconnect() was called */
+       if (dev->disconnected) {                /* disconnect() was called */
                rv = -ENODEV;
                goto exit;
        }
@@ -422,7 +420,7 @@ static ssize_t skel_write(struct file *file, const char *user_buffer,
 
        /* this lock makes sure we don't submit URBs to gone devices */
        mutex_lock(&dev->io_mutex);
-       if (!dev->interface) {          /* disconnect() was called */
+       if (dev->disconnected) {                /* disconnect() was called */
                mutex_unlock(&dev->io_mutex);
                retval = -ENODEV;
                goto error;
@@ -507,7 +505,7 @@ static int skel_probe(struct usb_interface *interface,
        init_waitqueue_head(&dev->bulk_in_wait);
 
        dev->udev = usb_get_dev(interface_to_usbdev(interface));
-       dev->interface = interface;
+       dev->interface = usb_get_intf(interface);
 
        /* set up the endpoint information */
        /* use only the first bulk-in and bulk-out endpoints */
@@ -573,9 +571,10 @@ static void skel_disconnect(struct usb_interface *interface)
 
        /* prevent more I/O from starting */
        mutex_lock(&dev->io_mutex);
-       dev->interface = NULL;
+       dev->disconnected = 1;
        mutex_unlock(&dev->io_mutex);
 
+       usb_kill_urb(dev->bulk_in_urb);
        usb_kill_anchored_urbs(&dev->submitted);
 
        /* decrement our usage count */
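
Like the yurex change earlier in this series, the skeleton now pins the USB interface with usb_get_intf() in probe and drops it in the kref release function, and marks disconnection with a flag instead of clearing dev->interface; open file handles can therefore keep using the pointer (for example in usb_autopm_put_interface()) until the last reference goes away. A userspace sketch of the underlying refcount rule, with C11 atomics standing in for struct kref:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct dev {
	atomic_int refcount;
	char *resource;		/* stands in for dev->interface etc. */
};

static void dev_get(struct dev *d)
{
	atomic_fetch_add(&d->refcount, 1);
}

static void dev_put(struct dev *d)
{
	/* the *last* put releases everything the object pinned */
	if (atomic_fetch_sub(&d->refcount, 1) == 1) {
		free(d->resource);
		free(d);
		puts("released");
	}
}

int main(void)
{
	struct dev *d = malloc(sizeof(*d));

	atomic_init(&d->refcount, 1);	/* probe's reference */
	d->resource = malloc(16);

	dev_get(d);	/* open() takes a reference */
	dev_put(d);	/* disconnect drops probe's reference ... */
	dev_put(d);	/* ... members stay valid until the final put */
	return 0;
}
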
index 585a84d..65850e9 100644 (file)
@@ -1195,12 +1195,12 @@ static int vhci_start(struct usb_hcd *hcd)
        if (id == 0 && usb_hcd_is_primary_hcd(hcd)) {
                err = vhci_init_attr_group();
                if (err) {
-                       pr_err("init attr group\n");
+                       dev_err(hcd_dev(hcd), "init attr group failed, err = %d\n", err);
                        return err;
                }
                err = sysfs_create_group(&hcd_dev(hcd)->kobj, &vhci_attr_group);
                if (err) {
-                       pr_err("create sysfs files\n");
+                       dev_err(hcd_dev(hcd), "create sysfs files failed, err = %d\n", err);
                        vhci_finish_attr_group();
                        return err;
                }
index c380378..0ae40a1 100644 (file)
@@ -147,7 +147,10 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
                }
 
                kfree(iov);
+               /* This is only for the isochronous case */
                kfree(iso_buffer);
+               iso_buffer = NULL;
+
                usbip_dbg_vhci_tx("send txdata\n");
 
                total_size += txsize;
index 3b18fa4..26cef65 100644 (file)
@@ -176,13 +176,13 @@ put_exit:
 }
 
 static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
-               unsigned int page_shift)
+               unsigned int it_page_shift)
 {
        struct page *page;
        unsigned long size = 0;
 
-       if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
-               return size == (1UL << page_shift);
+       if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
+               return size == (1UL << it_page_shift);
 
        page = pfn_to_page(hpa >> PAGE_SHIFT);
        /*
@@ -190,7 +190,7 @@ static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
         * a page we just found. Otherwise the hardware can get access to
         * a bigger memory chunk that it should.
         */
-       return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+       return page_shift(compound_head(page)) >= it_page_shift;
 }
 
 static inline bool tce_groups_attached(struct tce_container *container)
index 9a50b05..d864277 100644 (file)
@@ -375,6 +375,8 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 
        down_read(&mm->mmap_sem);
 
+       vaddr = untagged_addr(vaddr);
+
        vma = find_vma_intersection(mm, vaddr, vaddr + 1);
 
        if (vma && vma->vm_flags & VM_PFNMAP) {
@@ -1656,7 +1658,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
        struct bus_type *bus = NULL;
        int ret;
        bool resv_msi, msi_remap;
-       phys_addr_t resv_msi_base;
+       phys_addr_t resv_msi_base = 0;
        struct iommu_domain_geometry geo;
        LIST_HEAD(iova_copy);
        LIST_HEAD(group_resv_regions);
index 7804869..0563080 100644 (file)
@@ -161,6 +161,7 @@ static int vhost_test_release(struct inode *inode, struct file *f)
 
        vhost_test_stop(n, &private);
        vhost_test_flush(n);
+       vhost_dev_stop(&n->dev);
        vhost_dev_cleanup(&n->dev);
        /* We do an extra flush before freeing memory,
         * since jobs can re-queue themselves. */
@@ -237,6 +238,7 @@ static long vhost_test_reset_owner(struct vhost_test *n)
        }
        vhost_test_stop(n, &priv);
        vhost_test_flush(n);
+       vhost_dev_stop(&n->dev);
        vhost_dev_reset_owner(&n->dev, umem);
 done:
        mutex_unlock(&n->dev.mutex);
index 08ad0d1..a0a2d74 100644 (file)
@@ -852,6 +852,12 @@ static inline int xfer_kern(void *src, void *dst, size_t len)
        return 0;
 }
 
+static inline int kern_xfer(void *dst, void *src, size_t len)
+{
+       memcpy(dst, src, len);
+       return 0;
+}
+
 /**
  * vringh_init_kern - initialize a vringh for a kernelspace vring.
  * @vrh: the vringh to initialize.
@@ -958,7 +964,7 @@ EXPORT_SYMBOL(vringh_iov_pull_kern);
 ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
                             const void *src, size_t len)
 {
-       return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
+       return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer);
 }
 EXPORT_SYMBOL(vringh_iov_push_kern);
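
The vringh fix above addresses a copy that went the wrong way: the push path reused xfer_kern(), whose memcpy() arguments are ordered for the pull direction, so a push read from the vring instead of writing to it. The new kern_xfer() takes (dst, src), letting the shared iterator pass the ring-side pointer first in both directions. A toy reduction of the bug class (names illustrative):

#include <stdio.h>
#include <string.h>

/* the shared iterator always calls xfer(ring_ptr, caller_ptr, len) */
typedef void (*xfer_fn)(void *ring_ptr, void *caller_ptr, size_t len);

static void pull_xfer(void *ring_ptr, void *caller_ptr, size_t len)
{
	memcpy(caller_ptr, ring_ptr, len);	/* ring -> caller */
}

static void push_xfer(void *ring_ptr, void *caller_ptr, size_t len)
{
	memcpy(ring_ptr, caller_ptr, len);	/* caller -> ring */
}

int main(void)
{
	char ring[8] = "old", buf[8] = "new";
	xfer_fn xfer = push_xfer;

	xfer(ring, buf, 4);
	printf("%s\n", ring);	/* "new"; with pull_xfer the ring stays "old" */
	(void)pull_xfer;
	return 0;
}
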
 
index 8b081d6..40676be 100644 (file)
@@ -10,7 +10,6 @@ menu "Backlight & LCD device support"
 #
 config LCD_CLASS_DEVICE
         tristate "Lowlevel LCD controls"
-       default m
        help
          This framework adds support for low-level control of LCD.
          Some framebuffer devices connect to platform-specific LCD modules
@@ -143,7 +142,6 @@ endif # LCD_CLASS_DEVICE
 #
 config BACKLIGHT_CLASS_DEVICE
         tristate "Lowlevel Backlight controls"
-       default m
        help
          This framework adds support for low-level control of the LCD
           backlight. This includes support for brightness and power.
index 5dc0710..cac3e35 100644 (file)
@@ -32,6 +32,12 @@ static const char *const backlight_types[] = {
        [BACKLIGHT_FIRMWARE] = "firmware",
 };
 
+static const char *const backlight_scale_types[] = {
+       [BACKLIGHT_SCALE_UNKNOWN]       = "unknown",
+       [BACKLIGHT_SCALE_LINEAR]        = "linear",
+       [BACKLIGHT_SCALE_NON_LINEAR]    = "non-linear",
+};
+
 #if defined(CONFIG_FB) || (defined(CONFIG_FB_MODULE) && \
                           defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE))
 /* This callback gets called when something important happens inside a
@@ -246,6 +252,18 @@ static ssize_t actual_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(actual_brightness);
 
+static ssize_t scale_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct backlight_device *bd = to_backlight_device(dev);
+
+       if (WARN_ON(bd->props.scale > BACKLIGHT_SCALE_NON_LINEAR))
+               return sprintf(buf, "unknown\n");
+
+       return sprintf(buf, "%s\n", backlight_scale_types[bd->props.scale]);
+}
+static DEVICE_ATTR_RO(scale);
+
 static struct class *backlight_class;
 
 #ifdef CONFIG_PM_SLEEP
@@ -292,6 +310,7 @@ static struct attribute *bl_device_attrs[] = {
        &dev_attr_brightness.attr,
        &dev_attr_actual_brightness.attr,
        &dev_attr_max_brightness.attr,
+       &dev_attr_scale.attr,
        &dev_attr_type.attr,
        NULL,
 };
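
backlight.c gains a read-only scale attribute that reports whether the device's brightness curve is linear, non-linear, or unknown, so userspace dimming logic can decide whether to apply its own perceptual correction. A hypothetical consumer; the device name backlight0 is illustrative and depends on the registered backlight:

#include <stdio.h>

int main(void)
{
	char scale[32];
	FILE *f = fopen("/sys/class/backlight/backlight0/scale", "r");

	if (!f) {
		perror("open scale attribute");
		return 1;
	}
	if (fgets(scale, sizeof(scale), f))
		printf("brightness scale: %s", scale);	/* "linear\n", ... */
	fclose(f);
	return 0;
}
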
index e84f308..18e053e 100644 (file)
@@ -59,13 +59,11 @@ static int gpio_backlight_probe_dt(struct platform_device *pdev,
                                   struct gpio_backlight *gbl)
 {
        struct device *dev = &pdev->dev;
-       enum gpiod_flags flags;
        int ret;
 
        gbl->def_value = device_property_read_bool(dev, "default-on");
-       flags = gbl->def_value ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
 
-       gbl->gpiod = devm_gpiod_get(dev, NULL, flags);
+       gbl->gpiod = devm_gpiod_get(dev, NULL, GPIOD_ASIS);
        if (IS_ERR(gbl->gpiod)) {
                ret = PTR_ERR(gbl->gpiod);
 
@@ -79,6 +77,22 @@ static int gpio_backlight_probe_dt(struct platform_device *pdev,
        return 0;
 }
 
+static int gpio_backlight_initial_power_state(struct gpio_backlight *gbl)
+{
+       struct device_node *node = gbl->dev->of_node;
+
+       /* Not booted with device tree or no phandle link to the node */
+       if (!node || !node->phandle)
+               return gbl->def_value ? FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN;
+
+       /* if the enable GPIO is disabled, do not enable the backlight */
+       if (gpiod_get_value_cansleep(gbl->gpiod) == 0)
+               return FB_BLANK_POWERDOWN;
+
+       return FB_BLANK_UNBLANK;
+}
+
+
 static int gpio_backlight_probe(struct platform_device *pdev)
 {
        struct gpio_backlight_platform_data *pdata =
@@ -136,7 +150,9 @@ static int gpio_backlight_probe(struct platform_device *pdev)
                return PTR_ERR(bl);
        }
 
-       bl->props.brightness = gbl->def_value;
+       bl->props.power = gpio_backlight_initial_power_state(gbl);
+       bl->props.brightness = 1;
+
        backlight_update_status(bl);
 
        platform_set_drvdata(pdev, bl);
index b04b35d..2d8e819 100644 (file)
@@ -377,8 +377,7 @@ static int lm3630a_parse_led_sources(struct fwnode_handle *node,
        u32 sources[LM3630A_NUM_SINKS];
        int ret, num_sources, i;
 
-       num_sources = fwnode_property_read_u32_array(node, "led-sources", NULL,
-                                                    0);
+       num_sources = fwnode_property_count_u32(node, "led-sources");
        if (num_sources < 0)
                return default_led_sources;
        else if (num_sources > ARRAY_SIZE(sources))
index 35bc012..0e45685 100644 (file)
@@ -158,7 +158,7 @@ static int lms283gf05_probe(struct spi_device *spi)
                ret = devm_gpio_request_one(&spi->dev, pdata->reset_gpio,
                                GPIOF_DIR_OUT | (!pdata->reset_inverted ?
                                GPIOF_INIT_HIGH : GPIOF_INIT_LOW),
-                               "LMS285GF05 RESET");
+                               "LMS283GF05 RESET");
                if (ret)
                        return ret;
        }
index 2201b8c..746eebc 100644 (file)
@@ -387,6 +387,31 @@ int pwm_backlight_brightness_default(struct device *dev,
 }
 #endif
 
+static bool pwm_backlight_is_linear(struct platform_pwm_backlight_data *data)
+{
+       unsigned int nlevels = data->max_brightness + 1;
+       unsigned int min_val = data->levels[0];
+       unsigned int max_val = data->levels[nlevels - 1];
+       /*
+        * Multiplying by 128 means that even in pathological cases such
+        * as (max_val - min_val) == nlevels the error at max_val is less
+        * than 1%.
+        */
+       unsigned int slope = (128 * (max_val - min_val)) / nlevels;
+       unsigned int margin = (max_val - min_val) / 20; /* 5% */
+       int i;
+
+       for (i = 1; i < nlevels; i++) {
+               unsigned int linear_value = min_val + ((i * slope) / 128);
+               unsigned int delta = abs(linear_value - data->levels[i]);
+
+               if (delta > margin)
+                       return false;
+       }
+
+       return true;
+}
+
 static int pwm_backlight_initial_power_state(const struct pwm_bl_data *pb)
 {
        struct device_node *node = pb->dev->of_node;
@@ -536,6 +561,8 @@ static int pwm_backlight_probe(struct platform_device *pdev)
                goto err_alloc;
        }
 
+       memset(&props, 0, sizeof(struct backlight_properties));
+
        if (data->levels) {
                /*
                 * For the DT case, only when brightness levels is defined
@@ -548,6 +575,11 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
                        pb->levels = data->levels;
                }
+
+               if (pwm_backlight_is_linear(data))
+                       props.scale = BACKLIGHT_SCALE_LINEAR;
+               else
+                       props.scale = BACKLIGHT_SCALE_NON_LINEAR;
        } else if (!data->max_brightness) {
                /*
                 * If no brightness levels are provided and max_brightness is
@@ -574,6 +606,8 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
                        pb->levels = data->levels;
                }
+
+               props.scale = BACKLIGHT_SCALE_NON_LINEAR;
        } else {
                /*
                 * That only happens for the non-DT case, where platform data
@@ -584,7 +618,6 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
        pb->lth_brightness = data->lth_brightness * (state.period / pb->scale);
 
-       memset(&props, 0, sizeof(struct backlight_properties));
        props.type = BACKLIGHT_RAW;
        props.max_brightness = data->max_brightness;
        bl = backlight_device_register(dev_name(&pdev->dev), &pdev->dev, pb,
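
pwm_bl.c now classifies a DT brightness-level table by fitting a straight line through the table's endpoints: the slope is kept in fixed point (scaled by 128 to keep integer math accurate) and each entry may deviate by at most 5% of the total range. A standalone restatement of the heuristic with two sample tables, assumed to mirror the kernel logic above:

#include <stdio.h>

static int is_linear(const unsigned int *levels, unsigned int nlevels)
{
	unsigned int min_val = levels[0];
	unsigned int max_val = levels[nlevels - 1];
	unsigned int slope = (128 * (max_val - min_val)) / nlevels;
	unsigned int margin = (max_val - min_val) / 20;	/* 5% */
	unsigned int i;

	for (i = 1; i < nlevels; i++) {
		unsigned int fit = min_val + (i * slope) / 128;
		unsigned int delta = fit > levels[i] ? fit - levels[i]
						     : levels[i] - fit;
		if (delta > margin)
			return 0;
	}
	return 1;
}

int main(void)
{
	unsigned int ramp[256], curve[256], i;

	for (i = 0; i < 256; i++) {
		ramp[i] = i;			/* straight ramp */
		curve[i] = (i * i) / 255;	/* quadratic, perception-like */
	}
	printf("ramp:  %s\n", is_linear(ramp, 256) ? "linear" : "non-linear");
	printf("curve: %s\n", is_linear(curve, 256) ? "linear" : "non-linear");
	return 0;
}
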
index 462f14a..05b5f00 100644 (file)
@@ -48,14 +48,20 @@ static int rave_sp_backlight_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct backlight_device *bd;
 
-       bd = devm_backlight_device_register(dev, pdev->name, dev->parent,
+       bd = devm_backlight_device_register(dev, pdev->name, dev,
                                            dev_get_drvdata(dev->parent),
                                            &rave_sp_backlight_ops,
                                            &rave_sp_backlight_props);
        if (IS_ERR(bd))
                return PTR_ERR(bd);
 
-       backlight_update_status(bd);
+       /*
+        * If there is a phandle pointing to the device node we can
+        * assume that another device will manage the status changes.
+        * If not we make sure the backlight is in a consistent state.
+        */
+       if (!dev->of_node->phandle)
+               backlight_update_status(bd);
 
        return 0;
 }
index 65cb757..29af8e2 100644 (file)
@@ -222,8 +222,7 @@ static int tosa_lcd_remove(struct spi_device *spi)
 {
        struct tosa_lcd_data *data = spi_get_drvdata(spi);
 
-       if (data->i2c)
-               i2c_unregister_device(data->i2c);
+       i2c_unregister_device(data->i2c);
 
        tosa_lcd_tg_off(data);
 
index 228a89b..16f60c1 100644 (file)
@@ -18,23 +18,6 @@ obj-$(CONFIG_SPU_BASE)                       += logo_spe_clut224.o
 
 # How to generate logo's
 
-# Use logo-cfiles to retrieve list of .c files to be built
-logo-cfiles = $(notdir $(patsubst %.$(2), %.c, \
-              $(wildcard $(srctree)/$(src)/*$(1).$(2))))
-
-
-# Mono logos
-extra-y += $(call logo-cfiles,_mono,pbm)
-
-# VGA16 logos
-extra-y += $(call logo-cfiles,_vga16,ppm)
-
-# 224 Logos
-extra-y += $(call logo-cfiles,_clut224,ppm)
-
-# Gray 256
-extra-y += $(call logo-cfiles,_gray256,pgm)
-
 pnmtologo := scripts/pnmtologo
 
 # Create commands like "pnmtologo -t mono -n logo_mac_mono -o ..."
@@ -55,5 +38,5 @@ $(obj)/%_clut224.c: $(src)/%_clut224.ppm $(pnmtologo) FORCE
 $(obj)/%_gray256.c: $(src)/%_gray256.pgm $(pnmtologo) FORCE
        $(call if_changed,logo)
 
-# Files generated that shall be removed upon make clean
-clean-files := *.o *_mono.c *_vga16.c *_clut224.c *_gray256.c
+# generated C files
+targets += *_mono.c *_vga16.c *_clut224.c *_gray256.c
index 75fd140..43c3916 100644 (file)
@@ -220,6 +220,8 @@ static int hgcm_call_preprocess_linaddr(
        if (!bounce_buf)
                return -ENOMEM;
 
+       *bounce_buf_ret = bounce_buf;
+
        if (copy_in) {
                ret = copy_from_user(bounce_buf, (void __user *)buf, len);
                if (ret)
@@ -228,7 +230,6 @@ static int hgcm_call_preprocess_linaddr(
                memset(bounce_buf, 0, len);
        }
 
-       *bounce_buf_ret = bounce_buf;
        hgcm_call_add_pagelist_size(bounce_buf, len, extra);
        return 0;
 }
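
The vboxguest fix above is an ownership-ordering change: *bounce_buf_ret is now set before the fallible copy_from_user(), so when the copy fails the caller's existing cleanup path frees the bounce buffer instead of it leaking. A small sketch of the rule, with hypothetical names: publish an allocation to its eventual owner before the first failure point.

#include <errno.h>
#include <stdlib.h>
#include <string.h>

static int make_bounce(char **out, const char *src, size_t len)
{
	char *buf = malloc(len);

	if (!buf)
		return -ENOMEM;
	*out = buf;		/* caller owns the buffer from here on */

	if (!src)		/* stands in for a failing copy_from_user() */
		return -EINVAL;	/* caller's cleanup frees *out: no leak */

	memcpy(buf, src, len);
	return 0;
}

int main(void)
{
	char *bounce = NULL;

	if (make_bounce(&bounce, NULL, 16) < 0)
		free(bounce);	/* safe: NULL or the published buffer */
	return 0;
}
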
index bdc0824..a8041e4 100644 (file)
@@ -1499,9 +1499,6 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
                 * counter first before updating event flags.
                 */
                virtio_wmb(vq->weak_barriers);
-       } else {
-               used_idx = vq->last_used_idx;
-               wrap_counter = vq->packed.used_wrap_counter;
        }
 
        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
@@ -1518,7 +1515,9 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
         */
        virtio_mb(vq->weak_barriers);
 
-       if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
+       if (is_used_desc_packed(vq,
+                               vq->last_used_idx,
+                               vq->packed.used_wrap_counter)) {
                END_USE(vq);
                return false;
        }
index ebed495..b784763 100644 (file)
@@ -103,6 +103,7 @@ config W1_SLAVE_DS2438
 
 config W1_SLAVE_DS250X
        tristate "512b/1kb/16kb EPROM family support"
+       select CRC16
        help
          Say Y here if you want to use a 1-wire
          512b/1kb/16kb EPROM family device (DS250x).
index a45f9e3..58e7c10 100644 (file)
@@ -477,13 +477,6 @@ config IXP4XX_WATCHDOG
 
          Say N if you are unsure.
 
-config KS8695_WATCHDOG
-       tristate "KS8695 watchdog"
-       depends on ARCH_KS8695
-       help
-         Watchdog timer embedded into KS8695 processor. This will reboot your
-         system when the timeout is reached.
-
 config HAVE_S3C2410_WATCHDOG
        bool
        help
@@ -662,15 +655,6 @@ config STMP3XXX_RTC_WATCHDOG
          To compile this driver as a module, choose M here: the
          module will be called stmp3xxx_rtc_wdt.
 
-config NUC900_WATCHDOG
-       tristate "Nuvoton NUC900 watchdog"
-       depends on ARCH_W90X900 || COMPILE_TEST
-       help
-         Say Y here if to include support for the watchdog timer
-         for the Nuvoton NUC900 series SoCs.
-         To compile this driver as a module, choose M here: the
-         module will be called nuc900_wdt.
-
 config TS4800_WATCHDOG
        tristate "TS-4800 Watchdog"
        depends on HAS_IOMEM && OF
@@ -740,6 +724,19 @@ config IMX_SC_WDT
          To compile this driver as a module, choose M here: the
          module will be called imx_sc_wdt.
 
+config IMX7ULP_WDT
+       tristate "IMX7ULP Watchdog"
+       depends on ARCH_MXC || COMPILE_TEST
+       select WATCHDOG_CORE
+       help
+         This is the driver for the hardware watchdog on the Freescale
+         IMX7ULP and later processors. If you have one of these
+         processors and wish to have watchdog support enabled,
+         say Y, otherwise say N.
+
+         To compile this driver as a module, choose M here: the
+         module will be called imx7ulp_wdt.
+
 config UX500_WATCHDOG
        tristate "ST-Ericsson Ux500 watchdog"
        depends on MFD_DB8500_PRCMU
@@ -1046,8 +1043,8 @@ config F71808E_WDT
        depends on X86
        help
          This is the driver for the hardware watchdog on the Fintek F71808E,
-         F71862FG, F71868, F71869, F71882FG, F71889FG, F81865 and F81866
-         Super I/O controllers.
+         F71862FG, F71868, F71869, F71882FG, F71889FG, F81803, F81865, and
+         F81866 Super I/O controllers.
 
          You can compile this driver directly into the kernel, or use
          it as a module.  The module will be called f71808e_wdt.
index 7caa920..2ee352b 100644 (file)
@@ -49,7 +49,6 @@ obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
 obj-$(CONFIG_977_WATCHDOG) += wdt977.o
 obj-$(CONFIG_FTWDT010_WATCHDOG) += ftwdt010_wdt.o
 obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o
-obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
 obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
 obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o
 obj-$(CONFIG_SAMA5D4_WATCHDOG) += sama5d4_wdt.o
@@ -64,11 +63,11 @@ obj-$(CONFIG_RN5T618_WATCHDOG) += rn5t618_wdt.o
 obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o
 obj-$(CONFIG_NPCM7XX_WATCHDOG) += npcm_wdt.o
 obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o
-obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
 obj-$(CONFIG_TS4800_WATCHDOG) += ts4800_wdt.o
 obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o
 obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o
 obj-$(CONFIG_IMX_SC_WDT) += imx_sc_wdt.o
+obj-$(CONFIG_IMX7ULP_WDT) += imx7ulp_wdt.o
 obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o
 obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o
 obj-$(CONFIG_BCM2835_WDT) += bcm2835_wdt.o
index cc71861..4ec0906 100644 (file)
@@ -34,6 +34,7 @@ static const struct aspeed_wdt_config ast2500_config = {
 static const struct of_device_id aspeed_wdt_of_table[] = {
        { .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config },
        { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config },
+       { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config },
        { },
 };
 MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
@@ -53,6 +54,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
 #define   WDT_CTRL_ENABLE              BIT(0)
 #define WDT_TIMEOUT_STATUS     0x10
 #define   WDT_TIMEOUT_STATUS_BOOT_SECONDARY    BIT(1)
+#define WDT_CLEAR_TIMEOUT_STATUS       0x14
+#define   WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION    BIT(0)
 
 /*
  * WDT_RESET_WIDTH controls the characteristics of the external pulse (if
@@ -165,6 +168,60 @@ static int aspeed_wdt_restart(struct watchdog_device *wdd,
        return 0;
 }
 
+/* access_cs0 shows if cs0 is accessible, hence the inverted bit */
+static ssize_t access_cs0_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct aspeed_wdt *wdt = dev_get_drvdata(dev);
+       u32 status = readl(wdt->base + WDT_TIMEOUT_STATUS);
+
+       return sprintf(buf, "%u\n",
+                     !(status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY));
+}
+
+static ssize_t access_cs0_store(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t size)
+{
+       struct aspeed_wdt *wdt = dev_get_drvdata(dev);
+       unsigned long val;
+
+       if (kstrtoul(buf, 10, &val))
+               return -EINVAL;
+
+       if (val)
+               writel(WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION,
+                      wdt->base + WDT_CLEAR_TIMEOUT_STATUS);
+
+       return size;
+}
+
+/*
+ * This attribute exists only if the system has booted from the alternate
+ * flash with the 'alt-boot' option.
+ *
+ * When booted from the alternate flash, the 'access_cs0' sysfs node provides:
+ *   ast2400: a way to get access to the primary SPI flash chip at CS0
+ *            after booting from the alternate chip at CS1.
+ *   ast2500: a way to restore the normal address mapping from
+ *            (CS0->CS1, CS1->CS0) to (CS0->CS0, CS1->CS1).
+ *
+ * Clearing the boot code selection and timeout counter also resets the
+ * chip select line mapping to its initial state. When the SoC is in normal
+ * mapping state (i.e. booted from CS0), clearing those bits does nothing for
+ * both versions of the SoC. For alternate boot mode (booted from CS1 due to
+ * wdt2 expiration) the behavior differs as described above.
+ *
+ * This option can be used with wdt2 (watchdog1) only.
+ */
+static DEVICE_ATTR_RW(access_cs0);
+
+static struct attribute *bswitch_attrs[] = {
+       &dev_attr_access_cs0.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(bswitch);
+
 static const struct watchdog_ops aspeed_wdt_ops = {
        .start          = aspeed_wdt_start,
        .stop           = aspeed_wdt_stop,
@@ -259,7 +316,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
                set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
        }
 
-       if (of_device_is_compatible(np, "aspeed,ast2500-wdt")) {
+       if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) ||
+               (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) {
                u32 reg = readl(wdt->base + WDT_RESET_WIDTH);
 
                reg &= config->ext_pulse_width_mask;
@@ -306,9 +364,16 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
        }
 
        status = readl(wdt->base + WDT_TIMEOUT_STATUS);
-       if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY)
+       if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) {
                wdt->wdd.bootstatus = WDIOF_CARDRESET;
 
+               if (of_device_is_compatible(np, "aspeed,ast2400-wdt") ||
+                   of_device_is_compatible(np, "aspeed,ast2500-wdt"))
+                       wdt->wdd.groups = bswitch_groups;
+       }
+
+       dev_set_drvdata(dev, wdt);
+
        return devm_watchdog_register_device(dev, &wdt->wdd);
 }
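
A minimal user-space sketch of how the new attribute might be used after an alternate boot; the sysfs path is an assumption based on the driver comment above, which says the option applies to wdt2 (watchdog1) only:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Path assumed: the node only exists after booting from the
	 * alternate flash, and only on watchdog1. */
	const char *path = "/sys/class/watchdog/watchdog1/access_cs0";
	FILE *f = fopen(path, "r+");
	int cs0_accessible = 0;

	if (!f) {
		perror("access_cs0");	/* normal boot: node absent */
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%d", &cs0_accessible) == 1 && !cs0_accessible) {
		rewind(f);
		fputs("1\n", f);	/* clear boot code selection, remap CS0 */
	}
	fclose(f);
	return EXIT_SUCCESS;
}
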
 
index 2e09981..75de664 100644 (file)
@@ -302,7 +302,7 @@ static int ath79_wdt_remove(struct platform_device *pdev)
        return 0;
 }
 
-static void ath97_wdt_shutdown(struct platform_device *pdev)
+static void ath79_wdt_shutdown(struct platform_device *pdev)
 {
        ath79_wdt_disable();
 }
@@ -318,7 +318,7 @@ MODULE_DEVICE_TABLE(of, ath79_wdt_match);
 static struct platform_driver ath79_wdt_driver = {
        .probe          = ath79_wdt_probe,
        .remove         = ath79_wdt_remove,
-       .shutdown       = ath97_wdt_shutdown,
+       .shutdown       = ath79_wdt_shutdown,
        .driver         = {
                .name   = DRIVER_NAME,
                .of_match_table = of_match_ptr(ath79_wdt_match),
index b973b31..9393be5 100644 (file)
@@ -473,29 +473,6 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        return 0;
 }
 
-static long cpwd_compat_ioctl(struct file *file, unsigned int cmd,
-                             unsigned long arg)
-{
-       int rval = -ENOIOCTLCMD;
-
-       switch (cmd) {
-       /* solaris ioctls are specific to this driver */
-       case WIOCSTART:
-       case WIOCSTOP:
-       case WIOCGSTAT:
-               mutex_lock(&cpwd_mutex);
-               rval = cpwd_ioctl(file, cmd, arg);
-               mutex_unlock(&cpwd_mutex);
-               break;
-
-       /* everything else is handled by the generic compat layer */
-       default:
-               break;
-       }
-
-       return rval;
-}
-
 static ssize_t cpwd_write(struct file *file, const char __user *buf,
                          size_t count, loff_t *ppos)
 {
@@ -520,7 +497,7 @@ static ssize_t cpwd_read(struct file *file, char __user *buffer,
 static const struct file_operations cpwd_fops = {
        .owner =                THIS_MODULE,
        .unlocked_ioctl =       cpwd_ioctl,
-       .compat_ioctl =         cpwd_compat_ioctl,
+       .compat_ioctl =         compat_ptr_ioctl,
        .open =                 cpwd_open,
        .write =                cpwd_write,
        .read =                 cpwd_read,
index 181440b..aafc8d9 100644 (file)
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
-#include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/suspend.h>
 #include <asm/ebcdic.h>
 #include <asm/diag.h>
 #include <linux/io.h>
-#include <linux/uaccess.h>
 
 #define MAX_CMDLEN 240
 #define DEFAULT_CMD "SYSTEM RESTART"
@@ -70,7 +68,6 @@ MODULE_PARM_DESC(conceal, "Enable the CONCEAL CP option while the watchdog is ac
 module_param_named(nowayout, nowayout_info, bool, 0444);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default = CONFIG_WATCHDOG_NOWAYOUT)");
 
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
 MODULE_ALIAS("vmwatchdog");
 
 static int __diag288(unsigned int func, unsigned int timeout,
index ff5cf1b..e46104c 100644 (file)
 #define SIO_REG_DEVID          0x20    /* Device ID (2 bytes) */
 #define SIO_REG_DEVREV         0x22    /* Device revision */
 #define SIO_REG_MANID          0x23    /* Fintek ID (2 bytes) */
+#define SIO_REG_CLOCK_SEL      0x26    /* Clock select */
 #define SIO_REG_ROM_ADDR_SEL   0x27    /* ROM address select */
 #define SIO_F81866_REG_PORT_SEL        0x27    /* F81866 Multi-Function Register */
+#define SIO_REG_TSI_LEVEL_SEL  0x28    /* TSI Level select */
 #define SIO_REG_MFUNCT1                0x29    /* Multi function select 1 */
 #define SIO_REG_MFUNCT2                0x2a    /* Multi function select 2 */
 #define SIO_REG_MFUNCT3                0x2b    /* Multi function select 3 */
@@ -49,6 +51,7 @@
 #define SIO_F71869A_ID         0x1007  /* Chipset ID */
 #define SIO_F71882_ID          0x0541  /* Chipset ID */
 #define SIO_F71889_ID          0x0723  /* Chipset ID */
+#define SIO_F81803_ID          0x1210  /* Chipset ID */
 #define SIO_F81865_ID          0x0704  /* Chipset ID */
 #define SIO_F81866_ID          0x1010  /* Chipset ID */
 
@@ -108,7 +111,7 @@ MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with"
        " given initial timeout. Zero (default) disables this feature.");
 
 enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg,
-            f81865, f81866};
+            f81803, f81865, f81866};
 
 static const char *f71808e_names[] = {
        "f71808fg",
@@ -118,6 +121,7 @@ static const char *f71808e_names[] = {
        "f71869",
        "f71882fg",
        "f71889fg",
+       "f81803",
        "f81865",
        "f81866",
 };
@@ -370,6 +374,14 @@ static int watchdog_start(void)
                        superio_inb(watchdog.sioaddr, SIO_REG_MFUNCT3) & 0xcf);
                break;
 
+       case f81803:
+               /* Enable TSI Level register bank */
+               superio_clear_bit(watchdog.sioaddr, SIO_REG_CLOCK_SEL, 3);
+               /* Set pin 27 to WDTRST# */
+               superio_outb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL, 0x5f &
+                       superio_inb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL));
+               break;
+
        case f81865:
                /* Set pin 70 to WDTRST# */
                superio_clear_bit(watchdog.sioaddr, SIO_REG_MFUNCT3, 5);
@@ -809,6 +821,9 @@ static int __init f71808e_find(int sioaddr)
                /* Confirmed (by datasheet) not to have a watchdog. */
                err = -ENODEV;
                goto exit;
+       case SIO_F81803_ID:
+               watchdog.type = f81803;
+               break;
        case SIO_F81865_ID:
                watchdog.type = f81865;
                break;
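
For reference, the 0x5f mask in the new f81803 branch clears bits 7 and 5 of the TSI Level select register, which per the in-line comment routes pin 27 to WDTRST#. A trivial standalone check of the masking (values are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned char reg = 0xff;	/* worst case: all bits set */

	/* 0x5f == (unsigned char)~(0x80 | 0x20), i.e. clear bits 7 and 5 */
	printf("0x%02x & 0x5f = 0x%02x\n", reg, reg & 0x5f);
	return 0;
}
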
index c559f70..156360e 100644 (file)
@@ -48,6 +48,7 @@
 
 /* Includes */
 #include <linux/acpi.h>                        /* For ACPI support */
+#include <linux/bits.h>                        /* For BIT() */
 #include <linux/module.h>              /* For module specific items */
 #include <linux/moduleparam.h>         /* For new moduleparam's */
 #include <linux/types.h>               /* For standard types (like size_t) */
@@ -215,6 +216,23 @@ static int update_no_reboot_bit_mem(void *priv, bool set)
        return 0;
 }
 
+static int update_no_reboot_bit_cnt(void *priv, bool set)
+{
+       struct iTCO_wdt_private *p = priv;
+       u16 val, newval;
+
+       val = inw(TCO1_CNT(p));
+       if (set)
+               val |= BIT(0);
+       else
+               val &= ~BIT(0);
+       outw(val, TCO1_CNT(p));
+       newval = inw(TCO1_CNT(p));
+
+       /* make sure the update is successful */
+       return val != newval ? -EIO : 0;
+}
+
 static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p,
                struct itco_wdt_platform_data *pdata)
 {
@@ -224,7 +242,9 @@ static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p,
                return;
        }
 
-       if (p->iTCO_version >= 2)
+       if (p->iTCO_version >= 6)
+               p->update_no_reboot_bit = update_no_reboot_bit_cnt;
+       else if (p->iTCO_version >= 2)
                p->update_no_reboot_bit = update_no_reboot_bit_mem;
        else if (p->iTCO_version == 1)
                p->update_no_reboot_bit = update_no_reboot_bit_pci;
@@ -452,7 +472,8 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
         * Get the Memory-Mapped GCS or PMC register, we need it for the
         * NO_REBOOT flag (TCO v2 and v3).
         */
-       if (p->iTCO_version >= 2 && !pdata->update_no_reboot_bit) {
+       if (p->iTCO_version >= 2 && p->iTCO_version < 6 &&
+           !pdata->update_no_reboot_bit) {
                p->gcs_pmc_res = platform_get_resource(pdev,
                                                       IORESOURCE_MEM,
                                                       ICH_RES_MEM_GCS_PMC);
@@ -502,6 +523,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
 
        /* Clear out the (probably old) status */
        switch (p->iTCO_version) {
+       case 6:
        case 5:
        case 4:
                outw(0x0008, TCO1_STS(p)); /* Clear the Time Out Status bit */
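
A sketch of why update_no_reboot_bit_cnt() verifies the write with a read-back: if firmware keeps the NO_REBOOT bit strapped high, the driver must notice and fail rather than run a watchdog that can never reset the machine. Register access is stubbed out here; this is not the driver's code:

#include <errno.h>
#include <stdio.h>

/* Pretend the strap/lock keeps NO_REBOOT (bit 0) stuck at 1. */
static unsigned short tco1_cnt = 0x0001;

static unsigned short inw_stub(void) { return tco1_cnt; }
static void outw_stub(unsigned short v) { (void)v; /* write ignored */ }

int main(void)
{
	unsigned short val = inw_stub() & ~1;	/* try to clear NO_REBOOT */

	outw_stub(val);
	if (inw_stub() != val) {		/* read-back mismatch */
		printf("NO_REBOOT locked, would return %d\n", -EIO);
		return 1;
	}
	return 0;
}
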
index 32af397..8d019a9 100644 (file)
@@ -55,7 +55,7 @@
 
 #define IMX2_WDT_WMCR          0x08            /* Misc Register */
 
-#define IMX2_WDT_MAX_TIME      128
+#define IMX2_WDT_MAX_TIME      128U
 #define IMX2_WDT_DEFAULT_TIME  60              /* in seconds */
 
 #define WDOG_SEC_TO_COUNT(s)   ((s * 2 - 1) << 8)
@@ -180,7 +180,7 @@ static int imx2_wdt_set_timeout(struct watchdog_device *wdog,
 {
        unsigned int actual;
 
-       actual = min(new_timeout, wdog->max_hw_heartbeat_ms * 1000);
+       actual = min(new_timeout, IMX2_WDT_MAX_TIME);
        __imx2_wdt_set_timeout(wdog, actual);
        wdog->timeout = new_timeout;
        return 0;
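
Context for the fix above: the old expression compared a value in seconds against max_hw_heartbeat_ms * 1000, which is orders of magnitude larger, so the clamp never applied and timeouts above 128 s would overflow the 8-bit WT field of WCR. A standalone sketch of the corrected semantics (user-space, illustrative only):

#include <stdio.h>

#define IMX2_WDT_MAX_TIME	128U
#define WDOG_SEC_TO_COUNT(s)	(((s) * 2 - 1) << 8)

int main(void)
{
	unsigned int new_timeout = 300;		/* user asks for 5 minutes */
	unsigned int actual = new_timeout < IMX2_WDT_MAX_TIME ?
			      new_timeout : IMX2_WDT_MAX_TIME;

	/* hardware gets at most 128 s ... */
	printf("WCR count field: 0x%04x\n", WDOG_SEC_TO_COUNT(actual));
	/* ... while wdog->timeout keeps 300 s; the watchdog core pings
	 * the hardware on the user's behalf for the longer period */
	return 0;
}
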
diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c
new file mode 100644 (file)
index 0000000..5ce5102
--- /dev/null
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 NXP.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+
+#define WDOG_CS                        0x0
+#define WDOG_CS_CMD32EN                BIT(13)
+#define WDOG_CS_ULK            BIT(11)
+#define WDOG_CS_RCS            BIT(10)
+#define WDOG_CS_EN             BIT(7)
+#define WDOG_CS_UPDATE         BIT(5)
+
+#define WDOG_CNT       0x4
+#define WDOG_TOVAL     0x8
+
+#define REFRESH_SEQ0   0xA602
+#define REFRESH_SEQ1   0xB480
+#define REFRESH                ((REFRESH_SEQ1 << 16) | REFRESH_SEQ0)
+
+#define UNLOCK_SEQ0    0xC520
+#define UNLOCK_SEQ1    0xD928
+#define UNLOCK         ((UNLOCK_SEQ1 << 16) | UNLOCK_SEQ0)
+
+#define DEFAULT_TIMEOUT        60
+#define MAX_TIMEOUT    128
+#define WDOG_CLOCK_RATE        1000
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0000);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+                __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+struct imx7ulp_wdt_device {
+       struct notifier_block restart_handler;
+       struct watchdog_device wdd;
+       void __iomem *base;
+       struct clk *clk;
+};
+
+static inline void imx7ulp_wdt_enable(void __iomem *base, bool enable)
+{
+       u32 val = readl(base + WDOG_CS);
+
+       writel(UNLOCK, base + WDOG_CNT);
+       if (enable)
+               writel(val | WDOG_CS_EN, base + WDOG_CS);
+       else
+               writel(val & ~WDOG_CS_EN, base + WDOG_CS);
+}
+
+static inline bool imx7ulp_wdt_is_enabled(void __iomem *base)
+{
+       u32 val = readl(base + WDOG_CS);
+
+       return val & WDOG_CS_EN;
+}
+
+static int imx7ulp_wdt_ping(struct watchdog_device *wdog)
+{
+       struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
+
+       writel(REFRESH, wdt->base + WDOG_CNT);
+
+       return 0;
+}
+
+static int imx7ulp_wdt_start(struct watchdog_device *wdog)
+{
+       struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
+
+       imx7ulp_wdt_enable(wdt->base, true);
+
+       return 0;
+}
+
+static int imx7ulp_wdt_stop(struct watchdog_device *wdog)
+{
+       struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
+
+       imx7ulp_wdt_enable(wdt->base, false);
+
+       return 0;
+}
+
+static int imx7ulp_wdt_set_timeout(struct watchdog_device *wdog,
+                                  unsigned int timeout)
+{
+       struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
+       u32 val = WDOG_CLOCK_RATE * timeout;
+
+       writel(UNLOCK, wdt->base + WDOG_CNT);
+       writel(val, wdt->base + WDOG_TOVAL);
+
+       wdog->timeout = timeout;
+
+       return 0;
+}
+
+static const struct watchdog_ops imx7ulp_wdt_ops = {
+       .owner = THIS_MODULE,
+       .start = imx7ulp_wdt_start,
+       .stop  = imx7ulp_wdt_stop,
+       .ping  = imx7ulp_wdt_ping,
+       .set_timeout = imx7ulp_wdt_set_timeout,
+};
+
+static const struct watchdog_info imx7ulp_wdt_info = {
+       .identity = "i.MX7ULP watchdog timer",
+       .options  = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+                   WDIOF_MAGICCLOSE,
+};
+
+static inline void imx7ulp_wdt_init(void __iomem *base, unsigned int timeout)
+{
+       u32 val;
+
+       /* unlock the wdog for reconfiguration */
+       writel_relaxed(UNLOCK_SEQ0, base + WDOG_CNT);
+       writel_relaxed(UNLOCK_SEQ1, base + WDOG_CNT);
+
+       /* set an initial timeout value in TOVAL */
+       writel(timeout, base + WDOG_TOVAL);
+       /* enable the 32-bit command sequence and allow reconfiguration */
+       val = WDOG_CS_CMD32EN | BIT(8) | WDOG_CS_UPDATE;
+       writel(val, base + WDOG_CS);
+}
+
+static void imx7ulp_wdt_action(void *data)
+{
+       clk_disable_unprepare(data);
+}
+
+static int imx7ulp_wdt_probe(struct platform_device *pdev)
+{
+       struct imx7ulp_wdt_device *imx7ulp_wdt;
+       struct device *dev = &pdev->dev;
+       struct watchdog_device *wdog;
+       int ret;
+
+       imx7ulp_wdt = devm_kzalloc(dev, sizeof(*imx7ulp_wdt), GFP_KERNEL);
+       if (!imx7ulp_wdt)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, imx7ulp_wdt);
+
+       imx7ulp_wdt->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(imx7ulp_wdt->base))
+               return PTR_ERR(imx7ulp_wdt->base);
+
+       imx7ulp_wdt->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(imx7ulp_wdt->clk)) {
+               dev_err(dev, "Failed to get watchdog clock\n");
+               return PTR_ERR(imx7ulp_wdt->clk);
+       }
+
+       ret = clk_prepare_enable(imx7ulp_wdt->clk);
+       if (ret)
+               return ret;
+
+       ret = devm_add_action_or_reset(dev, imx7ulp_wdt_action, imx7ulp_wdt->clk);
+       if (ret)
+               return ret;
+
+       wdog = &imx7ulp_wdt->wdd;
+       wdog->info = &imx7ulp_wdt_info;
+       wdog->ops = &imx7ulp_wdt_ops;
+       wdog->min_timeout = 1;
+       wdog->max_timeout = MAX_TIMEOUT;
+       wdog->parent = dev;
+       wdog->timeout = DEFAULT_TIMEOUT;
+
+       watchdog_init_timeout(wdog, 0, dev);
+       watchdog_stop_on_reboot(wdog);
+       watchdog_stop_on_unregister(wdog);
+       watchdog_set_drvdata(wdog, imx7ulp_wdt);
+       imx7ulp_wdt_init(imx7ulp_wdt->base, wdog->timeout * WDOG_CLOCK_RATE);
+
+       return devm_watchdog_register_device(dev, wdog);
+}
+
+static int __maybe_unused imx7ulp_wdt_suspend(struct device *dev)
+{
+       struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev);
+
+       if (watchdog_active(&imx7ulp_wdt->wdd))
+               imx7ulp_wdt_stop(&imx7ulp_wdt->wdd);
+
+       clk_disable_unprepare(imx7ulp_wdt->clk);
+
+       return 0;
+}
+
+static int __maybe_unused imx7ulp_wdt_resume(struct device *dev)
+{
+       struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev);
+       u32 timeout = imx7ulp_wdt->wdd.timeout * WDOG_CLOCK_RATE;
+       int ret;
+
+       ret = clk_prepare_enable(imx7ulp_wdt->clk);
+       if (ret)
+               return ret;
+
+       if (imx7ulp_wdt_is_enabled(imx7ulp_wdt->base))
+               imx7ulp_wdt_init(imx7ulp_wdt->base, timeout);
+
+       if (watchdog_active(&imx7ulp_wdt->wdd))
+               imx7ulp_wdt_start(&imx7ulp_wdt->wdd);
+
+       return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(imx7ulp_wdt_pm_ops, imx7ulp_wdt_suspend,
+                        imx7ulp_wdt_resume);
+
+static const struct of_device_id imx7ulp_wdt_dt_ids[] = {
+       { .compatible = "fsl,imx7ulp-wdt", },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx7ulp_wdt_dt_ids);
+
+static struct platform_driver imx7ulp_wdt_driver = {
+       .probe          = imx7ulp_wdt_probe,
+       .driver         = {
+               .name   = "imx7ulp-wdt",
+               .pm     = &imx7ulp_wdt_pm_ops,
+               .of_match_table = imx7ulp_wdt_dt_ids,
+       },
+};
+module_platform_driver(imx7ulp_wdt_driver);
+
+MODULE_AUTHOR("Anson Huang <Anson.Huang@nxp.com>");
+MODULE_DESCRIPTION("Freescale i.MX7ULP watchdog driver");
+MODULE_LICENSE("GPL v2");
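
A hypothetical user-space smoke test of the new driver through the standard watchdog character device; the device node name is an assumption:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/watchdog.h>

int main(void)
{
	int timeout = 60;
	int fd = open("/dev/watchdog0", O_WRONLY);	/* node name assumed */

	if (fd < 0)
		return 1;
	ioctl(fd, WDIOC_SETTIMEOUT, &timeout);	/* imx7ulp_wdt_set_timeout */
	ioctl(fd, WDIOC_KEEPALIVE);		/* imx7ulp_wdt_ping */
	write(fd, "V", 1);	/* magic close; honoured unless nowayout */
	close(fd);
	return 0;
}
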
index 78eaaf7..7ea5cf5 100644 (file)
@@ -175,12 +175,9 @@ static int imx_sc_wdt_probe(struct platform_device *pdev)
        watchdog_stop_on_unregister(wdog);
 
        ret = devm_watchdog_register_device(dev, wdog);
-       if (ret) {
-               dev_err(dev, "Failed to register watchdog device\n");
-               return ret;
-       }
+       if (ret)
+               return ret;
+
        ret = imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG,
                                       SC_IRQ_WDOG,
                                       true);
index d4a9091..c6052ae 100644 (file)
@@ -162,7 +162,6 @@ static int jz4740_wdt_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct jz4740_wdt_drvdata *drvdata;
        struct watchdog_device *jz4740_wdt;
-       int ret;
 
        drvdata = devm_kzalloc(dev, sizeof(struct jz4740_wdt_drvdata),
                               GFP_KERNEL);
diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c
deleted file mode 100644 (file)
index 1550ce3..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Watchdog driver for Kendin/Micrel KS8695.
- *
- * (C) 2007 Andrew Victor
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/bitops.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/types.h>
-#include <linux/watchdog.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
-#include <mach/hardware.h>
-
-#define KS8695_TMR_OFFSET      (0xF0000 + 0xE400)
-#define KS8695_TMR_VA          (KS8695_IO_VA + KS8695_TMR_OFFSET)
-
-/*
- * Timer registers
- */
-#define KS8695_TMCON           (0x00)          /* Timer Control Register */
-#define KS8695_T0TC            (0x08)          /* Timer 0 Timeout Count Register */
-#define TMCON_T0EN             (1 << 0)        /* Timer 0 Enable */
-
-/* Timer0 Timeout Counter Register */
-#define T0TC_WATCHDOG          (0xff)          /* Enable watchdog mode */
-
-#define WDT_DEFAULT_TIME       5       /* seconds */
-#define WDT_MAX_TIME           171     /* seconds */
-
-static int wdt_time = WDT_DEFAULT_TIME;
-static bool nowayout = WATCHDOG_NOWAYOUT;
-
-module_param(wdt_time, int, 0);
-MODULE_PARM_DESC(wdt_time, "Watchdog time in seconds. (default="
-                                       __MODULE_STRING(WDT_DEFAULT_TIME) ")");
-
-#ifdef CONFIG_WATCHDOG_NOWAYOUT
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
-                               __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-#endif
-
-
-static unsigned long ks8695wdt_busy;
-static DEFINE_SPINLOCK(ks8695_lock);
-
-/* ......................................................................... */
-
-/*
- * Disable the watchdog.
- */
-static inline void ks8695_wdt_stop(void)
-{
-       unsigned long tmcon;
-
-       spin_lock(&ks8695_lock);
-       /* disable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       spin_unlock(&ks8695_lock);
-}
-
-/*
- * Enable and reset the watchdog.
- */
-static inline void ks8695_wdt_start(void)
-{
-       unsigned long tmcon;
-       unsigned long tval = wdt_time * KS8695_CLOCK_RATE;
-
-       spin_lock(&ks8695_lock);
-       /* disable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-
-       /* program timer0 */
-       __raw_writel(tval | T0TC_WATCHDOG, KS8695_TMR_VA + KS8695_T0TC);
-
-       /* re-enable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       spin_unlock(&ks8695_lock);
-}
-
-/*
- * Reload the watchdog timer.  (ie, pat the watchdog)
- */
-static inline void ks8695_wdt_reload(void)
-{
-       unsigned long tmcon;
-
-       spin_lock(&ks8695_lock);
-       /* disable, then re-enable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       spin_unlock(&ks8695_lock);
-}
-
-/*
- * Change the watchdog time interval.
- */
-static int ks8695_wdt_settimeout(int new_time)
-{
-       /*
-        * All counting occurs at KS8695_CLOCK_RATE / 128 = 0.256 Hz
-        *
-        * Since WDV is a 16-bit counter, the maximum period is
-        * 65536 / 0.256 = 256 seconds.
-        */
-       if ((new_time <= 0) || (new_time > WDT_MAX_TIME))
-               return -EINVAL;
-
-       /* Set new watchdog time. It will be used when
-          ks8695_wdt_start() is called. */
-       wdt_time = new_time;
-       return 0;
-}
-
-/* ......................................................................... */
-
-/*
- * Watchdog device is opened, and watchdog starts running.
- */
-static int ks8695_wdt_open(struct inode *inode, struct file *file)
-{
-       if (test_and_set_bit(0, &ks8695wdt_busy))
-               return -EBUSY;
-
-       ks8695_wdt_start();
-       return stream_open(inode, file);
-}
-
-/*
- * Close the watchdog device.
- * If CONFIG_WATCHDOG_NOWAYOUT is NOT defined then the watchdog is also
- *  disabled.
- */
-static int ks8695_wdt_close(struct inode *inode, struct file *file)
-{
-       /* Disable the watchdog when file is closed */
-       if (!nowayout)
-               ks8695_wdt_stop();
-       clear_bit(0, &ks8695wdt_busy);
-       return 0;
-}
-
-static const struct watchdog_info ks8695_wdt_info = {
-       .identity       = "ks8695 watchdog",
-       .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
-};
-
-/*
- * Handle commands from user-space.
- */
-static long ks8695_wdt_ioctl(struct file *file, unsigned int cmd,
-                                                       unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-       int __user *p = argp;
-       int new_value;
-
-       switch (cmd) {
-       case WDIOC_GETSUPPORT:
-               return copy_to_user(argp, &ks8695_wdt_info,
-                                       sizeof(ks8695_wdt_info)) ? -EFAULT : 0;
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               return put_user(0, p);
-       case WDIOC_SETOPTIONS:
-               if (get_user(new_value, p))
-                       return -EFAULT;
-               if (new_value & WDIOS_DISABLECARD)
-                       ks8695_wdt_stop();
-               if (new_value & WDIOS_ENABLECARD)
-                       ks8695_wdt_start();
-               return 0;
-       case WDIOC_KEEPALIVE:
-               ks8695_wdt_reload();    /* pat the watchdog */
-               return 0;
-       case WDIOC_SETTIMEOUT:
-               if (get_user(new_value, p))
-                       return -EFAULT;
-               if (ks8695_wdt_settimeout(new_value))
-                       return -EINVAL;
-               /* Enable new time value */
-               ks8695_wdt_start();
-               /* Return current value */
-               return put_user(wdt_time, p);
-       case WDIOC_GETTIMEOUT:
-               return put_user(wdt_time, p);
-       default:
-               return -ENOTTY;
-       }
-}
-
-/*
- * Pat the watchdog whenever device is written to.
- */
-static ssize_t ks8695_wdt_write(struct file *file, const char *data,
-                                               size_t len, loff_t *ppos)
-{
-       ks8695_wdt_reload();            /* pat the watchdog */
-       return len;
-}
-
-/* ......................................................................... */
-
-static const struct file_operations ks8695wdt_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .unlocked_ioctl = ks8695_wdt_ioctl,
-       .open           = ks8695_wdt_open,
-       .release        = ks8695_wdt_close,
-       .write          = ks8695_wdt_write,
-};
-
-static struct miscdevice ks8695wdt_miscdev = {
-       .minor          = WATCHDOG_MINOR,
-       .name           = "watchdog",
-       .fops           = &ks8695wdt_fops,
-};
-
-static int ks8695wdt_probe(struct platform_device *pdev)
-{
-       int res;
-
-       if (ks8695wdt_miscdev.parent)
-               return -EBUSY;
-       ks8695wdt_miscdev.parent = &pdev->dev;
-
-       res = misc_register(&ks8695wdt_miscdev);
-       if (res)
-               return res;
-
-       pr_info("KS8695 Watchdog Timer enabled (%d seconds%s)\n",
-               wdt_time, nowayout ? ", nowayout" : "");
-       return 0;
-}
-
-static int ks8695wdt_remove(struct platform_device *pdev)
-{
-       misc_deregister(&ks8695wdt_miscdev);
-       ks8695wdt_miscdev.parent = NULL;
-
-       return 0;
-}
-
-static void ks8695wdt_shutdown(struct platform_device *pdev)
-{
-       ks8695_wdt_stop();
-}
-
-#ifdef CONFIG_PM
-
-static int ks8695wdt_suspend(struct platform_device *pdev, pm_message_t message)
-{
-       ks8695_wdt_stop();
-       return 0;
-}
-
-static int ks8695wdt_resume(struct platform_device *pdev)
-{
-       if (ks8695wdt_busy)
-               ks8695_wdt_start();
-       return 0;
-}
-
-#else
-#define ks8695wdt_suspend NULL
-#define ks8695wdt_resume       NULL
-#endif
-
-static struct platform_driver ks8695wdt_driver = {
-       .probe          = ks8695wdt_probe,
-       .remove         = ks8695wdt_remove,
-       .shutdown       = ks8695wdt_shutdown,
-       .suspend        = ks8695wdt_suspend,
-       .resume         = ks8695wdt_resume,
-       .driver         = {
-               .name   = "ks8695_wdt",
-       },
-};
-
-static int __init ks8695_wdt_init(void)
-{
-       /* Check that the heartbeat value is within range;
-          if not reset to the default */
-       if (ks8695_wdt_settimeout(wdt_time)) {
-               ks8695_wdt_settimeout(WDT_DEFAULT_TIME);
-               pr_info("ks8695_wdt: wdt_time value must be 1 <= wdt_time <= %i"
-                                       ", using %d\n", wdt_time, WDT_MAX_TIME);
-       }
-       return platform_driver_register(&ks8695wdt_driver);
-}
-
-static void __exit ks8695_wdt_exit(void)
-{
-       platform_driver_unregister(&ks8695wdt_driver);
-}
-
-module_init(ks8695_wdt_init);
-module_exit(ks8695_wdt_exit);
-
-MODULE_AUTHOR("Andrew Victor");
-MODULE_DESCRIPTION("Watchdog driver for KS8695");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:ks8695_wdt");
diff --git a/drivers/watchdog/nuc900_wdt.c b/drivers/watchdog/nuc900_wdt.c
deleted file mode 100644 (file)
index db124ce..0000000
+++ /dev/null
@@ -1,302 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2009 Nuvoton technology corporation.
- *
- * Wan ZongShun <mcuos.com@gmail.com>
- */
-
-#include <linux/bitops.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/kernel.h>
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/watchdog.h>
-#include <linux/uaccess.h>
-
-#define REG_WTCR               0x1c
-#define WTCLK                  (0x01 << 10)
-#define WTE                    (0x01 << 7)     /*wdt enable*/
-#define WTIS                   (0x03 << 4)
-#define WTIF                   (0x01 << 3)
-#define WTRF                   (0x01 << 2)
-#define WTRE                   (0x01 << 1)
-#define WTR                    (0x01 << 0)
-/*
- * The watchdog time interval can be calculated via following formula:
- * WTIS                real time interval (formula)
- * 0x00                ((2^ 14 ) * ((external crystal freq) / 256))seconds
- * 0x01                ((2^ 16 ) * ((external crystal freq) / 256))seconds
- * 0x02                ((2^ 18 ) * ((external crystal freq) / 256))seconds
- * 0x03                ((2^ 20 ) * ((external crystal freq) / 256))seconds
- *
- * The external crystal freq is 15Mhz in the nuc900 evaluation board.
- * So 0x00 = +-0.28 seconds, 0x01 = +-1.12 seconds, 0x02 = +-4.48 seconds,
- * 0x03 = +- 16.92 seconds..
- */
-#define WDT_HW_TIMEOUT         0x02
-#define WDT_TIMEOUT            (HZ/2)
-#define WDT_HEARTBEAT          15
-
-static int heartbeat = WDT_HEARTBEAT;
-module_param(heartbeat, int, 0);
-MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. "
-       "(default = " __MODULE_STRING(WDT_HEARTBEAT) ")");
-
-static bool nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
-       "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-struct nuc900_wdt {
-       struct clk       *wdt_clock;
-       struct platform_device *pdev;
-       void __iomem     *wdt_base;
-       char             expect_close;
-       struct timer_list timer;
-       spinlock_t       wdt_lock;
-       unsigned long next_heartbeat;
-};
-
-static unsigned long nuc900wdt_busy;
-static struct nuc900_wdt *nuc900_wdt;
-
-static inline void nuc900_wdt_keepalive(void)
-{
-       unsigned int val;
-
-       spin_lock(&nuc900_wdt->wdt_lock);
-
-       val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR);
-       val |= (WTR | WTIF);
-       __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR);
-
-       spin_unlock(&nuc900_wdt->wdt_lock);
-}
-
-static inline void nuc900_wdt_start(void)
-{
-       unsigned int val;
-
-       spin_lock(&nuc900_wdt->wdt_lock);
-
-       val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR);
-       val |= (WTRE | WTE | WTR | WTCLK | WTIF);
-       val &= ~WTIS;
-       val |= (WDT_HW_TIMEOUT << 0x04);
-       __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR);
-
-       spin_unlock(&nuc900_wdt->wdt_lock);
-
-       nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ;
-       mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT);
-}
-
-static inline void nuc900_wdt_stop(void)
-{
-       unsigned int val;
-
-       del_timer(&nuc900_wdt->timer);
-
-       spin_lock(&nuc900_wdt->wdt_lock);
-
-       val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR);
-       val &= ~WTE;
-       __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR);
-
-       spin_unlock(&nuc900_wdt->wdt_lock);
-}
-
-static inline void nuc900_wdt_ping(void)
-{
-       nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ;
-}
-
-static int nuc900_wdt_open(struct inode *inode, struct file *file)
-{
-
-       if (test_and_set_bit(0, &nuc900wdt_busy))
-               return -EBUSY;
-
-       nuc900_wdt_start();
-
-       return stream_open(inode, file);
-}
-
-static int nuc900_wdt_close(struct inode *inode, struct file *file)
-{
-       if (nuc900_wdt->expect_close == 42)
-               nuc900_wdt_stop();
-       else {
-               dev_crit(&nuc900_wdt->pdev->dev,
-                       "Unexpected close, not stopping watchdog!\n");
-               nuc900_wdt_ping();
-       }
-
-       nuc900_wdt->expect_close = 0;
-       clear_bit(0, &nuc900wdt_busy);
-       return 0;
-}
-
-static const struct watchdog_info nuc900_wdt_info = {
-       .identity       = "nuc900 watchdog",
-       .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
-                                               WDIOF_MAGICCLOSE,
-};
-
-static long nuc900_wdt_ioctl(struct file *file,
-                                       unsigned int cmd, unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-       int __user *p = argp;
-       int new_value;
-
-       switch (cmd) {
-       case WDIOC_GETSUPPORT:
-               return copy_to_user(argp, &nuc900_wdt_info,
-                               sizeof(nuc900_wdt_info)) ? -EFAULT : 0;
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               return put_user(0, p);
-
-       case WDIOC_KEEPALIVE:
-               nuc900_wdt_ping();
-               return 0;
-
-       case WDIOC_SETTIMEOUT:
-               if (get_user(new_value, p))
-                       return -EFAULT;
-
-               heartbeat = new_value;
-               nuc900_wdt_ping();
-
-               return put_user(new_value, p);
-       case WDIOC_GETTIMEOUT:
-               return put_user(heartbeat, p);
-       default:
-               return -ENOTTY;
-       }
-}
-
-static ssize_t nuc900_wdt_write(struct file *file, const char __user *data,
-                                               size_t len, loff_t *ppos)
-{
-       if (!len)
-               return 0;
-
-       /* Scan for magic character */
-       if (!nowayout) {
-               size_t i;
-
-               nuc900_wdt->expect_close = 0;
-
-               for (i = 0; i < len; i++) {
-                       char c;
-                       if (get_user(c, data + i))
-                               return -EFAULT;
-                       if (c == 'V') {
-                               nuc900_wdt->expect_close = 42;
-                               break;
-                       }
-               }
-       }
-
-       nuc900_wdt_ping();
-       return len;
-}
-
-static void nuc900_wdt_timer_ping(struct timer_list *unused)
-{
-       if (time_before(jiffies, nuc900_wdt->next_heartbeat)) {
-               nuc900_wdt_keepalive();
-               mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT);
-       } else
-               dev_warn(&nuc900_wdt->pdev->dev, "Will reset the machine !\n");
-}
-
-static const struct file_operations nuc900wdt_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .unlocked_ioctl = nuc900_wdt_ioctl,
-       .open           = nuc900_wdt_open,
-       .release        = nuc900_wdt_close,
-       .write          = nuc900_wdt_write,
-};
-
-static struct miscdevice nuc900wdt_miscdev = {
-       .minor          = WATCHDOG_MINOR,
-       .name           = "watchdog",
-       .fops           = &nuc900wdt_fops,
-};
-
-static int nuc900wdt_probe(struct platform_device *pdev)
-{
-       int ret = 0;
-
-       nuc900_wdt = devm_kzalloc(&pdev->dev, sizeof(*nuc900_wdt),
-                               GFP_KERNEL);
-       if (!nuc900_wdt)
-               return -ENOMEM;
-
-       nuc900_wdt->pdev = pdev;
-
-       spin_lock_init(&nuc900_wdt->wdt_lock);
-
-       nuc900_wdt->wdt_base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(nuc900_wdt->wdt_base))
-               return PTR_ERR(nuc900_wdt->wdt_base);
-
-       nuc900_wdt->wdt_clock = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(nuc900_wdt->wdt_clock)) {
-               dev_err(&pdev->dev, "failed to find watchdog clock source\n");
-               return PTR_ERR(nuc900_wdt->wdt_clock);
-       }
-
-       clk_enable(nuc900_wdt->wdt_clock);
-
-       timer_setup(&nuc900_wdt->timer, nuc900_wdt_timer_ping, 0);
-
-       ret = misc_register(&nuc900wdt_miscdev);
-       if (ret) {
-               dev_err(&pdev->dev, "err register miscdev on minor=%d (%d)\n",
-                       WATCHDOG_MINOR, ret);
-               goto err_clk;
-       }
-
-       return 0;
-
-err_clk:
-       clk_disable(nuc900_wdt->wdt_clock);
-       return ret;
-}
-
-static int nuc900wdt_remove(struct platform_device *pdev)
-{
-       misc_deregister(&nuc900wdt_miscdev);
-
-       clk_disable(nuc900_wdt->wdt_clock);
-
-       return 0;
-}
-
-static struct platform_driver nuc900wdt_driver = {
-       .probe          = nuc900wdt_probe,
-       .remove         = nuc900wdt_remove,
-       .driver         = {
-               .name   = "nuc900-wdt",
-       },
-};
-
-module_platform_driver(nuc900wdt_driver);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("Watchdog driver for NUC900");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-wdt");
index cdb0d17..1cccf8e 100644 (file)
  * Watchdog timer block registers.
  */
 #define TIMER_CTRL             0x0000
-#define TIMER_A370_STATUS      0x04
+#define TIMER1_FIXED_ENABLE_BIT        BIT(12)
+#define WDT_AXP_FIXED_ENABLE_BIT BIT(10)
+#define TIMER1_ENABLE_BIT      BIT(2)
+
+#define TIMER_A370_STATUS      0x0004
+#define WDT_A370_EXPIRED       BIT(31)
+#define TIMER1_STATUS_BIT      BIT(8)
+
+#define TIMER1_VAL_OFF         0x001c
 
 #define WDT_MAX_CYCLE_COUNT    0xffffffff
 
@@ -43,9 +51,6 @@
 #define WDT_A370_RATIO_SHIFT   5
 #define WDT_A370_RATIO         (1 << WDT_A370_RATIO_SHIFT)
 
-#define WDT_AXP_FIXED_ENABLE_BIT BIT(10)
-#define WDT_A370_EXPIRED       BIT(31)
-
 static bool nowayout = WATCHDOG_NOWAYOUT;
 static int heartbeat = -1;             /* module parameter (seconds) */
 
@@ -158,6 +163,7 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev,
                                   struct orion_watchdog *dev)
 {
        int ret;
+       u32 val;
 
        dev->clk = of_clk_get_by_name(pdev->dev.of_node, "fixed");
        if (IS_ERR(dev->clk))
@@ -168,10 +174,9 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev,
                return ret;
        }
 
-       /* Enable the fixed watchdog clock input */
-       atomic_io_modify(dev->reg + TIMER_CTRL,
-                        WDT_AXP_FIXED_ENABLE_BIT,
-                        WDT_AXP_FIXED_ENABLE_BIT);
+       /* Fix the wdt and timer1 clock frequency to 25 MHz */
+       val = WDT_AXP_FIXED_ENABLE_BIT | TIMER1_FIXED_ENABLE_BIT;
+       atomic_io_modify(dev->reg + TIMER_CTRL, val, val);
 
        dev->clk_rate = clk_get_rate(dev->clk);
        return 0;
@@ -183,6 +188,10 @@ static int orion_wdt_ping(struct watchdog_device *wdt_dev)
        /* Reload watchdog duration */
        writel(dev->clk_rate * wdt_dev->timeout,
               dev->reg + dev->data->wdt_counter_offset);
+       if (dev->wdt.info->options & WDIOF_PRETIMEOUT)
+               writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout),
+                      dev->reg + TIMER1_VAL_OFF);
+
        return 0;
 }
 
@@ -194,13 +203,18 @@ static int armada375_start(struct watchdog_device *wdt_dev)
        /* Set watchdog duration */
        writel(dev->clk_rate * wdt_dev->timeout,
               dev->reg + dev->data->wdt_counter_offset);
+       if (dev->wdt.info->options & WDIOF_PRETIMEOUT)
+               writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout),
+                      dev->reg + TIMER1_VAL_OFF);
 
        /* Clear the watchdog expiration bit */
        atomic_io_modify(dev->reg + TIMER_A370_STATUS, WDT_A370_EXPIRED, 0);
 
        /* Enable watchdog timer */
-       atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit,
-                                               dev->data->wdt_enable_bit);
+       reg = dev->data->wdt_enable_bit;
+       if (dev->wdt.info->options & WDIOF_PRETIMEOUT)
+               reg |= TIMER1_ENABLE_BIT;
+       atomic_io_modify(dev->reg + TIMER_CTRL, reg, reg);
 
        /* Enable reset on watchdog */
        reg = readl(dev->rstout);
@@ -277,7 +291,7 @@ static int orion_stop(struct watchdog_device *wdt_dev)
 static int armada375_stop(struct watchdog_device *wdt_dev)
 {
        struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
-       u32 reg;
+       u32 reg, mask;
 
        /* Disable reset on watchdog */
        atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit,
@@ -287,7 +301,10 @@ static int armada375_stop(struct watchdog_device *wdt_dev)
        writel(reg, dev->rstout);
 
        /* Disable watchdog timer */
-       atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, 0);
+       mask = dev->data->wdt_enable_bit;
+       if (wdt_dev->info->options & WDIOF_PRETIMEOUT)
+               mask |= TIMER1_ENABLE_BIT;
+       atomic_io_modify(dev->reg + TIMER_CTRL, mask, 0);
 
        return 0;
 }
@@ -349,7 +366,7 @@ static unsigned int orion_wdt_get_timeleft(struct watchdog_device *wdt_dev)
        return readl(dev->reg + dev->data->wdt_counter_offset) / dev->clk_rate;
 }
 
-static const struct watchdog_info orion_wdt_info = {
+static struct watchdog_info orion_wdt_info = {
        .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
        .identity = "Orion Watchdog",
 };
@@ -368,6 +385,16 @@ static irqreturn_t orion_wdt_irq(int irq, void *devid)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t orion_wdt_pre_irq(int irq, void *devid)
+{
+       struct orion_watchdog *dev = devid;
+
+       atomic_io_modify(dev->reg + TIMER_A370_STATUS,
+                        TIMER1_STATUS_BIT, 0);
+       watchdog_notify_pretimeout(&dev->wdt);
+       return IRQ_HANDLED;
+}
+
 /*
  * The original devicetree binding for this driver specified only
  * one memory resource, so in order to keep DT backwards compatibility
@@ -589,6 +616,19 @@ static int orion_wdt_probe(struct platform_device *pdev)
                }
        }
 
+       /* Optional 2nd interrupt for pretimeout */
+       irq = platform_get_irq(pdev, 1);
+       if (irq > 0) {
+               orion_wdt_info.options |= WDIOF_PRETIMEOUT;
+               ret = devm_request_irq(&pdev->dev, irq, orion_wdt_pre_irq,
+                                      0, pdev->name, dev);
+               if (ret < 0) {
+                       dev_err(&pdev->dev, "failed to request IRQ\n");
+                       goto disable_clk;
+               }
+       }
+
        watchdog_set_nowayout(&dev->wdt, nowayout);
        ret = watchdog_register_device(&dev->wdt);
        if (ret)
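
A back-of-the-envelope check of the pretimeout programming above, assuming the 25 MHz fixed clock enabled in armadaxp_wdt_clock_init(): timer1 is loaded so that it fires pretimeout seconds before the watchdog itself bites. Illustrative arithmetic only:

#include <stdio.h>

int main(void)
{
	unsigned long clk_rate = 25000000UL;	/* fixed 25 MHz input */
	unsigned int timeout = 30, pretimeout = 5;

	printf("WDT counter:    %lu\n", clk_rate * timeout);
	printf("TIMER1 counter: %lu\n", clk_rate * (timeout - pretimeout));
	return 0;
}
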
index 7be7f87..a494543 100644 (file)
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2014, The Linux Foundation. All rights reserved.
  */
+#include <linux/bits.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -19,6 +21,9 @@ enum wdt_reg {
        WDT_BITE_TIME,
 };
 
+#define QCOM_WDT_ENABLE                BIT(0)
+#define QCOM_WDT_ENABLE_IRQ    BIT(1)
+
 static const u32 reg_offset_data_apcs_tmr[] = {
        [WDT_RST] = 0x38,
        [WDT_EN] = 0x40,
@@ -37,7 +42,6 @@ static const u32 reg_offset_data_kpss[] = {
 
 struct qcom_wdt {
        struct watchdog_device  wdd;
-       struct clk              *clk;
        unsigned long           rate;
        void __iomem            *base;
        const u32               *layout;
@@ -54,15 +58,35 @@ struct qcom_wdt *to_qcom_wdt(struct watchdog_device *wdd)
        return container_of(wdd, struct qcom_wdt, wdd);
 }
 
+static inline int qcom_get_enable(struct watchdog_device *wdd)
+{
+       int enable = QCOM_WDT_ENABLE;
+
+       if (wdd->pretimeout)
+               enable |= QCOM_WDT_ENABLE_IRQ;
+
+       return enable;
+}
+
+static irqreturn_t qcom_wdt_isr(int irq, void *arg)
+{
+       struct watchdog_device *wdd = arg;
+
+       watchdog_notify_pretimeout(wdd);
+
+       return IRQ_HANDLED;
+}
+
 static int qcom_wdt_start(struct watchdog_device *wdd)
 {
        struct qcom_wdt *wdt = to_qcom_wdt(wdd);
+       unsigned int bark = wdd->timeout - wdd->pretimeout;
 
        writel(0, wdt_addr(wdt, WDT_EN));
        writel(1, wdt_addr(wdt, WDT_RST));
-       writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME));
+       writel(bark * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME));
        writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BITE_TIME));
-       writel(1, wdt_addr(wdt, WDT_EN));
+       writel(qcom_get_enable(wdd), wdt_addr(wdt, WDT_EN));
        return 0;
 }
 
@@ -89,6 +113,13 @@ static int qcom_wdt_set_timeout(struct watchdog_device *wdd,
        return qcom_wdt_start(wdd);
 }
 
+static int qcom_wdt_set_pretimeout(struct watchdog_device *wdd,
+                                  unsigned int timeout)
+{
+       wdd->pretimeout = timeout;
+       return qcom_wdt_start(wdd);
+}
+
 static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action,
                            void *data)
 {
@@ -105,7 +136,7 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action,
        writel(1, wdt_addr(wdt, WDT_RST));
        writel(timeout, wdt_addr(wdt, WDT_BARK_TIME));
        writel(timeout, wdt_addr(wdt, WDT_BITE_TIME));
-       writel(1, wdt_addr(wdt, WDT_EN));
+       writel(QCOM_WDT_ENABLE, wdt_addr(wdt, WDT_EN));
 
        /*
         * Actually make sure the above sequence hits hardware before sleeping.
@@ -121,6 +152,7 @@ static const struct watchdog_ops qcom_wdt_ops = {
        .stop           = qcom_wdt_stop,
        .ping           = qcom_wdt_ping,
        .set_timeout    = qcom_wdt_set_timeout,
+       .set_pretimeout = qcom_wdt_set_pretimeout,
        .restart        = qcom_wdt_restart,
        .owner          = THIS_MODULE,
 };
@@ -133,6 +165,15 @@ static const struct watchdog_info qcom_wdt_info = {
        .identity       = KBUILD_MODNAME,
 };
 
+static const struct watchdog_info qcom_wdt_pt_info = {
+       .options        = WDIOF_KEEPALIVEPING
+                       | WDIOF_MAGICCLOSE
+                       | WDIOF_SETTIMEOUT
+                       | WDIOF_PRETIMEOUT
+                       | WDIOF_CARDRESET,
+       .identity       = KBUILD_MODNAME,
+};
+
 static void qcom_clk_disable_unprepare(void *data)
 {
        clk_disable_unprepare(data);
@@ -146,7 +187,8 @@ static int qcom_wdt_probe(struct platform_device *pdev)
        struct device_node *np = dev->of_node;
        const u32 *regs;
        u32 percpu_offset;
-       int ret;
+       int irq, ret;
+       struct clk *clk;
 
        regs = of_device_get_match_data(dev);
        if (!regs) {
@@ -173,19 +215,18 @@ static int qcom_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(wdt->base))
                return PTR_ERR(wdt->base);
 
-       wdt->clk = devm_clk_get(dev, NULL);
-       if (IS_ERR(wdt->clk)) {
+       clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(clk)) {
                dev_err(dev, "failed to get input clock\n");
-               return PTR_ERR(wdt->clk);
+               return PTR_ERR(clk);
        }
 
-       ret = clk_prepare_enable(wdt->clk);
+       ret = clk_prepare_enable(clk);
        if (ret) {
                dev_err(dev, "failed to setup clock\n");
                return ret;
        }
-       ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare,
-                                      wdt->clk);
+       ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare, clk);
        if (ret)
                return ret;
 
@@ -197,14 +238,31 @@ static int qcom_wdt_probe(struct platform_device *pdev)
         * that it would bite before a second elapses, its usefulness is
         * limited.  Bail if this is the case.
         */
-       wdt->rate = clk_get_rate(wdt->clk);
+       wdt->rate = clk_get_rate(clk);
        if (wdt->rate == 0 ||
            wdt->rate > 0x10000000U) {
                dev_err(dev, "invalid clock rate\n");
                return -EINVAL;
        }
 
-       wdt->wdd.info = &qcom_wdt_info;
+       /* check if there is pretimeout support */
+       irq = platform_get_irq(pdev, 0);
+       if (irq > 0) {
+               ret = devm_request_irq(dev, irq, qcom_wdt_isr,
+                                      IRQF_TRIGGER_RISING,
+                                      "wdt_bark", &wdt->wdd);
+               if (ret)
+                       return ret;
+
+               wdt->wdd.info = &qcom_wdt_pt_info;
+               wdt->wdd.pretimeout = 1;
+       } else {
+               if (irq == -EPROBE_DEFER)
+                       return -EPROBE_DEFER;
+
+               wdt->wdd.info = &qcom_wdt_info;
+       }
+
        wdt->wdd.ops = &qcom_wdt_ops;
        wdt->wdd.min_timeout = 1;
        wdt->wdd.max_timeout = 0x10000000U / wdt->rate;
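
A hypothetical user-space use of the new pretimeout support through the standard watchdog ABI; with the values below the bark IRQ fires 5 s before the bite. The device node name is an assumption:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/watchdog.h>

int main(void)
{
	int fd = open("/dev/watchdog0", O_WRONLY);	/* node name assumed */
	int timeout = 30, pre = 5;

	if (fd < 0)
		return 1;
	ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
	ioctl(fd, WDIOC_SETPRETIMEOUT, &pre);	/* bark at timeout - pre */
	ioctl(fd, WDIOC_KEEPALIVE);
	write(fd, "V", 1);
	close(fd);
	return 0;
}
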
index edba4e2..0bb17b0 100644 (file)
@@ -284,10 +284,8 @@ static int sprd_wdt_probe(struct platform_device *pdev)
        }
 
        wdt->irq = platform_get_irq(pdev, 0);
-       if (wdt->irq < 0) {
-               dev_err(dev, "failed to get IRQ resource\n");
+       if (wdt->irq < 0)
                return wdt->irq;
-       }
 
        ret = devm_request_irq(dev, wdt->irq, sprd_wdt_isr, IRQF_NO_SUSPEND,
                               "sprd-wdt", (void *)wdt);
index dec660c..4a363a8 100644 (file)
 #include <linux/version.h>
 #include <linux/watchdog.h>
 
+#include <asm/unaligned.h>
+
 #define ZIIRAVE_TIMEOUT_MIN    3
 #define ZIIRAVE_TIMEOUT_MAX    255
+#define ZIIRAVE_TIMEOUT_DEFAULT        30
 
 #define ZIIRAVE_PING_VALUE     0x0
 
@@ -48,16 +51,12 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL,
 
 #define ZIIRAVE_FIRM_PKT_TOTAL_SIZE    20
 #define ZIIRAVE_FIRM_PKT_DATA_SIZE     16
-#define ZIIRAVE_FIRM_FLASH_MEMORY_START        0x1600
-#define ZIIRAVE_FIRM_FLASH_MEMORY_END  0x2bbf
+#define ZIIRAVE_FIRM_FLASH_MEMORY_START        (2 * 0x1600)
+#define ZIIRAVE_FIRM_FLASH_MEMORY_END  (2 * 0x2bbf)
+#define ZIIRAVE_FIRM_PAGE_SIZE         128
 
 /* Received and ready for next Download packet. */
 #define ZIIRAVE_FIRM_DOWNLOAD_ACK      1
-/* Currently writing to flash. Retry Download status in a moment! */
-#define ZIIRAVE_FIRM_DOWNLOAD_BUSY     2
-
-/* Wait for ACK timeout in ms */
-#define ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT      50
 
 /* Firmware commands */
 #define ZIIRAVE_CMD_DOWNLOAD_START             0x10
@@ -68,6 +67,12 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL,
 #define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER         0x0c
 #define ZIIRAVE_CMD_DOWNLOAD_PACKET            0x0e
 
+#define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC   1
+#define ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC      1
+
+#define ZIIRAVE_FW_VERSION_FMT "02.%02u.%02u"
+#define ZIIRAVE_BL_VERSION_FMT "01.%02u.%02u"
+
 struct ziirave_wdt_rev {
        unsigned char major;
        unsigned char minor;
@@ -165,67 +170,37 @@ static unsigned int ziirave_wdt_get_timeleft(struct watchdog_device *wdd)
        return ret;
 }
 
-static int ziirave_firm_wait_for_ack(struct watchdog_device *wdd)
+static int ziirave_firm_read_ack(struct watchdog_device *wdd)
 {
        struct i2c_client *client = to_i2c_client(wdd->parent);
        int ret;
-       unsigned long timeout;
 
-       timeout = jiffies + msecs_to_jiffies(ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT);
-       do {
-               if (time_after(jiffies, timeout))
-                       return -ETIMEDOUT;
-
-               usleep_range(5000, 10000);
-
-               ret = i2c_smbus_read_byte(client);
-               if (ret < 0) {
-                       dev_err(&client->dev, "Failed to read byte\n");
-                       return ret;
-               }
-       } while (ret == ZIIRAVE_FIRM_DOWNLOAD_BUSY);
+       ret = i2c_smbus_read_byte(client);
+       if (ret < 0) {
+               dev_err(&client->dev, "Failed to read status byte\n");
+               return ret;
+       }
 
        return ret == ZIIRAVE_FIRM_DOWNLOAD_ACK ? 0 : -EIO;
 }
 
-static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u16 addr)
+static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u32 addr)
 {
        struct i2c_client *client = to_i2c_client(wdd->parent);
+       const u16 addr16 = (u16)addr / 2;
        u8 address[2];
 
-       address[0] = addr & 0xff;
-       address[1] = (addr >> 8) & 0xff;
+       put_unaligned_le16(addr16, address);
 
        return i2c_smbus_write_block_data(client,
                                          ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR,
-                                         ARRAY_SIZE(address), address);
-}
-
-static int ziirave_firm_write_block_data(struct watchdog_device *wdd,
-                                        u8 command, u8 length, const u8 *data,
-                                        bool wait_for_ack)
-{
-       struct i2c_client *client = to_i2c_client(wdd->parent);
-       int ret;
-
-       ret = i2c_smbus_write_block_data(client, command, length, data);
-       if (ret) {
-               dev_err(&client->dev,
-                       "Failed to send command 0x%02x: %d\n", command, ret);
-               return ret;
-       }
-
-       if (wait_for_ack)
-               ret = ziirave_firm_wait_for_ack(wdd);
-
-       return ret;
+                                         sizeof(address), address);
 }
 
-static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command,
-                                  u8 byte, bool wait_for_ack)
+static bool ziirave_firm_addr_readonly(u32 addr)
 {
-       return ziirave_firm_write_block_data(wdd, command, 1, &byte,
-                                            wait_for_ack);
+       return addr < ZIIRAVE_FIRM_FLASH_MEMORY_START ||
+              addr > ZIIRAVE_FIRM_FLASH_MEMORY_END;
 }
 
 /*
@@ -240,35 +215,53 @@ static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command,
  *     Data0 .. Data15: Array of 16 bytes of data.
  *     Checksum: Checksum byte to verify data integrity.
  */
-static int ziirave_firm_write_pkt(struct watchdog_device *wdd,
-                                 const struct ihex_binrec *rec)
+static int __ziirave_firm_write_pkt(struct watchdog_device *wdd,
+                                   u32 addr, const u8 *data, u8 len)
 {
+       const u16 addr16 = (u16)addr / 2;
        struct i2c_client *client = to_i2c_client(wdd->parent);
        u8 i, checksum = 0, packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE];
        int ret;
-       u16 addr;
 
-       memset(packet, 0, ARRAY_SIZE(packet));
+       /* Check max data size */
+       if (len > ZIIRAVE_FIRM_PKT_DATA_SIZE) {
+               dev_err(&client->dev, "Firmware packet too long (%d)\n",
+                       len);
+               return -EMSGSIZE;
+       }
+
+       /*
+        * Ignore packets that target program memory outside of the
+        * app partition, since they will be ignored by the
+        * bootloader. At the same time, make sure we still allow the
+        * zero-length packet that is sent as the last step of the
+        * firmware update.
+        */
+       if (len && ziirave_firm_addr_readonly(addr))
+               return 0;
 
        /* Packet length */
-       packet[0] = (u8)be16_to_cpu(rec->len);
+       packet[0] = len;
        /* Packet address */
-       addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1;
-       packet[1] = addr & 0xff;
-       packet[2] = (addr & 0xff00) >> 8;
+       put_unaligned_le16(addr16, packet + 1);
 
-       /* Packet data */
-       if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE)
-               return -EMSGSIZE;
-       memcpy(packet + 3, rec->data, be16_to_cpu(rec->len));
+       memcpy(packet + 3, data, len);
+       memset(packet + 3 + len, 0, ZIIRAVE_FIRM_PKT_DATA_SIZE - len);
 
        /* Packet checksum */
-       for (i = 0; i < ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1; i++)
+       for (i = 0; i < len + 3; i++)
                checksum += packet[i];
        packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1] = checksum;
 
-       ret = ziirave_firm_write_block_data(wdd, ZIIRAVE_CMD_DOWNLOAD_PACKET,
-                                           ARRAY_SIZE(packet), packet, true);
+       ret = i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_PACKET,
+                                        sizeof(packet), packet);
+       if (ret) {
+               dev_err(&client->dev,
+                       "Failed to send DOWNLOAD_PACKET: %d\n", ret);
+               return ret;
+       }
+
+       ret = ziirave_firm_read_ack(wdd);
        if (ret)
                dev_err(&client->dev,
                      "Failed to write firmware packet at address 0x%04x: %d\n",
@@ -277,6 +270,30 @@ static int ziirave_firm_write_pkt(struct watchdog_device *wdd,
        return ret;
 }
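For reference, a user-space sketch of the DOWNLOAD_PACKET layout built above (the sizes mirror the driver's ZIIRAVE_FIRM_PKT_* constants; the helper and its names are hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PKT_DATA_SIZE  16
#define PKT_TOTAL_SIZE (1 + 2 + PKT_DATA_SIZE + 1) /* len, addr16, data, csum */

static void build_pkt(uint8_t *pkt, uint16_t addr16,
                      const uint8_t *data, uint8_t len)
{
        uint8_t i, csum = 0;

        pkt[0] = len;                           /* packet length */
        pkt[1] = addr16 & 0xff;                 /* address, little endian */
        pkt[2] = addr16 >> 8;
        memcpy(pkt + 3, data, len);
        memset(pkt + 3 + len, 0, PKT_DATA_SIZE - len);
        for (i = 0; i < len + 3; i++)           /* checksum over len + 3 bytes */
                csum += pkt[i];
        pkt[PKT_TOTAL_SIZE - 1] = csum;
}

int main(void)
{
        uint8_t pkt[PKT_TOTAL_SIZE], data[4] = { 1, 2, 3, 4 };

        build_pkt(pkt, 0x0100, data, sizeof(data));
        printf("csum=0x%02x\n", pkt[PKT_TOTAL_SIZE - 1]);
        return 0;
}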
 
+static int ziirave_firm_write_pkt(struct watchdog_device *wdd,
+                                 u32 addr, const u8 *data, u8 len)
+{
+       const u8 max_write_len = ZIIRAVE_FIRM_PAGE_SIZE -
+               (addr - ALIGN_DOWN(addr, ZIIRAVE_FIRM_PAGE_SIZE));
+       int ret;
+
+       if (len > max_write_len) {
+               /*
+                * If the data crosses a page boundary, we need to
+                * split this write in two.
+                */
+               ret = __ziirave_firm_write_pkt(wdd, addr, data, max_write_len);
+               if (ret)
+                       return ret;
+
+               addr += max_write_len;
+               data += max_write_len;
+               len  -= max_write_len;
+       }
+
+       return __ziirave_firm_write_pkt(wdd, addr, data, len);
+}
+
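A minimal, self-contained sketch of the page-boundary arithmetic used by the wrapper above, assuming a hypothetical 64-byte flash page (ALIGN_DOWN mirrors the kernel macro):

#include <stdio.h>

#define PAGE_SZ 64u
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1u))    /* mirrors the kernel macro */

int main(void)
{
        unsigned int addr = 0x0138;     /* 8 bytes before the page end */
        unsigned int len = 16;
        unsigned int max = PAGE_SZ - (addr - ALIGN_DOWN(addr, PAGE_SZ));

        if (len > max)
                printf("split: [0x%04x,+%u] then [0x%04x,+%u]\n",
                       addr, max, addr + max, len - max);
        return 0;
}

Run on the values above, this reports a split into [0x0138,+8] and [0x0140,+8], which is exactly the two-packet sequence the wrapper issues.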
 static int ziirave_firm_verify(struct watchdog_device *wdd,
                               const struct firmware *fw)
 {
@@ -284,16 +301,12 @@ static int ziirave_firm_verify(struct watchdog_device *wdd,
        const struct ihex_binrec *rec;
        int i, ret;
        u8 data[ZIIRAVE_FIRM_PKT_DATA_SIZE];
-       u16 addr;
 
        for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) {
-               /* Zero length marks end of records */
-               if (!be16_to_cpu(rec->len))
-                       break;
+               const u16 len = be16_to_cpu(rec->len);
+               const u32 addr = be32_to_cpu(rec->addr);
 
-               addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1;
-               if (addr < ZIIRAVE_FIRM_FLASH_MEMORY_START ||
-                   addr > ZIIRAVE_FIRM_FLASH_MEMORY_END)
+               if (ziirave_firm_addr_readonly(addr))
                        continue;
 
                ret = ziirave_firm_set_read_addr(wdd, addr);
@@ -304,7 +317,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd,
                        return ret;
                }
 
-               for (i = 0; i < ARRAY_SIZE(data); i++) {
+               for (i = 0; i < len; i++) {
                        ret = i2c_smbus_read_byte_data(client,
                                                ZIIRAVE_CMD_DOWNLOAD_READ_BYTE);
                        if (ret < 0) {
@@ -315,7 +328,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd,
                        data[i] = ret;
                }
 
-               if (memcmp(data, rec->data, be16_to_cpu(rec->len))) {
+               if (memcmp(data, rec->data, len)) {
                        dev_err(&client->dev,
                                "Firmware mismatch at address 0x%04x\n", addr);
                        return -EINVAL;
@@ -329,97 +342,45 @@ static int ziirave_firm_upload(struct watchdog_device *wdd,
                               const struct firmware *fw)
 {
        struct i2c_client *client = to_i2c_client(wdd->parent);
-       int ret, words_till_page_break;
        const struct ihex_binrec *rec;
-       struct ihex_binrec *rec_new;
+       int ret;
 
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, 1,
-                                     false);
-       if (ret)
+       ret = i2c_smbus_write_byte_data(client,
+                                       ZIIRAVE_CMD_JUMP_TO_BOOTLOADER,
+                                       ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC);
+       if (ret) {
+               dev_err(&client->dev, "Failed to jump to bootloader\n");
                return ret;
+       }
 
        msleep(500);
 
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_START, 1, true);
-       if (ret)
+       ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_START);
+       if (ret) {
+               dev_err(&client->dev, "Failed to start download\n");
                return ret;
+       }
+
+       ret = ziirave_firm_read_ack(wdd);
+       if (ret) {
+               dev_err(&client->dev, "No ACK for start download\n");
+               return ret;
+       }
 
        msleep(500);
 
        for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) {
-               /* Zero length marks end of records */
-               if (!be16_to_cpu(rec->len))
-                       break;
-
-               /* Check max data size */
-               if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) {
-                       dev_err(&client->dev, "Firmware packet too long (%d)\n",
-                               be16_to_cpu(rec->len));
-                       return -EMSGSIZE;
-               }
-
-               /* Calculate words till page break */
-               words_till_page_break = (64 - ((be32_to_cpu(rec->addr) >> 1) &
-                                        0x3f));
-               if ((be16_to_cpu(rec->len) >> 1) > words_till_page_break) {
-                       /*
-                        * Data in passes page boundary, so we need to split in
-                        * two blocks of data. Create a packet with the first
-                        * block of data.
-                        */
-                       rec_new = kzalloc(sizeof(struct ihex_binrec) +
-                                         (words_till_page_break << 1),
-                                         GFP_KERNEL);
-                       if (!rec_new)
-                               return -ENOMEM;
-
-                       rec_new->len = cpu_to_be16(words_till_page_break << 1);
-                       rec_new->addr = rec->addr;
-                       memcpy(rec_new->data, rec->data,
-                              be16_to_cpu(rec_new->len));
-
-                       ret = ziirave_firm_write_pkt(wdd, rec_new);
-                       kfree(rec_new);
-                       if (ret)
-                               return ret;
-
-                       /* Create a packet with the second block of data */
-                       rec_new = kzalloc(sizeof(struct ihex_binrec) +
-                                         be16_to_cpu(rec->len) -
-                                         (words_till_page_break << 1),
-                                         GFP_KERNEL);
-                       if (!rec_new)
-                               return -ENOMEM;
-
-                       /* Remaining bytes */
-                       rec_new->len = rec->len -
-                                      cpu_to_be16(words_till_page_break << 1);
-
-                       rec_new->addr = cpu_to_be32(be32_to_cpu(rec->addr) +
-                                       (words_till_page_break << 1));
-
-                       memcpy(rec_new->data,
-                              rec->data + (words_till_page_break << 1),
-                              be16_to_cpu(rec_new->len));
-
-                       ret = ziirave_firm_write_pkt(wdd, rec_new);
-                       kfree(rec_new);
-                       if (ret)
-                               return ret;
-               } else {
-                       ret = ziirave_firm_write_pkt(wdd, rec);
-                       if (ret)
-                               return ret;
-               }
+               ret = ziirave_firm_write_pkt(wdd, be32_to_cpu(rec->addr),
+                                            rec->data, be16_to_cpu(rec->len));
+               if (ret)
+                       return ret;
        }
 
-       /* For end of download, the length field will be set to 0 */
-       rec_new = kzalloc(sizeof(struct ihex_binrec) + 1, GFP_KERNEL);
-       if (!rec_new)
-               return -ENOMEM;
-
-       ret = ziirave_firm_write_pkt(wdd, rec_new);
-       kfree(rec_new);
+       /*
+        * Finish the firmware download process by sending a
+        * zero-length payload.
+        */
+       ret = ziirave_firm_write_pkt(wdd, 0, NULL, 0);
        if (ret) {
                dev_err(&client->dev, "Failed to send EMPTY packet: %d\n", ret);
                return ret;
@@ -437,15 +398,22 @@ static int ziirave_firm_upload(struct watchdog_device *wdd,
        }
 
        /* End download operation */
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_END, 1, false);
-       if (ret)
+       ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_END);
+       if (ret) {
+               dev_err(&client->dev,
+                       "Failed to end firmware download: %d\n", ret);
                return ret;
+       }
 
        /* Reset the processor */
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_RESET_PROCESSOR, 1,
-                                     false);
-       if (ret)
+       ret = i2c_smbus_write_byte_data(client,
+                                       ZIIRAVE_CMD_RESET_PROCESSOR,
+                                       ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC);
+       if (ret) {
+               dev_err(&client->dev,
+                       "Failed to reset the watchdog: %d\n", ret);
                return ret;
+       }
 
        msleep(500);
 
@@ -478,7 +446,7 @@ static ssize_t ziirave_wdt_sysfs_show_firm(struct device *dev,
        if (ret)
                return ret;
 
-       ret = sprintf(buf, "02.%02u.%02u", w_priv->firmware_rev.major,
+       ret = sprintf(buf, ZIIRAVE_FW_VERSION_FMT, w_priv->firmware_rev.major,
                      w_priv->firmware_rev.minor);
 
        mutex_unlock(&w_priv->sysfs_mutex);
@@ -501,7 +469,7 @@ static ssize_t ziirave_wdt_sysfs_show_boot(struct device *dev,
        if (ret)
                return ret;
 
-       ret = sprintf(buf, "01.%02u.%02u", w_priv->bootloader_rev.major,
+       ret = sprintf(buf, ZIIRAVE_BL_VERSION_FMT, w_priv->bootloader_rev.major,
                      w_priv->bootloader_rev.minor);
 
        mutex_unlock(&w_priv->sysfs_mutex);
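Both sysfs handlers now share the version-format macros. A small user-space sketch (buffer size and version values hypothetical) showing that the macros are plain string literals, so they work both as a standalone format and concatenated with adjacent literals, as in the dev_info() calls below:

#include <stdio.h>

#define ZIIRAVE_FW_VERSION_FMT "02.%02u.%02u"

int main(void)
{
        char buf[16];

        /* expands to sprintf(buf, "02.%02u.%02u", 1U, 7U) */
        sprintf(buf, ZIIRAVE_FW_VERSION_FMT, 1U, 7U);

        /* adjacent string literals concatenate at compile time */
        printf("Firmware updated to version " ZIIRAVE_FW_VERSION_FMT "\n",
               1U, 7U);
        printf("%s\n", buf);
        return 0;
}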
@@ -568,7 +536,8 @@ static ssize_t ziirave_wdt_sysfs_store_firm(struct device *dev,
                goto unlock_mutex;
        }
 
-       dev_info(&client->dev, "Firmware updated to version 02.%02u.%02u\n",
+       dev_info(&client->dev,
+                "Firmware updated to version " ZIIRAVE_FW_VERSION_FMT "\n",
                 w_priv->firmware_rev.major, w_priv->firmware_rev.minor);
 
        /* Restore the watchdog timeout */
@@ -611,7 +580,7 @@ static int ziirave_wdt_init_duration(struct i2c_client *client)
                                                   &reset_duration);
                if (ret) {
                        dev_info(&client->dev,
-                                "Unable to set reset pulse duration, using default\n");
+                        "No reset pulse duration specified, using default\n");
                        return 0;
                }
        }
@@ -633,7 +602,10 @@ static int ziirave_wdt_probe(struct i2c_client *client,
        struct ziirave_wdt_data *w_priv;
        int val;
 
-       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+       if (!i2c_check_functionality(client->adapter,
+                                    I2C_FUNC_SMBUS_BYTE |
+                                    I2C_FUNC_SMBUS_BYTE_DATA |
+                                    I2C_FUNC_SMBUS_WRITE_BLOCK_DATA))
                return -ENODEV;
 
        w_priv = devm_kzalloc(&client->dev, sizeof(*w_priv), GFP_KERNEL);
@@ -658,57 +630,80 @@ static int ziirave_wdt_probe(struct i2c_client *client,
         */
        if (w_priv->wdd.timeout == 0) {
                val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_TIMEOUT);
-               if (val < 0)
+               if (val < 0) {
+                       dev_err(&client->dev, "Failed to read timeout\n");
                        return val;
+               }
 
-               if (val < ZIIRAVE_TIMEOUT_MIN)
-                       return -ENODEV;
+               if (val > ZIIRAVE_TIMEOUT_MAX ||
+                   val < ZIIRAVE_TIMEOUT_MIN)
+                       val = ZIIRAVE_TIMEOUT_DEFAULT;
 
                w_priv->wdd.timeout = val;
-       } else {
-               ret = ziirave_wdt_set_timeout(&w_priv->wdd,
-                                             w_priv->wdd.timeout);
-               if (ret)
-                       return ret;
+       }
 
-               dev_info(&client->dev, "Timeout set to %ds.",
-                        w_priv->wdd.timeout);
+       ret = ziirave_wdt_set_timeout(&w_priv->wdd, w_priv->wdd.timeout);
+       if (ret) {
+               dev_err(&client->dev, "Failed to set timeout\n");
+               return ret;
        }
 
+       dev_info(&client->dev, "Timeout set to %ds\n", w_priv->wdd.timeout);
+
        watchdog_set_nowayout(&w_priv->wdd, nowayout);
 
        i2c_set_clientdata(client, w_priv);
 
        /* If in unconfigured state, set to stopped */
        val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_STATE);
-       if (val < 0)
+       if (val < 0) {
+               dev_err(&client->dev, "Failed to read state\n");
                return val;
+       }
 
        if (val == ZIIRAVE_STATE_INITIAL)
                ziirave_wdt_stop(&w_priv->wdd);
 
        ret = ziirave_wdt_init_duration(client);
-       if (ret)
+       if (ret) {
+               dev_err(&client->dev, "Failed to init duration\n");
                return ret;
+       }
 
        ret = ziirave_wdt_revision(client, &w_priv->firmware_rev,
                                   ZIIRAVE_WDT_FIRM_VER_MAJOR);
-       if (ret)
+       if (ret) {
+               dev_err(&client->dev, "Failed to read firmware version\n");
                return ret;
+       }
+
+       dev_info(&client->dev,
+                "Firmware version: " ZIIRAVE_FW_VERSION_FMT "\n",
+                w_priv->firmware_rev.major, w_priv->firmware_rev.minor);
 
        ret = ziirave_wdt_revision(client, &w_priv->bootloader_rev,
                                   ZIIRAVE_WDT_BOOT_VER_MAJOR);
-       if (ret)
+       if (ret) {
+               dev_err(&client->dev, "Failed to read bootloader version\n");
                return ret;
+       }
+
+       dev_info(&client->dev,
+                "Bootloader version: " ZIIRAVE_BL_VERSION_FMT "\n",
+                w_priv->bootloader_rev.major, w_priv->bootloader_rev.minor);
 
        w_priv->reset_reason = i2c_smbus_read_byte_data(client,
                                                ZIIRAVE_WDT_RESET_REASON);
-       if (w_priv->reset_reason < 0)
+       if (w_priv->reset_reason < 0) {
+               dev_err(&client->dev, "Failed to read reset reason\n");
                return w_priv->reset_reason;
+       }
 
        if (w_priv->reset_reason >= ARRAY_SIZE(ziirave_reasons) ||
-           !ziirave_reasons[w_priv->reset_reason])
+           !ziirave_reasons[w_priv->reset_reason]) {
+               dev_err(&client->dev, "Invalid reset reason\n");
                return -ENODEV;
+       }
 
        ret = watchdog_register_device(&w_priv->wdd);
 
index 4e11de6..5bae515 100644 (file)
@@ -156,8 +156,10 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
        (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
 
 /* balloon_append: add the given page to the balloon. */
-static void __balloon_append(struct page *page)
+static void balloon_append(struct page *page)
 {
+       __SetPageOffline(page);
+
        /* Lowmem is re-populated first, so highmem pages go at list tail. */
        if (PageHighMem(page)) {
                list_add_tail(&page->lru, &ballooned_pages);
@@ -169,11 +171,6 @@ static void __balloon_append(struct page *page)
        wake_up(&balloon_wq);
 }
 
-static void balloon_append(struct page *page)
-{
-       __balloon_append(page);
-}
-
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
 static struct page *balloon_retrieve(bool require_lowmem)
 {
@@ -192,6 +189,7 @@ static struct page *balloon_retrieve(bool require_lowmem)
        else
                balloon_stats.balloon_low--;
 
+       __ClearPageOffline(page);
        return page;
 }
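The refactor pairs the offline marking with balloon-list membership: pages are flagged inside balloon_append() and cleared inside balloon_retrieve(), never by callers. A reduced sketch of that invariant (types and names hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct page_stub {
        bool offline;
        struct page_stub *next;
};

static struct page_stub *ballooned;

static void balloon_append(struct page_stub *p)
{
        p->offline = true;              /* mark offline on entry... */
        p->next = ballooned;
        ballooned = p;
}

static struct page_stub *balloon_retrieve(void)
{
        struct page_stub *p = ballooned;

        if (p) {
                ballooned = p->next;
                p->offline = false;     /* ...clear it on exit */
        }
        return p;
}

int main(void)
{
        struct page_stub pg = { 0 };

        balloon_append(&pg);
        printf("on list: offline=%d\n", pg.offline);    /* 1 */
        balloon_retrieve();
        printf("retrieved: offline=%d\n", pg.offline);  /* 0 */
        return 0;
}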
 
@@ -377,8 +375,7 @@ static void xen_online_page(struct page *page, unsigned int order)
        for (i = 0; i < size; i++) {
                p = pfn_to_page(start_pfn + i);
                __online_page_set_limits(p);
-               __SetPageOffline(p);
-               __balloon_append(p);
+               balloon_append(p);
        }
        mutex_unlock(&balloon_mutex);
 }
@@ -444,7 +441,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
                xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
 
                /* Relinquish the page back to the allocator. */
-               __ClearPageOffline(page);
                free_reserved_page(page);
        }
 
@@ -471,7 +467,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
                        state = BP_EAGAIN;
                        break;
                }
-               __SetPageOffline(page);
                adjust_managed_page_count(page, -1);
                xenmem_reservation_scrub_page(page);
                list_add(&page->lru, &pages);
@@ -611,7 +606,6 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
        while (pgno < nr_pages) {
                page = balloon_retrieve(true);
                if (page) {
-                       __ClearPageOffline(page);
                        pages[pgno++] = page;
 #ifdef CONFIG_XEN_HAVE_PVMMU
                        /*
@@ -653,10 +647,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
        mutex_lock(&balloon_mutex);
 
        for (i = 0; i < nr_pages; i++) {
-               if (pages[i]) {
-                       __SetPageOffline(pages[i]);
+               if (pages[i])
                        balloon_append(pages[i]);
-               }
        }
 
        balloon_stats.target_unpopulated -= nr_pages;
@@ -674,7 +666,6 @@ static void __init balloon_add_region(unsigned long start_pfn,
                                      unsigned long pages)
 {
        unsigned long pfn, extra_pfn_end;
-       struct page *page;
 
        /*
         * If the amount of usable memory has been limited (e.g., with
@@ -684,11 +675,10 @@ static void __init balloon_add_region(unsigned long start_pfn,
        extra_pfn_end = min(max_pfn, start_pfn + pages);
 
        for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
-               page = pfn_to_page(pfn);
                /* totalram_pages and totalhigh_pages do not
                   include the boot-time balloon extension, so
                   don't subtract from it. */
-               __balloon_append(page);
+               balloon_append(pfn_to_page(pfn));
        }
 
        balloon_stats.total_pages += extra_pfn_end - start_pfn;
index 89d60f8..d1ff218 100644 (file)
@@ -40,7 +40,7 @@
 
 #define efi_data(op)   (op.u.efi_runtime_call)
 
-efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
        struct xen_platform_op op = INIT_EFI_OP(get_time);
 
@@ -61,9 +61,8 @@ efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_time);
 
-efi_status_t xen_efi_set_time(efi_time_t *tm)
+static efi_status_t xen_efi_set_time(efi_time_t *tm)
 {
        struct xen_platform_op op = INIT_EFI_OP(set_time);
 
@@ -75,10 +74,10 @@ efi_status_t xen_efi_set_time(efi_time_t *tm)
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_set_time);
 
-efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
-                                    efi_time_t *tm)
+static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled,
+                                           efi_bool_t *pending,
+                                           efi_time_t *tm)
 {
        struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time);
 
@@ -98,9 +97,8 @@ efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_wakeup_time);
 
-efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 {
        struct xen_platform_op op = INIT_EFI_OP(set_wakeup_time);
 
@@ -117,11 +115,10 @@ efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_set_wakeup_time);
 
-efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
-                                 u32 *attr, unsigned long *data_size,
-                                 void *data)
+static efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
+                                        u32 *attr, unsigned long *data_size,
+                                        void *data)
 {
        struct xen_platform_op op = INIT_EFI_OP(get_variable);
 
@@ -141,11 +138,10 @@ efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_variable);
 
-efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
-                                      efi_char16_t *name,
-                                      efi_guid_t *vendor)
+static efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
+                                             efi_char16_t *name,
+                                             efi_guid_t *vendor)
 {
        struct xen_platform_op op = INIT_EFI_OP(get_next_variable_name);
 
@@ -165,11 +161,10 @@ efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_next_variable);
 
-efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
-                                u32 attr, unsigned long data_size,
-                                void *data)
+static efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
+                                        u32 attr, unsigned long data_size,
+                                        void *data)
 {
        struct xen_platform_op op = INIT_EFI_OP(set_variable);
 
@@ -186,11 +181,10 @@ efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_set_variable);
 
-efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
-                                        u64 *remaining_space,
-                                        u64 *max_variable_size)
+static efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
+                                               u64 *remaining_space,
+                                               u64 *max_variable_size)
 {
        struct xen_platform_op op = INIT_EFI_OP(query_variable_info);
 
@@ -208,9 +202,8 @@ efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_query_variable_info);
 
-efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
+static efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
 {
        struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count);
 
@@ -221,10 +214,9 @@ efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_next_high_mono_count);
 
-efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
-                                   unsigned long count, unsigned long sg_list)
+static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
+                               unsigned long count, unsigned long sg_list)
 {
        struct xen_platform_op op = INIT_EFI_OP(update_capsule);
 
@@ -241,11 +233,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_update_capsule);
 
-efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
-                                       unsigned long count, u64 *max_size,
-                                       int *reset_type)
+static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
+                       unsigned long count, u64 *max_size, int *reset_type)
 {
        struct xen_platform_op op = INIT_EFI_OP(query_capsule_capabilities);
 
@@ -264,10 +254,9 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 
        return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps);
 
-void xen_efi_reset_system(int reset_type, efi_status_t status,
-                         unsigned long data_size, efi_char16_t *data)
+static void xen_efi_reset_system(int reset_type, efi_status_t status,
+                                unsigned long data_size, efi_char16_t *data)
 {
        switch (reset_type) {
        case EFI_RESET_COLD:
@@ -281,4 +270,25 @@ void xen_efi_reset_system(int reset_type, efi_status_t status,
                BUG();
        }
 }
-EXPORT_SYMBOL_GPL(xen_efi_reset_system);
+
+/*
+ * Set Xen EFI runtime service function pointers. Other fields of struct efi,
+ * e.g. efi.systab, are set up as for normal EFI.
+ */
+void __init xen_efi_runtime_setup(void)
+{
+       efi.get_time                    = xen_efi_get_time;
+       efi.set_time                    = xen_efi_set_time;
+       efi.get_wakeup_time             = xen_efi_get_wakeup_time;
+       efi.set_wakeup_time             = xen_efi_set_wakeup_time;
+       efi.get_variable                = xen_efi_get_variable;
+       efi.get_next_variable           = xen_efi_get_next_variable;
+       efi.set_variable                = xen_efi_set_variable;
+       efi.set_variable_nonblocking    = xen_efi_set_variable;
+       efi.query_variable_info         = xen_efi_query_variable_info;
+       efi.query_variable_info_nonblocking = xen_efi_query_variable_info;
+       efi.update_capsule              = xen_efi_update_capsule;
+       efi.query_capsule_caps          = xen_efi_query_capsule_caps;
+       efi.get_next_high_mono_count    = xen_efi_get_next_high_mono_count;
+       efi.reset_system                = xen_efi_reset_system;
+}
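A reduced sketch of the pattern this rework applies: keep the implementations static and publish them through one setup hook that fills a function-pointer table (the structure and names are hypothetical, not the real struct efi):

#include <stdio.h>

struct efi_ops_stub {
        int (*get_time)(void);
        int (*set_time)(int t);
};

static struct efi_ops_stub efi_stub;

/* implementations stay static; only the setup hook is visible */
static int impl_get_time(void) { return 42; }
static int impl_set_time(int t) { (void)t; return 0; }

void runtime_setup(void)
{
        efi_stub.get_time = impl_get_time;
        efi_stub.set_time = impl_set_time;
}

int main(void)
{
        runtime_setup();
        printf("%d\n", efi_stub.get_time());
        return 0;
}

This removes the per-function EXPORT_SYMBOL_GPL surface while keeping the callers, who go through the table, unchanged.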
index 2e8570c..6c88439 100644 (file)
@@ -247,7 +247,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
  */
 unsigned int evtchn_from_irq(unsigned irq)
 {
-       if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)))
+       if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))
                return 0;
 
        return info_for_irq(irq)->evtchn;
index a446a72..81401f3 100644 (file)
@@ -22,6 +22,7 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -34,9 +35,6 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/refcount.h>
-#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
-#include <linux/of_device.h>
-#endif
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -625,14 +623,7 @@ static int gntdev_open(struct inode *inode, struct file *flip)
        flip->private_data = priv;
 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
        priv->dma_dev = gntdev_miscdev.this_device;
-
-       /*
-        * The device is not spawn from a device tree, so arch_setup_dma_ops
-        * is not called, thus leaving the device with dummy DMA ops.
-        * Fix this by calling of_dma_configure() with a NULL node to set
-        * default DMA ops.
-        */
-       of_dma_configure(priv->dma_dev, NULL, true);
+       dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
 #endif
        pr_debug("priv %p\n", priv);
 
index 7ea6fb6..49b381e 100644 (file)
@@ -1363,8 +1363,7 @@ static int gnttab_setup(void)
        if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
                gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
                if (gnttab_shared.addr == NULL) {
-                       pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
-                               (unsigned long)xen_auto_xlat_grant_frames.vaddr);
+                       pr_warn("gnttab share frames is not mapped!\n");
                        return -ENOMEM;
                }
        }
index 3eeb9be..224df03 100644 (file)
@@ -17,6 +17,8 @@
 #include "../pci/pci.h"
 #ifdef CONFIG_PCI_MMCONFIG
 #include <asm/pci_x86.h>
+
+static int xen_mcfg_late(void);
 #endif
 
 static bool __read_mostly pci_seg_supported = true;
@@ -28,7 +30,18 @@ static int xen_add_device(struct device *dev)
 #ifdef CONFIG_PCI_IOV
        struct pci_dev *physfn = pci_dev->physfn;
 #endif
-
+#ifdef CONFIG_PCI_MMCONFIG
+       static bool pci_mcfg_reserved = false;
+       /*
+        * Reserve MCFG areas in Xen on the first invocation, since this
+        * may be called from inside acpi_init immediately after the
+        * MCFG table has been parsed.
+        */
+       if (!pci_mcfg_reserved) {
+               xen_mcfg_late();
+               pci_mcfg_reserved = true;
+       }
+#endif
        if (pci_seg_supported) {
                struct {
                        struct physdev_pci_device_add add;
@@ -201,7 +214,7 @@ static int __init register_xen_pci_notifier(void)
 arch_initcall(register_xen_pci_notifier);
 
 #ifdef CONFIG_PCI_MMCONFIG
-static int __init xen_mcfg_late(void)
+static int xen_mcfg_late(void)
 {
        struct pci_mmcfg_region *cfg;
        int rc;
@@ -240,8 +253,4 @@ static int __init xen_mcfg_late(void)
        }
        return 0;
 }
-/*
- * Needs to be done after acpi_init which are subsys_initcall.
- */
-subsys_initcall_sync(xen_mcfg_late);
 #endif
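A minimal sketch of the once-only pattern the hunk introduces: a static flag lets a late-init routine be triggered safely from whichever path reaches it first (single-threaded init assumed, as in the original; names hypothetical):

#include <stdbool.h>
#include <stdio.h>

static int mcfg_late(void)
{
        printf("reserving MCFG areas\n");
        return 0;
}

static void add_device(void)
{
        static bool mcfg_reserved = false;

        if (!mcfg_reserved) {
                mcfg_late();
                mcfg_reserved = true;
        }
}

int main(void)
{
        add_device();
        add_device();   /* second call skips the reservation */
        return 0;
}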
index 69a626b..c57c71b 100644 (file)
@@ -775,7 +775,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev,
        mappass->reqcopy = *req;
        icsk = inet_csk(mappass->sock->sk);
        queue = &icsk->icsk_accept_queue;
-       data = queue->rskq_accept_head != NULL;
+       data = READ_ONCE(queue->rskq_accept_head) != NULL;
        if (data) {
                mappass->reqcopy.cmd = 0;
                ret = 0;
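READ_ONCE() forces a single, untorn load when the accept-queue head is checked without the queue lock held. A user-space analogue (the macro is simplified from the kernel's compiler.h):

#include <stdio.h>

#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

static void *accept_head;

int main(void)
{
        /* one volatile load; the compiler cannot re-read or cache it */
        int data = READ_ONCE(accept_head) != NULL;

        printf("%d\n", data);
        return 0;
}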
index 58c9365..bd3a10d 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/xen/page-coherent.h>
 
 #include <trace/events/swiotlb.h>
+#define MAX_DMA_BITS 32
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -115,8 +116,6 @@ static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
        return 0;
 }
 
-static int max_dma_bits = 32;
-
 static int
 xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 {
@@ -136,7 +135,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
                                p + (i << IO_TLB_SHIFT),
                                get_order(slabs << IO_TLB_SHIFT),
                                dma_bits, &dma_handle);
-               } while (rc && dma_bits++ < max_dma_bits);
+               } while (rc && dma_bits++ < MAX_DMA_BITS);
                if (rc)
                        return rc;
 
index 08adc59..597af45 100644 (file)
@@ -55,6 +55,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/miscdevice.h>
+#include <linux/workqueue.h>
 
 #include <xen/xenbus.h>
 #include <xen/xen.h>
@@ -116,6 +117,8 @@ struct xenbus_file_priv {
        wait_queue_head_t read_waitq;
 
        struct kref kref;
+
+       struct work_struct wq;
 };
 
 /* Read out any raw xenbus messages queued up. */
@@ -300,14 +303,14 @@ static void watch_fired(struct xenbus_watch *watch,
        mutex_unlock(&adap->dev_data->reply_mutex);
 }
 
-static void xenbus_file_free(struct kref *kref)
+static void xenbus_worker(struct work_struct *wq)
 {
        struct xenbus_file_priv *u;
        struct xenbus_transaction_holder *trans, *tmp;
        struct watch_adapter *watch, *tmp_watch;
        struct read_buffer *rb, *tmp_rb;
 
-       u = container_of(kref, struct xenbus_file_priv, kref);
+       u = container_of(wq, struct xenbus_file_priv, wq);
 
        /*
         * No need for locking here because there are no other users,
@@ -333,6 +336,18 @@ static void xenbus_file_free(struct kref *kref)
        kfree(u);
 }
 
+static void xenbus_file_free(struct kref *kref)
+{
+       struct xenbus_file_priv *u;
+
+       /*
+        * We might be called from within xenbus_thread();
+        * use a workqueue to avoid a deadlock.
+        */
+       u = container_of(kref, struct xenbus_file_priv, kref);
+       schedule_work(&u->wq);
+}
+
 static struct xenbus_transaction_holder *xenbus_get_transaction(
        struct xenbus_file_priv *u, uint32_t tx_id)
 {
@@ -650,6 +665,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
        INIT_LIST_HEAD(&u->watches);
        INIT_LIST_HEAD(&u->read_buffers);
        init_waitqueue_head(&u->read_waitq);
+       INIT_WORK(&u->wq, xenbus_worker);
 
        mutex_init(&u->reply_mutex);
        mutex_init(&u->msgbuffer_mutex);
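The fix defers the actual teardown out of the kref release path, since the final kref_put() may run in xenbus_thread() itself. A reduced user-space analogue where a worker thread stands in for the kernel workqueue (names hypothetical; build with -lpthread):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct priv {
        int refcount;                   /* kref stand-in */
};

static pthread_t reaper;

static void *free_priv(void *arg)
{
        printf("freeing in worker context\n");
        free(arg);
        return NULL;
}

/* the last put must not free inline: hand the object to a worker */
static void priv_put(struct priv *u)
{
        if (--u->refcount == 0)
                pthread_create(&reaper, NULL, free_priv, u);
}

int main(void)
{
        struct priv *u = calloc(1, sizeof(*u));

        u->refcount = 1;
        priv_put(u);                    /* deferred, not freed here */
        pthread_join(reaper, NULL);
        return 0;
}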
index 995e332..eb2151f 100644 (file)
@@ -51,6 +51,8 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
        if (!v9ses->cachetag) {
                if (v9fs_random_cachetag(v9ses) < 0) {
                        v9ses->fscache = NULL;
+                       kfree(v9ses->cachetag);
+                       v9ses->cachetag = NULL;
                        return;
                }
        }
index 4cc966a..fe7f0bd 100644 (file)
@@ -513,6 +513,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
        v9inode = V9FS_I(inode);
        mutex_lock(&v9inode->v_mutex);
        if (!v9inode->writeback_fid &&
+           (vma->vm_flags & VM_SHARED) &&
            (vma->vm_flags & VM_WRITE)) {
                /*
                 * clone a fid and add it to writeback_fid
@@ -614,6 +615,8 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
                        (vma->vm_end - vma->vm_start - 1),
        };
 
+       if (!(vma->vm_flags & VM_SHARED))
+               return;
 
        p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
 
index ca243e6..74df32b 100644 (file)
@@ -58,7 +58,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
 
 static int
 v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
-               int flags, void *data)
+               int flags)
 {
        int ret;
 
@@ -132,7 +132,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                retval = PTR_ERR(sb);
                goto clunk_fid;
        }
-       retval = v9fs_fill_super(sb, v9ses, flags, data);
+       retval = v9fs_fill_super(sb, v9ses, flags);
        if (retval)
                goto release_sb;
 
index bcd1baf..4150280 100644 (file)
 #include <linux/dns_resolver.h>
 #include "internal.h"
 
-const struct file_operations afs_dynroot_file_operations = {
-       .open           = dcache_dir_open,
-       .release        = dcache_dir_close,
-       .iterate_shared = dcache_readdir,
-       .llseek         = dcache_dir_lseek,
-};
-
 /*
  * Probe to see if a cell may exist.  This prevents positive dentries from
  * being created unnecessarily.
index 7b1c18c..46d2d7c 100644 (file)
@@ -443,7 +443,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
        inode->i_mode           = S_IFDIR | S_IRUGO | S_IXUGO;
        if (root) {
                inode->i_op     = &afs_dynroot_inode_operations;
-               inode->i_fop    = &afs_dynroot_file_operations;
+               inode->i_fop    = &simple_dir_operations;
        } else {
                inode->i_op     = &afs_autocell_inode_operations;
        }
index 9cdfaba..759e057 100644 (file)
@@ -910,7 +910,6 @@ extern int afs_silly_iput(struct dentry *, struct inode *);
 /*
  * dynroot.c
  */
-extern const struct file_operations afs_dynroot_file_operations;
 extern const struct inode_operations afs_dynroot_inode_operations;
 extern const struct dentry_operations afs_dynroot_dentry_operations;
 
index d4e11b2..c5642bc 100644 (file)
@@ -670,26 +670,6 @@ out:
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-#ifndef STACK_RND_MASK
-#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))    /* 8MB of VA */
-#endif
-
-static unsigned long randomize_stack_top(unsigned long stack_top)
-{
-       unsigned long random_variable = 0;
-
-       if (current->flags & PF_RANDOMIZE) {
-               random_variable = get_random_long();
-               random_variable &= STACK_RND_MASK;
-               random_variable <<= PAGE_SHIFT;
-       }
-#ifdef CONFIG_STACK_GROWSUP
-       return PAGE_ALIGN(stack_top) + random_variable;
-#else
-       return PAGE_ALIGN(stack_top) - random_variable;
-#endif
-}
-
 static int load_elf_binary(struct linux_binprm *bprm)
 {
        struct file *interpreter = NULL; /* to shut gcc up */
@@ -899,7 +879,7 @@ out_free_interp:
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
-               int elf_prot, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
+               int elf_prot, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;
 
@@ -931,13 +911,6 @@ out_free_interp:
                                         */
                                }
                        }
-
-                       /*
-                        * Some binaries have overlapping elf segments and then
-                        * we have to forcefully map over an existing mapping
-                        * e.g. over this newly established brk mapping.
-                        */
-                       elf_fixed = MAP_FIXED;
                }
 
                elf_prot = make_prot(elf_ppnt->p_flags);
@@ -950,7 +923,7 @@ out_free_interp:
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
-                       elf_flags |= elf_fixed;
+                       elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
@@ -986,7 +959,7 @@ out_free_interp:
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
-                               elf_flags |= elf_fixed;
+                               elf_flags |= MAP_FIXED;
                        } else
                                load_bias = 0;
 
@@ -1141,7 +1114,8 @@ out_free_interp:
                 * (since it grows up, and may collide early with the stack
                 * growing down), and into the unused ELF_ET_DYN_BASE region.
                 */
-               if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && !interpreter)
+               if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
+                   loc->elf_ex.e_type == ET_DYN && !interpreter)
                        current->mm->brk = current->mm->start_brk =
                                ELF_ET_DYN_BASE;
 
index bf7e3f2..670700c 100644 (file)
@@ -1761,6 +1761,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
                        btrfs_err(info,
 "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
                                  cache->key.objectid);
+                       btrfs_put_block_group(cache);
                        ret = -EINVAL;
                        goto error;
                }
index 19d669d..fe2b876 100644 (file)
@@ -734,8 +734,6 @@ struct btrfs_fs_info {
        struct btrfs_workqueue *fixup_workers;
        struct btrfs_workqueue *delayed_workers;
 
-       /* the extent workers do delayed refs on the extent allocation tree */
-       struct btrfs_workqueue *extent_workers;
        struct task_struct *transaction_kthread;
        struct task_struct *cleaner_kthread;
        u32 thread_pool_size;
@@ -2489,8 +2487,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     int nitems, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
                                      struct btrfs_block_rsv *rsv);
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
-                                   bool qgroup_free);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
 
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
index d949d7d..db9f2c5 100644 (file)
@@ -381,7 +381,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 out_qgroup:
        btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
 out_fail:
-       btrfs_inode_rsv_release(inode, true);
        if (delalloc_lock)
                mutex_unlock(&inode->delalloc_mutex);
        return ret;
@@ -418,7 +417,6 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
  * btrfs_delalloc_release_extents - release our outstanding_extents
  * @inode: the inode to balance the reservation for.
  * @num_bytes: the number of bytes we originally reserved with
- * @qgroup_free: do we need to free qgroup meta reservation or convert them.
  *
  * When we reserve space we increase outstanding_extents for the extents we may
  * add.  Once we've set the range as delalloc or created our ordered extents we
@@ -426,8 +424,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
  * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
  * with btrfs_delalloc_reserve_metadata.
  */
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
-                                   bool qgroup_free)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        unsigned num_extents;
@@ -441,7 +438,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
        if (btrfs_is_testing(fs_info))
                return;
 
-       btrfs_inode_rsv_release(inode, qgroup_free);
+       btrfs_inode_rsv_release(inode, true);
 }
 
 /**
index 044981c..402b61b 100644 (file)
@@ -2008,7 +2008,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
        btrfs_destroy_workqueue(fs_info->readahead_workers);
        btrfs_destroy_workqueue(fs_info->flush_workers);
        btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
-       btrfs_destroy_workqueue(fs_info->extent_workers);
        /*
         * Now that all other work queues are destroyed, we can safely destroy
         * the queues used for metadata I/O, since tasks from those other work
@@ -2214,10 +2213,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
                                      max_active, 2);
        fs_info->qgroup_rescan_workers =
                btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
-       fs_info->extent_workers =
-               btrfs_alloc_workqueue(fs_info, "extent-refs", flags,
-                                     min_t(u64, fs_devices->num_devices,
-                                           max_active), 8);
 
        if (!(fs_info->workers && fs_info->delalloc_workers &&
              fs_info->submit_workers && fs_info->flush_workers &&
@@ -2228,7 +2223,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
              fs_info->endio_freespace_worker && fs_info->rmw_workers &&
              fs_info->caching_workers && fs_info->readahead_workers &&
              fs_info->fixup_workers && fs_info->delayed_workers &&
-             fs_info->extent_workers &&
              fs_info->qgroup_rescan_workers)) {
                return -ENOMEM;
        }
index 7b32b6a..cceaf05 100644 (file)
@@ -3745,11 +3745,20 @@ err_unlock:
 static void set_btree_ioerr(struct page *page)
 {
        struct extent_buffer *eb = (struct extent_buffer *)page->private;
+       struct btrfs_fs_info *fs_info;
 
        SetPageError(page);
        if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
                return;
 
+       /*
+        * If we error out, we should add back the dirty_metadata_bytes
+        * to keep the counter consistent.
+        */
+       fs_info = eb->fs_info;
+       percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+                                eb->len, fs_info->dirty_metadata_batch);
+
        /*
         * If writeback for a btree extent that doesn't belong to a log tree
         * failed, increment the counter transaction->eb_write_errors.
@@ -3986,6 +3995,10 @@ retry:
                        if (!ret) {
                                free_extent_buffer(eb);
                                continue;
+                       } else if (ret < 0) {
+                               done = 1;
+                               free_extent_buffer(eb);
+                               break;
                        }
 
                        ret = write_one_eb(eb, wbc, &epd);
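The first hunk restores balance in the metadata accounting: whatever the submit path subtracts from dirty_metadata_bytes must be added back when the write errors out. A toy sketch of that rule (a plain long stands in for the percpu counter):

#include <stdio.h>

static long dirty_metadata_bytes;

static void submit_eb(long len)
{
        dirty_metadata_bytes -= len;    /* subtracted at submission */
}

static void set_btree_ioerr(long len)
{
        dirty_metadata_bytes += len;    /* failed write: add it back */
}

int main(void)
{
        dirty_metadata_bytes = 4096;
        submit_eb(4096);
        set_btree_ioerr(4096);
        printf("%ld\n", dirty_metadata_bytes);  /* 4096, consistent again */
        return 0;
}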
index 8fe4eb7..435a502 100644 (file)
@@ -1591,7 +1591,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct page **pages = NULL;
-       struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
        u64 release_bytes = 0;
        u64 lockstart;
@@ -1611,6 +1610,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                return -ENOMEM;
 
        while (iov_iter_count(i) > 0) {
+               struct extent_state *cached_state = NULL;
                size_t offset = offset_in_page(pos);
                size_t sector_offset;
                size_t write_bytes = min(iov_iter_count(i),
@@ -1692,7 +1692,7 @@ again:
                                    force_page_uptodate);
                if (ret) {
                        btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                      reserve_bytes, true);
+                                                      reserve_bytes);
                        break;
                }
 
@@ -1704,7 +1704,7 @@ again:
                        if (extents_locked == -EAGAIN)
                                goto again;
                        btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                      reserve_bytes, true);
+                                                      reserve_bytes);
                        ret = extents_locked;
                        break;
                }
@@ -1758,11 +1758,21 @@ again:
                if (copied > 0)
                        ret = btrfs_dirty_pages(inode, pages, dirty_pages,
                                                pos, copied, &cached_state);
+
+               /*
+                * If we have not locked the extent range, because the range's
+                * start offset is >= i_size, we might still have a non-NULL
+                * cached extent state, acquired while marking the extent range
+                * as delalloc through btrfs_dirty_pages(). Therefore free any
+                * possible cached extent state to avoid a memory leak.
+                */
                if (extents_locked)
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                             lockstart, lockend, &cached_state);
-               btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
-                                              true);
+               else
+                       free_extent_state(cached_state);
+
+               btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
                if (ret) {
                        btrfs_drop_pages(pages, num_pages);
                        break;
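A minimal sketch of the leak pattern fixed above: an out-parameter cache is either consumed by the unlock path or must be freed explicitly by the caller (names hypothetical):

#include <stdbool.h>
#include <stdlib.h>

struct cached_state { int dummy; };

/* may hand a cache back through the out parameter */
static void dirty_pages(struct cached_state **cachep)
{
        *cachep = malloc(sizeof(**cachep));
}

/* the unlock path consumes (frees) the cache */
static void unlock_extent_cached(struct cached_state **cachep)
{
        free(*cachep);
        *cachep = NULL;
}

int main(void)
{
        bool extents_locked = false;
        struct cached_state *cached_state = NULL;

        dirty_pages(&cached_state);
        if (extents_locked)
                unlock_extent_cached(&cached_state);
        else
                free(cached_state);     /* the fix: don't leak it */
        return 0;
}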
@@ -2057,25 +2067,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        struct btrfs_trans_handle *trans;
        struct btrfs_log_ctx ctx;
        int ret = 0, err;
-       u64 len;
 
-       /*
-        * If the inode needs a full sync, make sure we use a full range to
-        * avoid log tree corruption, due to hole detection racing with ordered
-        * extent completion for adjacent ranges, and assertion failures during
-        * hole detection.
-        */
-       if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                    &BTRFS_I(inode)->runtime_flags)) {
-               start = 0;
-               end = LLONG_MAX;
-       }
-
-       /*
-        * The range length can be represented by u64, we have to do the typecasts
-        * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
-        */
-       len = (u64)end - (u64)start + 1;
        trace_btrfs_sync_file(file, datasync);
 
        btrfs_init_log_ctx(&ctx, inode);
@@ -2101,6 +2093,19 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
        atomic_inc(&root->log_batch);
 
+       /*
+        * If the inode needs a full sync, make sure we use a full range to
+        * avoid log tree corruption, due to hole detection racing with ordered
+        * extent completion for adjacent ranges, and assertion failures during
+        * hole detection. Do this while holding the inode lock, to avoid races
+        * with other tasks.
+        */
+       if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                    &BTRFS_I(inode)->runtime_flags)) {
+               start = 0;
+               end = LLONG_MAX;
+       }
+
        /*
         * Before we acquired the inode's lock, someone may have dirtied more
         * pages in the target range. We need to make sure that writeback for
@@ -2128,8 +2133,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        /*
         * We have to do this here to avoid the priority inversion of waiting on
         * IO of a lower priority task while holding a transaction open.
+        *
+        * Also, the range length can be represented by a u64, so we have to
+        * do the typecasts to avoid signed overflow if it's [0, LLONG_MAX].
         */
-       ret = btrfs_wait_ordered_range(inode, start, len);
+       ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1);
        if (ret) {
                up_write(&BTRFS_I(inode)->dio_sem);
                inode_unlock(inode);
index 63cad78..37345fb 100644 (file)
@@ -501,13 +501,13 @@ again:
        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                              prealloc, prealloc, &alloc_hint);
        if (ret) {
-               btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
                btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true);
                goto out_put;
        }
 
        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
 out_put:
        iput(inode);
 out_release:
index a054640..c3f386b 100644 (file)
@@ -2206,7 +2206,7 @@ again:
 
        ClearPageChecked(page);
        set_page_dirty(page);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
                             &cached_state);
@@ -4951,7 +4951,7 @@ again:
        if (!page) {
                btrfs_delalloc_release_space(inode, data_reserved,
                                             block_start, blocksize, true);
-               btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
                ret = -ENOMEM;
                goto out;
        }
@@ -5018,7 +5018,7 @@ out_unlock:
        if (ret)
                btrfs_delalloc_release_space(inode, data_reserved, block_start,
                                             blocksize, true);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
+       btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
        unlock_page(page);
        put_page(page);
 out:
@@ -6305,13 +6305,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        u32 sizes[2];
        int nitems = name ? 2 : 1;
        unsigned long ptr;
+       unsigned int nofs_flag;
        int ret;
 
        path = btrfs_alloc_path();
        if (!path)
                return ERR_PTR(-ENOMEM);
 
+       nofs_flag = memalloc_nofs_save();
        inode = new_inode(fs_info->sb);
+       memalloc_nofs_restore(nofs_flag);
        if (!inode) {
                btrfs_free_path(path);
                return ERR_PTR(-ENOMEM);
@@ -8706,7 +8709,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                } else if (ret >= 0 && (size_t)ret < count)
                        btrfs_delalloc_release_space(inode, data_reserved,
                                        offset, count - (size_t)ret, true);
-               btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), count);
        }
 out:
        if (wakeup)
@@ -9056,7 +9059,7 @@ again:
        unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
 
        if (!ret2) {
-               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
                sb_end_pagefault(inode->i_sb);
                extent_changeset_free(data_reserved);
                return VM_FAULT_LOCKED;
@@ -9065,7 +9068,7 @@ again:
 out_unlock:
        unlock_page(page);
 out:
-       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
+       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
        btrfs_delalloc_release_space(inode, data_reserved, page_start,
                                     reserved_space, (ret != 0));
 out_noreserve:
index de730e5..7c145a4 100644 (file)
@@ -1360,8 +1360,7 @@ again:
                unlock_page(pages[i]);
                put_page(pages[i]);
        }
-       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
-                                      false);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
        extent_changeset_free(data_reserved);
        return i_done;
 out:
@@ -1372,8 +1371,7 @@ out:
        btrfs_delalloc_release_space(inode, data_reserved,
                        start_index << PAGE_SHIFT,
                        page_cnt << PAGE_SHIFT, true);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
-                                      true);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
        extent_changeset_free(data_reserved);
        return ret;
 
index 8d3bd79..3ad1516 100644 (file)
@@ -3166,9 +3166,6 @@ out:
        btrfs_free_path(path);
 
        mutex_lock(&fs_info->qgroup_rescan_lock);
-       if (!btrfs_fs_closing(fs_info))
-               fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
-
        if (err > 0 &&
            fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -3184,16 +3181,30 @@ out:
        trans = btrfs_start_transaction(fs_info->quota_root, 1);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
+               trans = NULL;
                btrfs_err(fs_info,
                          "fail to start transaction for status update: %d",
                          err);
-               goto done;
        }
-       ret = update_qgroup_status_item(trans);
-       if (ret < 0) {
-               err = ret;
-               btrfs_err(fs_info, "fail to update qgroup status: %d", err);
+
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       if (!btrfs_fs_closing(fs_info))
+               fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+       if (trans) {
+               ret = update_qgroup_status_item(trans);
+               if (ret < 0) {
+                       err = ret;
+                       btrfs_err(fs_info, "fail to update qgroup status: %d",
+                                 err);
+               }
        }
+       fs_info->qgroup_rescan_running = false;
+       complete_all(&fs_info->qgroup_rescan_completion);
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+       if (!trans)
+               return;
+
        btrfs_end_transaction(trans);
 
        if (btrfs_fs_closing(fs_info)) {
@@ -3204,12 +3215,6 @@ out:
        } else {
                btrfs_err(fs_info, "qgroup scan failed with %d", err);
        }
-
-done:
-       mutex_lock(&fs_info->qgroup_rescan_lock);
-       fs_info->qgroup_rescan_running = false;
-       mutex_unlock(&fs_info->qgroup_rescan_lock);
-       complete_all(&fs_info->qgroup_rescan_completion);
 }
 
 /*
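
After this rework the rescan worker settles all completion state under qgroup_rescan_lock in a single critical section: clear the RESCAN flag, update the status item if a transaction could be started, mark the rescan as not running, and wake waiters. Condensed, the resulting ordering is:

	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (!btrfs_fs_closing(fs_info))
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	if (trans)
		update_qgroup_status_item(trans);
	fs_info->qgroup_rescan_running = false;
	complete_all(&fs_info->qgroup_rescan_completion);
	mutex_unlock(&fs_info->qgroup_rescan_lock);
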
@@ -3437,6 +3442,9 @@ cleanup:
        while ((unode = ulist_next(&reserved->range_changed, &uiter)))
                clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
                                 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
+       /* Also free the data bytes of the already reserved range */
+       btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid,
+                                 orig_reserved, BTRFS_QGROUP_RSV_DATA);
        extent_changeset_release(reserved);
        return ret;
 }
@@ -3481,7 +3489,7 @@ static int qgroup_free_reserved_data(struct inode *inode,
                 * EXTENT_QGROUP_RESERVED, we won't double free.
                 * So no need to rush.
                 */
-               ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree,
+               ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
                                free_start, free_start + free_len - 1,
                                EXTENT_QGROUP_RESERVED, &changeset);
                if (ret < 0)
@@ -3621,7 +3629,7 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
                return 0;
 
        BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
-       trace_qgroup_meta_reserve(root, type, (s64)num_bytes);
+       trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
        ret = qgroup_reserve(root, num_bytes, enforce, type);
        if (ret < 0)
                return ret;
@@ -3668,7 +3676,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
         */
        num_bytes = sub_root_meta_rsv(root, num_bytes, type);
        BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
-       trace_qgroup_meta_reserve(root, type, -(s64)num_bytes);
+       trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
        btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid,
                                  num_bytes, type);
 }
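
The two trace_qgroup_meta_reserve() hunks above only swap argument order: the event takes the signed byte delta before the reservation type. Both parameters are integer types, so the old order compiled silently but recorded the values in the wrong fields. This assumes the event is declared along these lines (see include/trace/events/btrfs.h):

	TRACE_EVENT(qgroup_meta_reserve,
		TP_PROTO(struct btrfs_root *root, s64 diff, int type),
		TP_ARGS(root, diff, type),
		/* ... */
	);
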
index e87cbda..b57f361 100644 (file)
@@ -500,7 +500,7 @@ static int process_leaf(struct btrfs_root *root,
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        u32 count;
-       int i = 0, tree_block_level = 0, ret;
+       int i = 0, tree_block_level = 0, ret = 0;
        struct btrfs_key key;
        int nritems = btrfs_header_nritems(leaf);
 
index 2f0e25a..5cd42b6 100644 (file)
@@ -1435,6 +1435,13 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
        int clear_rsv = 0;
        int ret;
 
+       /*
+        * The subvolume has a reloc tree but the swap has finished; there is
+        * no need to create/update the dead reloc tree.
+        */
+       if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
+               return 0;
+
        if (root->reloc_root) {
                reloc_root = root->reloc_root;
                reloc_root->last_trans = trans->transid;
@@ -2187,7 +2194,6 @@ static int clean_dirty_subvols(struct reloc_control *rc)
                        /* Merged subvolume, cleanup its reloc root */
                        struct btrfs_root *reloc_root = root->reloc_root;
 
-                       clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
                        list_del_init(&root->reloc_dirty_list);
                        root->reloc_root = NULL;
                        if (reloc_root) {
@@ -2196,6 +2202,7 @@ static int clean_dirty_subvols(struct reloc_control *rc)
                                if (ret2 < 0 && !ret)
                                        ret = ret2;
                        }
+                       clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
                        btrfs_put_fs_root(root);
                } else {
                        /* Orphan reloc tree, just clean it up */
@@ -3270,6 +3277,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        if (!page) {
                                btrfs_delalloc_release_metadata(BTRFS_I(inode),
                                                        PAGE_SIZE, true);
+                               btrfs_delalloc_release_extents(BTRFS_I(inode),
+                                                       PAGE_SIZE);
                                ret = -ENOMEM;
                                goto out;
                        }
@@ -3290,7 +3299,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
                                btrfs_delalloc_release_metadata(BTRFS_I(inode),
                                                        PAGE_SIZE, true);
                                btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                              PAGE_SIZE, true);
+                                                              PAGE_SIZE);
                                ret = -EIO;
                                goto out;
                        }
@@ -3319,7 +3328,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        btrfs_delalloc_release_metadata(BTRFS_I(inode),
                                                         PAGE_SIZE, true);
                        btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                      PAGE_SIZE, true);
+                                                      PAGE_SIZE);
 
                        clear_extent_bits(&BTRFS_I(inode)->io_tree,
                                          page_start, page_end,
@@ -3335,8 +3344,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
                put_page(page);
 
                index++;
-               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
-                                              false);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
                balance_dirty_pages_ratelimited(inode->i_mapping);
                btrfs_throttle(fs_info);
        }
index f321502..123ac54 100644 (file)
@@ -5085,7 +5085,7 @@ static int clone_range(struct send_ctx *sctx,
        struct btrfs_path *path;
        struct btrfs_key key;
        int ret;
-       u64 clone_src_i_size;
+       u64 clone_src_i_size = 0;
 
        /*
         * Prevent cloning from a zero offset with a length matching the sector
index b5e8056..99fe9bf 100644 (file)
@@ -52,7 +52,13 @@ static struct file_system_type test_type = {
 
 struct inode *btrfs_new_test_inode(void)
 {
-       return new_inode(test_mnt->mnt_sb);
+       struct inode *inode;
+
+       inode = new_inode(test_mnt->mnt_sb);
+       if (inode)
+               inode_init_owner(inode, NULL, S_IFREG);
+
+       return inode;
 }
 
 static int btrfs_init_test_fs(void)
index 29b82a7..8a6cc60 100644 (file)
@@ -2932,7 +2932,8 @@ out:
  * in the tree of log roots
  */
 static int update_log_root(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *log)
+                          struct btrfs_root *log,
+                          struct btrfs_root_item *root_item)
 {
        struct btrfs_fs_info *fs_info = log->fs_info;
        int ret;
@@ -2940,10 +2941,10 @@ static int update_log_root(struct btrfs_trans_handle *trans,
        if (log->log_transid == 1) {
                /* insert root item on the first sync */
                ret = btrfs_insert_root(trans, fs_info->log_root_tree,
-                               &log->root_key, &log->root_item);
+                               &log->root_key, root_item);
        } else {
                ret = btrfs_update_root(trans, fs_info->log_root_tree,
-                               &log->root_key, &log->root_item);
+                               &log->root_key, root_item);
        }
        return ret;
 }
@@ -3041,6 +3042,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *log = root->log_root;
        struct btrfs_root *log_root_tree = fs_info->log_root_tree;
+       struct btrfs_root_item new_root_item;
        int log_transid = 0;
        struct btrfs_log_ctx root_log_ctx;
        struct blk_plug plug;
@@ -3104,17 +3106,25 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                goto out;
        }
 
+       /*
+        * We _must_ update under the root->log_mutex in order to make sure we
+        * have a consistent view of the log root we are trying to commit at
+        * this moment.
+        *
+        * We _must_ make a local copy, because we are not holding the
+        * log_root_tree->log_mutex yet.  This is important because when we
+        * commit the log_root_tree we must have a consistent view of the
+        * log_root_tree when we update the super block to point at the
+        * log_root_tree bytenr.  If we update the log_root_tree here we'll race
+        * with the commit and possibly point at the new block which we may not
+        * have written out.
+        */
        btrfs_set_root_node(&log->root_item, log->node);
+       memcpy(&new_root_item, &log->root_item, sizeof(new_root_item));
 
        root->log_transid++;
        log->log_transid = root->log_transid;
        root->log_start_pid = 0;
-       /*
-        * Update or create log root item under the root's log_mutex to prevent
-        * races with concurrent log syncs that can lead to failure to update
-        * log root item because it was not created yet.
-        */
-       ret = update_log_root(trans, log);
        /*
         * IO has been started, blocks of the log tree have WRITTEN flag set
         * in their headers. new modifications of the log will be written to
@@ -3135,6 +3145,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        mutex_unlock(&log_root_tree->log_mutex);
 
        mutex_lock(&log_root_tree->log_mutex);
+
+       /*
+        * Now we are safe to update the log_root_tree because we're under the
+        * log_mutex, and we're a current writer so we're holding the commit
+        * open until we drop the log_mutex.
+        */
+       ret = update_log_root(trans, log, &new_root_item);
+
        if (atomic_dec_and_test(&log_root_tree->log_writers)) {
                /* atomic_dec_and_test implies a barrier */
                cond_wake_up_nomb(&log_root_tree->log_writer_wait);
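
The race fixed here: update_log_root() used to run while only root->log_mutex was held, so a concurrent commit of the log root tree could pick up a root item pointing at blocks that were never written out. The fix snapshots the item under one lock and publishes it under the other; roughly:

	mutex_lock(&root->log_mutex);
	btrfs_set_root_node(&log->root_item, log->node);
	memcpy(&new_root_item, &log->root_item, sizeof(new_root_item));
	/* ... */
	mutex_lock(&log_root_tree->log_mutex);
	/* safe: we hold the commit open while under this mutex */
	ret = update_log_root(trans, log, &new_root_item);
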
index a324480..bdfe449 100644 (file)
@@ -3845,7 +3845,11 @@ static int alloc_profile_is_valid(u64 flags, int extended)
                return !extended; /* "0" is valid for usual profiles */
 
        /* true if exactly one bit set */
-       return is_power_of_2(flags);
+       /*
+        * Don't use is_power_of_2(unsigned long) because it won't work
+        * for the single profile (1ULL << 48) on 32-bit CPUs.
+        */
+       return flags != 0 && (flags & (flags - 1)) == 0;
 }
 
 static inline int balance_need_close(struct btrfs_fs_info *fs_info)
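
The open-coded test matters because is_power_of_2() takes an unsigned long, which is 32 bits wide on 32-bit CPUs; a profile bit such as 1ULL << 48 would be truncated to 0 and wrongly rejected. A minimal illustration:

	u64 flags = 1ULL << 48;

	is_power_of_2(flags);                      /* false on 32-bit: truncates to 0 */
	flags != 0 && (flags & (flags - 1)) == 0;  /* true regardless of word size */
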
@@ -4063,7 +4067,13 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
        }
 
        num_devices = btrfs_num_devices(fs_info);
-       allowed = 0;
+
+       /*
+        * The SINGLE profile has no profile bit on disk, but in memory we use
+        * a special bit for it to make it easier to distinguish.  Thus we need
+        * to set it manually, or balance would refuse the profile.
+        */
+       allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
        for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
                if (num_devices >= btrfs_raid_array[i].devs_min)
                        allowed |= btrfs_raid_array[i].bg_flag;
index a699e32..c1da294 100644 (file)
@@ -6,7 +6,7 @@
 obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
-       export.o caps.o snap.o xattr.o quota.o \
+       export.o caps.o snap.o xattr.o quota.o io.o \
        mds_client.o mdsmap.o strings.o ceph_frag.o \
        debugfs.o
 
index b3c8b88..7ab6166 100644 (file)
@@ -189,8 +189,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
 {
        struct inode *inode = file_inode(filp);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc =
-               &ceph_inode_to_client(inode)->client->osdc;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        int err = 0;
        u64 off = page_offset(page);
        u64 len = PAGE_SIZE;
@@ -219,8 +218,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
 
        dout("readpage inode %p file %p page %p index %lu\n",
             inode, filp, page, page->index);
-       err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
-                                 off, &len,
+       err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
+                                 &ci->i_layout, off, &len,
                                  ci->i_truncate_seq, ci->i_truncate_size,
                                  &page, 1, 0);
        if (err == -ENOENT)
@@ -228,6 +227,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
        if (err < 0) {
                SetPageError(page);
                ceph_fscache_readpage_cancel(inode, page);
+               if (err == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                goto out;
        }
        if (err < PAGE_SIZE)
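
-EBLACKLISTED handling follows one pattern throughout this ceph series: any OSD reply indicating the client was blacklisted latches a flag on the fs client so later recovery logic can detect it. In sketch form:

	if (err == -EBLACKLISTED)
		ceph_inode_to_client(inode)->blacklisted = true;
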
@@ -266,6 +267,8 @@ static void finish_read(struct ceph_osd_request *req)
        int i;
 
        dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
+       if (rc == -EBLACKLISTED)
+               ceph_inode_to_client(inode)->blacklisted = true;
 
        /* unlock all pages, zeroing any data we didn't read */
        osd_data = osd_req_op_extent_osd_data(req, 0);
@@ -323,7 +326,8 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
                /* caller of readpages does not hold buffer and read caps
                 * (fadvise, madvise and readahead cases) */
                int want = CEPH_CAP_FILE_CACHE;
-               ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, true, &got);
+               ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
+                                       true, &got);
                if (ret < 0) {
                        dout("start_read %p, error getting cap\n", inode);
                } else if (!(got & want)) {
@@ -569,7 +573,7 @@ static u64 get_writepages_data_length(struct inode *inode,
 /*
  * Write a single page, but leave the page locked.
  *
- * If we get a write error, set the page error bit, but still adjust the
+ * If we get a write error, mark the mapping for error, but still adjust the
  * dirty page accounting (i.e., page is no longer dirty).
  */
 static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
@@ -640,9 +644,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
                        end_page_writeback(page);
                        return err;
                }
+               if (err == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                dout("writepage setting page/mapping error %d %p\n",
                     err, page);
-               SetPageError(page);
                mapping_set_error(&inode->i_data, err);
                wbc->pages_skipped++;
        } else {
@@ -679,23 +684,6 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
        return err;
 }
 
-/*
- * lame release_pages helper.  release_pages() isn't exported to
- * modules.
- */
-static void ceph_release_pages(struct page **pages, int num)
-{
-       struct pagevec pvec;
-       int i;
-
-       pagevec_init(&pvec);
-       for (i = 0; i < num; i++) {
-               if (pagevec_add(&pvec, pages[i]) == 0)
-                       pagevec_release(&pvec);
-       }
-       pagevec_release(&pvec);
-}
-
 /*
  * async writeback completion handler.
  *
@@ -720,6 +708,8 @@ static void writepages_finish(struct ceph_osd_request *req)
        if (rc < 0) {
                mapping_set_error(mapping, rc);
                ceph_set_error_write(ci);
+               if (rc == -EBLACKLISTED)
+                       fsc->blacklisted = true;
        } else {
                ceph_clear_error_write(ci);
        }
@@ -769,7 +759,7 @@ static void writepages_finish(struct ceph_osd_request *req)
                dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n",
                     inode, osd_data->length, rc >= 0 ? num_pages : 0);
 
-               ceph_release_pages(osd_data->pages, num_pages);
+               release_pages(osd_data->pages, num_pages);
        }
 
        ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
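
The deleted ceph_release_pages() existed only because release_pages() used to be unavailable to modules; it is exported now, so the open-coded pagevec batching collapses into a single call:

	/* was: ceph_release_pages(osd_data->pages, num_pages); */
	release_pages(osd_data->pages, num_pages);
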
@@ -1452,7 +1442,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
                want = CEPH_CAP_FILE_CACHE;
 
        got = 0;
-       err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
+       err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
+                           &got, &pinned_page);
        if (err < 0)
                goto out_restore;
 
@@ -1540,6 +1531,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
        if (!prealloc_cf)
                return VM_FAULT_OOM;
 
+       sb_start_pagefault(inode->i_sb);
        ceph_block_sigs(&oldset);
 
        if (ci->i_inline_version != CEPH_INLINE_NONE) {
@@ -1568,7 +1560,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
                want = CEPH_CAP_FILE_BUFFER;
 
        got = 0;
-       err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+       err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
                            &got, NULL);
        if (err < 0)
                goto out_free;
@@ -1614,6 +1606,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
        ceph_put_cap_refs(ci, got);
 out_free:
        ceph_restore_sigs(&oldset);
+       sb_end_pagefault(inode->i_sb);
        ceph_free_cap_flush(prealloc_cf);
        if (err < 0)
                ret = vmf_error(err);
@@ -1946,12 +1939,17 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
 
        if (err >= 0 || err == -ENOENT)
                have |= POOL_READ;
-       else if (err != -EPERM)
+       else if (err != -EPERM) {
+               if (err == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                goto out_unlock;
+       }
 
        if (err2 == 0 || err2 == -EEXIST)
                have |= POOL_WRITE;
        else if (err2 != -EPERM) {
+               if (err2 == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                err = err2;
                goto out_unlock;
        }
@@ -1989,10 +1987,11 @@ out:
        return err;
 }
 
-int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
+int ceph_pool_perm_check(struct inode *inode, int need)
 {
-       s64 pool;
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_string *pool_ns;
+       s64 pool;
        int ret, flags;
 
        if (ci->i_vino.snap != CEPH_NOSNAP) {
@@ -2004,7 +2003,7 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
                return 0;
        }
 
-       if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
+       if (ceph_test_mount_opt(ceph_inode_to_client(inode),
                                NOPOOLPERM))
                return 0;
 
index bc90cf6..b2ec29e 100644 (file)
@@ -6,6 +6,8 @@
  *  Written by Milosz Tanski (milosz@adfin.com)
  */
 
+#include <linux/ceph/ceph_debug.h>
+
 #include "super.h"
 #include "cache.h"
 
index ce0f565..d3b9c9d 100644 (file)
@@ -457,37 +457,6 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
        return cap;
 }
 
-/*
- * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1.
- */
-static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
-{
-       struct ceph_cap *cap;
-       int mds = -1;
-       struct rb_node *p;
-
-       /* prefer mds with WR|BUFFER|EXCL caps */
-       for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-               cap = rb_entry(p, struct ceph_cap, ci_node);
-               mds = cap->mds;
-               if (cap->issued & (CEPH_CAP_FILE_WR |
-                                  CEPH_CAP_FILE_BUFFER |
-                                  CEPH_CAP_FILE_EXCL))
-                       break;
-       }
-       return mds;
-}
-
-int ceph_get_cap_mds(struct inode *inode)
-{
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       int mds;
-       spin_lock(&ci->i_ceph_lock);
-       mds = __ceph_get_cap_mds(ceph_inode(inode));
-       spin_unlock(&ci->i_ceph_lock);
-       return mds;
-}
-
 /*
  * Called under i_ceph_lock.
  */
@@ -628,7 +597,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
 /*
  * Add a capability under the given MDS session.
  *
- * Caller should hold session snap_rwsem (read) and s_mutex.
+ * Caller should hold session snap_rwsem (read) and ci->i_ceph_lock
  *
  * @fmode is the open file mode, if we are opening a file, otherwise
  * it is < 0.  (This is so we can atomically add the cap and add an
@@ -645,6 +614,9 @@ void ceph_add_cap(struct inode *inode,
        struct ceph_cap *cap;
        int mds = session->s_mds;
        int actual_wanted;
+       u32 gen;
+
+       lockdep_assert_held(&ci->i_ceph_lock);
 
        dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
             session->s_mds, cap_id, ceph_cap_string(issued), seq);
@@ -656,6 +628,10 @@ void ceph_add_cap(struct inode *inode,
        if (fmode >= 0)
                wanted |= ceph_caps_for_mode(fmode);
 
+       spin_lock(&session->s_gen_ttl_lock);
+       gen = session->s_cap_gen;
+       spin_unlock(&session->s_gen_ttl_lock);
+
        cap = __get_cap_for_mds(ci, mds);
        if (!cap) {
                cap = *new_cap;
@@ -681,7 +657,7 @@ void ceph_add_cap(struct inode *inode,
                list_move_tail(&cap->session_caps, &session->s_caps);
                spin_unlock(&session->s_cap_lock);
 
-               if (cap->cap_gen < session->s_cap_gen)
+               if (cap->cap_gen < gen)
                        cap->issued = cap->implemented = CEPH_CAP_PIN;
 
                /*
@@ -775,7 +751,7 @@ void ceph_add_cap(struct inode *inode,
        cap->seq = seq;
        cap->issue_seq = seq;
        cap->mseq = mseq;
-       cap->cap_gen = session->s_cap_gen;
+       cap->cap_gen = gen;
 
        if (fmode >= 0)
                __ceph_get_fmode(ci, fmode);
@@ -1284,10 +1260,6 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
  * Make note of max_size reported/requested from mds, revoked caps
  * that have now been implemented.
  *
- * Make half-hearted attempt ot to invalidate page cache if we are
- * dropping RDCACHE.  Note that this will leave behind locked pages
- * that we'll then need to deal with elsewhere.
- *
  * Return non-zero if delayed release, or we experienced an error
  * such that the caller should requeue + retry later.
  *
@@ -1746,11 +1718,11 @@ static bool __finish_cap_flush(struct ceph_mds_client *mdsc,
  * Add dirty inode to the flushing list.  Assigned a seq number so we
  * can wait for caps to flush without starving.
  *
- * Called under i_ceph_lock.
+ * Called under i_ceph_lock. Returns the flush tid.
  */
-static int __mark_caps_flushing(struct inode *inode,
+static u64 __mark_caps_flushing(struct inode *inode,
                                struct ceph_mds_session *session, bool wake,
-                               u64 *flush_tid, u64 *oldest_flush_tid)
+                               u64 *oldest_flush_tid)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1789,8 +1761,7 @@ static int __mark_caps_flushing(struct inode *inode,
 
        list_add_tail(&cf->i_list, &ci->i_cap_flush_list);
 
-       *flush_tid = cf->tid;
-       return flushing;
+       return cf->tid;
 }
 
 /*
@@ -2028,11 +1999,6 @@ retry_locked:
                }
 
 ack:
-               if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
-                       dout(" skipping %p I_NOFLUSH set\n", inode);
-                       continue;
-               }
-
                if (session && session != cap->session) {
                        dout("oops, wrong session %p mutex\n", session);
                        mutex_unlock(&session->s_mutex);
@@ -2080,9 +2046,9 @@ ack:
                }
 
                if (cap == ci->i_auth_cap && ci->i_dirty_caps) {
-                       flushing = __mark_caps_flushing(inode, session, false,
-                                                       &flush_tid,
-                                                       &oldest_flush_tid);
+                       flushing = ci->i_dirty_caps;
+                       flush_tid = __mark_caps_flushing(inode, session, false,
+                                                        &oldest_flush_tid);
                } else {
                        flushing = 0;
                        flush_tid = 0;
@@ -2130,16 +2096,11 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
 retry:
        spin_lock(&ci->i_ceph_lock);
 retry_locked:
-       if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
-               spin_unlock(&ci->i_ceph_lock);
-               dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
-               goto out;
-       }
        if (ci->i_dirty_caps && ci->i_auth_cap) {
                struct ceph_cap *cap = ci->i_auth_cap;
                int delayed;
 
-               if (!session || session != cap->session) {
+               if (session != cap->session) {
                        spin_unlock(&ci->i_ceph_lock);
                        if (session)
                                mutex_unlock(&session->s_mutex);
@@ -2161,8 +2122,9 @@ retry_locked:
                        goto retry_locked;
                }
 
-               flushing = __mark_caps_flushing(inode, session, true,
-                                               &flush_tid, &oldest_flush_tid);
+               flushing = ci->i_dirty_caps;
+               flush_tid = __mark_caps_flushing(inode, session, true,
+                                                &oldest_flush_tid);
 
                /* __send_cap drops i_ceph_lock */
                delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
@@ -2261,35 +2223,45 @@ static int unsafe_request_wait(struct inode *inode)
 
 int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
+       struct ceph_file_info *fi = file->private_data;
        struct inode *inode = file->f_mapping->host;
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 flush_tid;
-       int ret;
+       int ret, err;
        int dirty;
 
        dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
 
        ret = file_write_and_wait_range(file, start, end);
-       if (ret < 0)
-               goto out;
-
        if (datasync)
                goto out;
 
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
 
-       ret = unsafe_request_wait(inode);
+       err = unsafe_request_wait(inode);
 
        /*
         * only wait on non-file metadata writeback (the mds
         * can recover size and mtime, so we don't need to
         * wait for that)
         */
-       if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
-               ret = wait_event_interruptible(ci->i_cap_wq,
+       if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
+               err = wait_event_interruptible(ci->i_cap_wq,
                                        caps_are_flushed(inode, flush_tid));
        }
+
+       if (err < 0)
+               ret = err;
+
+       if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) {
+               spin_lock(&file->f_lock);
+               err = errseq_check_and_advance(&ci->i_meta_err,
+                                              &fi->meta_err);
+               spin_unlock(&file->f_lock);
+               if (err < 0)
+                       ret = err;
+       }
 out:
        dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
        return ret;
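
The fsync change wires ceph into the errseq mechanism: each open file samples the inode-wide i_meta_err cursor, and fsync reports any metadata writeback error raised since that sample exactly once per file description. A sketch of the lifecycle (the errseq_set() call site is assumed, not shown in this diff):

	/* at open: remember the current error state */
	fi->meta_err = errseq_sample(&ci->i_meta_err);

	/* when metadata writeback fails somewhere: */
	errseq_set(&ci->i_meta_err, -EIO);

	/* at fsync: check and advance past the recorded error */
	if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err)))
		err = errseq_check_and_advance(&ci->i_meta_err, &fi->meta_err);
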
@@ -2560,10 +2532,15 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
  *
  * FIXME: how does a 0 return differ from -EAGAIN?
  */
-static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
-                           loff_t endoff, bool nonblock, int *got)
+enum {
+       NON_BLOCKING    = 1,
+       CHECK_FILELOCK  = 2,
+};
+
+static int try_get_cap_refs(struct inode *inode, int need, int want,
+                           loff_t endoff, int flags, int *got)
 {
-       struct inode *inode = &ci->vfs_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        int ret = 0;
        int have, implemented;
@@ -2576,6 +2553,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 again:
        spin_lock(&ci->i_ceph_lock);
 
+       if ((flags & CHECK_FILELOCK) &&
+           (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) {
+               dout("try_get_cap_refs %p error filelock\n", inode);
+               ret = -EIO;
+               goto out_unlock;
+       }
+
        /* make sure file is actually open */
        file_wanted = __ceph_caps_file_wanted(ci);
        if ((file_wanted & need) != need) {
@@ -2637,7 +2621,7 @@ again:
                                         * we can not call down_read() when
                                         * task isn't in TASK_RUNNING state
                                         */
-                                       if (nonblock) {
+                                       if (flags & NON_BLOCKING) {
                                                ret = -EAGAIN;
                                                goto out_unlock;
                                        }
@@ -2731,18 +2715,19 @@ static void check_max_size(struct inode *inode, loff_t endoff)
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 }
 
-int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
+int ceph_try_get_caps(struct inode *inode, int need, int want,
                      bool nonblock, int *got)
 {
        int ret;
 
        BUG_ON(need & ~CEPH_CAP_FILE_RD);
        BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
-       ret = ceph_pool_perm_check(ci, need);
+       ret = ceph_pool_perm_check(inode, need);
        if (ret < 0)
                return ret;
 
-       ret = try_get_cap_refs(ci, need, want, 0, nonblock, got);
+       ret = try_get_cap_refs(inode, need, want, 0,
+                              (nonblock ? NON_BLOCKING : 0), got);
        return ret == -EAGAIN ? 0 : ret;
 }
 
@@ -2751,30 +2736,40 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
  * due to a small max_size, make sure we check_max_size (and possibly
  * ask the mds) so we don't get hung up indefinitely.
  */
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+int ceph_get_caps(struct file *filp, int need, int want,
                  loff_t endoff, int *got, struct page **pinned_page)
 {
-       int _got, ret;
+       struct ceph_file_info *fi = filp->private_data;
+       struct inode *inode = file_inode(filp);
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       int ret, _got, flags;
 
-       ret = ceph_pool_perm_check(ci, need);
+       ret = ceph_pool_perm_check(inode, need);
        if (ret < 0)
                return ret;
 
+       if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+           fi->filp_gen != READ_ONCE(fsc->filp_gen))
+               return -EBADF;
+
        while (true) {
                if (endoff > 0)
-                       check_max_size(&ci->vfs_inode, endoff);
+                       check_max_size(inode, endoff);
 
+               flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0;
                _got = 0;
-               ret = try_get_cap_refs(ci, need, want, endoff,
-                                      false, &_got);
+               ret = try_get_cap_refs(inode, need, want, endoff,
+                                      flags, &_got);
                if (ret == -EAGAIN)
                        continue;
                if (!ret) {
                        DEFINE_WAIT_FUNC(wait, woken_wake_function);
                        add_wait_queue(&ci->i_cap_wq, &wait);
 
-                       while (!(ret = try_get_cap_refs(ci, need, want, endoff,
-                                                       true, &_got))) {
+                       flags |= NON_BLOCKING;
+                       while (!(ret = try_get_cap_refs(inode, need, want,
+                                                       endoff, flags, &_got))) {
                                if (signal_pending(current)) {
                                        ret = -ERESTARTSYS;
                                        break;
@@ -2786,10 +2781,18 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
                        if (ret == -EAGAIN)
                                continue;
                }
+
+               if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+                   fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
+                       if (ret >= 0 && _got)
+                               ceph_put_cap_refs(ci, _got);
+                       return -EBADF;
+               }
+
                if (ret < 0) {
                        if (ret == -ESTALE) {
                                /* session was killed, try renew caps */
-                               ret = ceph_renew_caps(&ci->vfs_inode);
+                               ret = ceph_renew_caps(inode);
                                if (ret == 0)
                                        continue;
                        }
@@ -2798,9 +2801,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
 
                if (ci->i_inline_version != CEPH_INLINE_NONE &&
                    (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
-                   i_size_read(&ci->vfs_inode) > 0) {
+                   i_size_read(inode) > 0) {
                        struct page *page =
-                               find_get_page(ci->vfs_inode.i_mapping, 0);
+                               find_get_page(inode->i_mapping, 0);
                        if (page) {
                                if (PageUptodate(page)) {
                                        *pinned_page = page;
@@ -2819,7 +2822,7 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
                         * getattr request will bring inline data into
                         * page cache
                         */
-                       ret = __ceph_do_getattr(&ci->vfs_inode, NULL,
+                       ret = __ceph_do_getattr(inode, NULL,
                                                CEPH_STAT_CAP_INLINE_DATA,
                                                true);
                        if (ret < 0)
index 2eb88ed..facb387 100644 (file)
@@ -294,7 +294,6 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
-       return 0;
 }
 
 void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
index 15ff1b0..b6bfa94 100644 (file)
@@ -35,7 +35,7 @@ struct ceph_nfs_snapfh {
 static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
                              struct inode *parent_inode)
 {
-       const static int snap_handle_length =
+       static const int snap_handle_length =
                sizeof(struct ceph_nfs_snapfh) >> 2;
        struct ceph_nfs_snapfh *sfh = (void *)rawfh;
        u64 snapid = ceph_snap(inode);
@@ -85,9 +85,9 @@ out:
 static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
                          struct inode *parent_inode)
 {
-       const static int handle_length =
+       static const int handle_length =
                sizeof(struct ceph_nfs_fh) >> 2;
-       const static int connected_handle_length =
+       static const int connected_handle_length =
                sizeof(struct ceph_nfs_confh) >> 2;
        int type;
 
@@ -458,33 +458,33 @@ static int __get_snap_name(struct dentry *parent, char *name,
                if (err < 0)
                        goto out;
 
-                rinfo = &req->r_reply_info;
-                for (i = 0; i < rinfo->dir_nr; i++) {
-                        rde = rinfo->dir_entries + i;
-                        BUG_ON(!rde->inode.in);
-                        if (ceph_snap(inode) ==
-                            le64_to_cpu(rde->inode.in->snapid)) {
-                                memcpy(name, rde->name, rde->name_len);
-                                name[rde->name_len] = '\0';
-                                err = 0;
-                                goto out;
-                        }
-                }
-
-                if (rinfo->dir_end)
-                        break;
-
-                BUG_ON(rinfo->dir_nr <= 0);
-                rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
-                next_offset += rinfo->dir_nr;
-                last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
-                if (!last_name) {
-                        err = -ENOMEM;
-                        goto out;
-                }
-
-                ceph_mdsc_put_request(req);
-                req = NULL;
+               rinfo = &req->r_reply_info;
+               for (i = 0; i < rinfo->dir_nr; i++) {
+                       rde = rinfo->dir_entries + i;
+                       BUG_ON(!rde->inode.in);
+                       if (ceph_snap(inode) ==
+                           le64_to_cpu(rde->inode.in->snapid)) {
+                               memcpy(name, rde->name, rde->name_len);
+                               name[rde->name_len] = '\0';
+                               err = 0;
+                               goto out;
+                       }
+               }
+
+               if (rinfo->dir_end)
+                       break;
+
+               BUG_ON(rinfo->dir_nr <= 0);
+               rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
+               next_offset += rinfo->dir_nr;
+               last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
+               if (!last_name) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+
+               ceph_mdsc_put_request(req);
+               req = NULL;
        }
        err = -ENOENT;
 out:
index 685a03c..d277f71 100644 (file)
@@ -15,6 +15,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "io.h"
 
 static __le32 ceph_flags_sys2wire(u32 flags)
 {
@@ -201,6 +202,7 @@ out:
 static int ceph_init_file_info(struct inode *inode, struct file *file,
                                        int fmode, bool isdir)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_file_info *fi;
 
        dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -211,7 +213,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
                struct ceph_dir_file_info *dfi =
                        kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
                if (!dfi) {
-                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       ceph_put_fmode(ci, fmode); /* clean up */
                        return -ENOMEM;
                }
 
@@ -222,7 +224,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
        } else {
                fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
                if (!fi) {
-                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       ceph_put_fmode(ci, fmode); /* clean up */
                        return -ENOMEM;
                }
 
@@ -232,6 +234,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
        fi->fmode = fmode;
        spin_lock_init(&fi->rw_contexts_lock);
        INIT_LIST_HEAD(&fi->rw_contexts);
+       fi->meta_err = errseq_sample(&ci->i_meta_err);
+       fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
 
        return 0;
 }
@@ -695,7 +699,13 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        ceph_release_page_vector(pages, num_pages);
                }
 
-               if (ret <= 0 || off >= i_size || !more)
+               if (ret < 0) {
+                       if (ret == -EBLACKLISTED)
+                               fsc->blacklisted = true;
+                       break;
+               }
+
+               if (off >= i_size || !more)
                        break;
        }
 
@@ -921,7 +931,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        struct ceph_aio_request *aio_req = NULL;
        int num_pages = 0;
        int flags;
-       int ret;
+       int ret = 0;
        struct timespec64 mtime = current_time(inode);
        size_t count = iov_iter_count(iter);
        loff_t pos = iocb->ki_pos;
@@ -935,11 +945,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
             (write ? "write" : "read"), file, pos, (unsigned)count,
             snapc, snapc ? snapc->seq : 0);
 
-       ret = filemap_write_and_wait_range(inode->i_mapping,
-                                          pos, pos + count - 1);
-       if (ret < 0)
-               return ret;
-
        if (write) {
                int ret2 = invalidate_inode_pages2_range(inode->i_mapping,
                                        pos >> PAGE_SHIFT,
@@ -1260,7 +1265,8 @@ again:
                want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
        else
                want = CEPH_CAP_FILE_CACHE;
-       ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
+       ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
+                           &got, &pinned_page);
        if (ret < 0)
                return ret;
 
@@ -1274,12 +1280,16 @@ again:
 
                if (ci->i_inline_version == CEPH_INLINE_NONE) {
                        if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+                               ceph_start_io_direct(inode);
                                ret = ceph_direct_read_write(iocb, to,
                                                             NULL, NULL);
+                               ceph_end_io_direct(inode);
                                if (ret >= 0 && ret < len)
                                        retry_op = CHECK_EOF;
                        } else {
+                               ceph_start_io_read(inode);
                                ret = ceph_sync_read(iocb, to, &retry_op);
+                               ceph_end_io_read(inode);
                        }
                } else {
                        retry_op = READ_INLINE;
@@ -1290,7 +1300,9 @@ again:
                     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
                     ceph_cap_string(got));
                ceph_add_rw_context(fi, &rw_ctx);
+               ceph_start_io_read(inode);
                ret = generic_file_read_iter(iocb, to);
+               ceph_end_io_read(inode);
                ceph_del_rw_context(fi, &rw_ctx);
        }
        dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
@@ -1399,7 +1411,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return -ENOMEM;
 
 retry_snap:
-       inode_lock(inode);
+       if (iocb->ki_flags & IOCB_DIRECT)
+               ceph_start_io_direct(inode);
+       else
+               ceph_start_io_write(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1457,7 +1472,7 @@ retry_snap:
        else
                want = CEPH_CAP_FILE_BUFFER;
        got = 0;
-       err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
+       err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count,
                            &got, NULL);
        if (err < 0)
                goto out;
@@ -1470,7 +1485,6 @@ retry_snap:
            (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
                struct ceph_snap_context *snapc;
                struct iov_iter data;
-               inode_unlock(inode);
 
                spin_lock(&ci->i_ceph_lock);
                if (__ceph_have_pending_cap_snap(ci)) {
@@ -1487,11 +1501,14 @@ retry_snap:
 
                /* we might need to revert back to that point */
                data = *from;
-               if (iocb->ki_flags & IOCB_DIRECT)
+               if (iocb->ki_flags & IOCB_DIRECT) {
                        written = ceph_direct_read_write(iocb, &data, snapc,
                                                         &prealloc_cf);
-               else
+                       ceph_end_io_direct(inode);
+               } else {
                        written = ceph_sync_write(iocb, &data, pos, snapc);
+                       ceph_end_io_write(inode);
+               }
                if (written > 0)
                        iov_iter_advance(from, written);
                ceph_put_snap_context(snapc);
@@ -1506,7 +1523,7 @@ retry_snap:
                written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
-               inode_unlock(inode);
+               ceph_end_io_write(inode);
        }
 
        if (written >= 0) {
@@ -1541,9 +1558,11 @@ retry_snap:
        }
 
        goto out_unlocked;
-
 out:
-       inode_unlock(inode);
+       if (iocb->ki_flags & IOCB_DIRECT)
+               ceph_end_io_direct(inode);
+       else
+               ceph_end_io_write(inode);
 out_unlocked:
        ceph_free_cap_flush(prealloc_cf);
        current->backing_dev_info = NULL;
@@ -1781,7 +1800,7 @@ static long ceph_fallocate(struct file *file, int mode,
        else
                want = CEPH_CAP_FILE_BUFFER;
 
-       ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
+       ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
        if (ret < 0)
                goto unlock;
 
@@ -1810,16 +1829,15 @@ unlock:
  * src_ci.  Two attempts are made to obtain both caps, and an error is return if
  * this fails; zero is returned on success.
  */
-static int get_rd_wr_caps(struct ceph_inode_info *src_ci,
-                         loff_t src_endoff, int *src_got,
-                         struct ceph_inode_info *dst_ci,
+static int get_rd_wr_caps(struct file *src_filp, int *src_got,
+                         struct file *dst_filp,
                          loff_t dst_endoff, int *dst_got)
 {
        int ret = 0;
        bool retrying = false;
 
 retry_caps:
-       ret = ceph_get_caps(dst_ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
+       ret = ceph_get_caps(dst_filp, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
                            dst_endoff, dst_got, NULL);
        if (ret < 0)
                return ret;
@@ -1829,24 +1847,24 @@ retry_caps:
         * we would risk a deadlock by using ceph_get_caps.  Thus, we'll do some
         * retry dance instead to try to get both capabilities.
         */
-       ret = ceph_try_get_caps(src_ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_SHARED,
+       ret = ceph_try_get_caps(file_inode(src_filp),
+                               CEPH_CAP_FILE_RD, CEPH_CAP_FILE_SHARED,
                                false, src_got);
        if (ret <= 0) {
                /* Start by dropping dst_ci caps and getting src_ci caps */
-               ceph_put_cap_refs(dst_ci, *dst_got);
+               ceph_put_cap_refs(ceph_inode(file_inode(dst_filp)), *dst_got);
                if (retrying) {
                        if (!ret)
                                /* ceph_try_get_caps masks EAGAIN */
                                ret = -EAGAIN;
                        return ret;
                }
-               ret = ceph_get_caps(src_ci, CEPH_CAP_FILE_RD,
-                                   CEPH_CAP_FILE_SHARED, src_endoff,
-                                   src_got, NULL);
+               ret = ceph_get_caps(src_filp, CEPH_CAP_FILE_RD,
+                                   CEPH_CAP_FILE_SHARED, -1, src_got, NULL);
                if (ret < 0)
                        return ret;
                /*... drop src_ci caps too, and retry */
-               ceph_put_cap_refs(src_ci, *src_got);
+               ceph_put_cap_refs(ceph_inode(file_inode(src_filp)), *src_got);
                retrying = true;
                goto retry_caps;
        }
@@ -1904,6 +1922,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
        struct ceph_inode_info *src_ci = ceph_inode(src_inode);
        struct ceph_inode_info *dst_ci = ceph_inode(dst_inode);
        struct ceph_cap_flush *prealloc_cf;
+       struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode);
        struct ceph_object_locator src_oloc, dst_oloc;
        struct ceph_object_id src_oid, dst_oid;
        loff_t endoff = 0, size;
@@ -1913,10 +1932,16 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
        int src_got = 0, dst_got = 0, err, dirty;
        bool do_final_copy = false;
 
-       if (src_inode == dst_inode)
-               return -EINVAL;
-       if (src_inode->i_sb != dst_inode->i_sb)
-               return -EXDEV;
+       if (src_inode->i_sb != dst_inode->i_sb) {
+               struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode);
+
+               if (ceph_fsid_compare(&src_fsc->client->fsid,
+                                     &dst_fsc->client->fsid)) {
+                       dout("Copying files across clusters: src: %pU dst: %pU\n",
+                            &src_fsc->client->fsid, &dst_fsc->client->fsid);
+                       return -EXDEV;
+               }
+       }
        if (ceph_snap(dst_inode) != CEPH_NOSNAP)
                return -EROFS;
 
@@ -1928,7 +1953,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
         * efficient).
         */
 
-       if (ceph_test_mount_opt(ceph_inode_to_client(src_inode), NOCOPYFROM))
+       if (ceph_test_mount_opt(src_fsc, NOCOPYFROM))
                return -EOPNOTSUPP;
 
        if ((src_ci->i_layout.stripe_unit != dst_ci->i_layout.stripe_unit) ||
@@ -1960,8 +1985,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
         * clients may have dirty data in their caches.  And OSDs know nothing
         * about caps, so they can't safely do the remote object copies.
         */
-       err = get_rd_wr_caps(src_ci, (src_off + len), &src_got,
-                            dst_ci, (dst_off + len), &dst_got);
+       err = get_rd_wr_caps(src_file, &src_got,
+                            dst_file, (dst_off + len), &dst_got);
        if (err < 0) {
                dout("get_rd_wr_caps returned %d\n", err);
                ret = -EOPNOTSUPP;
@@ -2018,9 +2043,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
                        goto out;
                }
                len -= ret;
-               err = get_rd_wr_caps(src_ci, (src_off + len),
-                                    &src_got, dst_ci,
-                                    (dst_off + len), &dst_got);
+               err = get_rd_wr_caps(src_file, &src_got,
+                                    dst_file, (dst_off + len), &dst_got);
                if (err < 0)
                        goto out;
                err = is_file_size_ok(src_inode, dst_inode,
@@ -2044,7 +2068,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
                                dst_ci->i_vino.ino, dst_objnum);
                /* Do an object remote copy */
                err = ceph_osdc_copy_from(
-                       &ceph_inode_to_client(src_inode)->client->osdc,
+                       &src_fsc->client->osdc,
                        src_ci->i_vino.snap, 0,
                        &src_oid, &src_oloc,
                        CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
index 18500ed..9f13562 100644 (file)
@@ -515,6 +515,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
        ceph_fscache_inode_init(ci);
 
+       ci->i_meta_err = 0;
+
        return &ci->vfs_inode;
 }
 
@@ -801,7 +803,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 
        /* update inode */
        inode->i_rdev = le32_to_cpu(info->rdev);
-       inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+       /* directories have fl_stripe_unit set to zero */
+       if (le32_to_cpu(info->layout.fl_stripe_unit))
+               inode->i_blkbits =
+                       fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+       else
+               inode->i_blkbits = CEPH_BLOCK_SHIFT;
 
        __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
 
@@ -1982,7 +1989,7 @@ static const struct inode_operations ceph_symlink_iops = {
 int __ceph_setattr(struct inode *inode, struct iattr *attr)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       const unsigned int ia_valid = attr->ia_valid;
+       unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_cap_flush *prealloc_cf;
@@ -2087,6 +2094,26 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                                   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
                }
        }
+       if (ia_valid & ATTR_SIZE) {
+               dout("setattr %p size %lld -> %lld\n", inode,
+                    inode->i_size, attr->ia_size);
+               if ((issued & CEPH_CAP_FILE_EXCL) &&
+                   attr->ia_size > inode->i_size) {
+                       i_size_write(inode, attr->ia_size);
+                       inode->i_blocks = calc_inode_blocks(attr->ia_size);
+                       ci->i_reported_size = attr->ia_size;
+                       dirtied |= CEPH_CAP_FILE_EXCL;
+                       ia_valid |= ATTR_MTIME;
+               } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+                          attr->ia_size != inode->i_size) {
+                       req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+                       req->r_args.setattr.old_size =
+                               cpu_to_le64(inode->i_size);
+                       mask |= CEPH_SETATTR_SIZE;
+                       release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+                                  CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+               }
+       }
        if (ia_valid & ATTR_MTIME) {
                dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
                     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
@@ -2109,25 +2136,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                                   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
                }
        }
-       if (ia_valid & ATTR_SIZE) {
-               dout("setattr %p size %lld -> %lld\n", inode,
-                    inode->i_size, attr->ia_size);
-               if ((issued & CEPH_CAP_FILE_EXCL) &&
-                   attr->ia_size > inode->i_size) {
-                       i_size_write(inode, attr->ia_size);
-                       inode->i_blocks = calc_inode_blocks(attr->ia_size);
-                       ci->i_reported_size = attr->ia_size;
-                       dirtied |= CEPH_CAP_FILE_EXCL;
-               } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
-                          attr->ia_size != inode->i_size) {
-                       req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
-                       req->r_args.setattr.old_size =
-                               cpu_to_le64(inode->i_size);
-                       mask |= CEPH_SETATTR_SIZE;
-                       release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
-                                  CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
-               }
-       }
 
        /* these do nothing */
        if (ia_valid & ATTR_CTIME) {
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
new file mode 100644 (file)
index 0000000..97602ea
--- /dev/null
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016 Trond Myklebust
+ * Copyright (c) 2019 Jeff Layton
+ *
+ * I/O and data path helper functionality.
+ *
+ * Heavily borrowed from equivalent code in fs/nfs/io.c
+ */
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rwsem.h>
+#include <linux/fs.h>
+
+#include "super.h"
+#include "io.h"
+
+/* Call with exclusively locked inode->i_rwsem */
+static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
+{
+       lockdep_assert_held_write(&inode->i_rwsem);
+
+       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) {
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_ceph_flags &= ~CEPH_I_ODIRECT;
+               spin_unlock(&ci->i_ceph_lock);
+               inode_dio_wait(inode);
+       }
+}
+
+/**
+ * ceph_start_io_read - declare the file is being used for buffered reads
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that buffered read operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas direct I/O
+ * operations need to wait to grab an exclusive lock in order to set
+ * CEPH_I_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
+ */
+void
+ceph_start_io_read(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       /* Be an optimist! */
+       down_read(&inode->i_rwsem);
+       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
+               return;
+       up_read(&inode->i_rwsem);
+       /* Slow path.... */
+       down_write(&inode->i_rwsem);
+       ceph_block_o_direct(ci, inode);
+       downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * ceph_end_io_read - declare that the buffered read operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+ceph_end_io_read(struct inode *inode)
+{
+       up_read(&inode->i_rwsem);
+}
+
+/**
+ * ceph_start_io_write - declare the file is being used for buffered writes
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is about to start, and ensure
+ * that we block all direct I/O.
+ */
+void
+ceph_start_io_write(struct inode *inode)
+{
+       down_write(&inode->i_rwsem);
+       ceph_block_o_direct(ceph_inode(inode), inode);
+}
+
+/**
+ * ceph_end_io_write - declare that the buffered write operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is done, and release the
+ * lock on inode->i_rwsem.
+ */
+void
+ceph_end_io_write(struct inode *inode)
+{
+       up_write(&inode->i_rwsem);
+}
+
+/* Call with exclusively locked inode->i_rwsem */
+static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
+{
+       lockdep_assert_held_write(&inode->i_rwsem);
+
+       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) {
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_ceph_flags |= CEPH_I_ODIRECT;
+               spin_unlock(&ci->i_ceph_lock);
+               /* FIXME: unmap_mapping_range? */
+               filemap_write_and_wait(inode->i_mapping);
+       }
+}
+
+/**
+ * ceph_start_io_direct - declare the file is being used for direct i/o
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is about to start, and ensure
+ * that we block all buffered I/O.
+ * On exit, the function ensures that the CEPH_I_ODIRECT flag is set,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that direct I/O operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas buffered I/O
+ * operations need to wait to grab an exclusive lock in order to clear
+ * CEPH_I_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
+ */
+void
+ceph_start_io_direct(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       /* Be an optimist! */
+       down_read(&inode->i_rwsem);
+       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
+               return;
+       up_read(&inode->i_rwsem);
+       /* Slow path.... */
+       down_write(&inode->i_rwsem);
+       ceph_block_buffered(ci, inode);
+       downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * ceph_end_io_direct - declare that the direct i/o operation is done
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+ceph_end_io_direct(struct inode *inode)
+{
+       up_read(&inode->i_rwsem);
+}
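[Note: taken together, the four start/end helpers above give rw_semaphore-style shared access within an I/O mode and exclusion across modes. A sketch of how a caller would bracket its I/O with them; do_direct_read() and do_buffered_read() are hypothetical placeholders, not functions from this patch:

    static ssize_t example_read(struct file *filp, struct iov_iter *to)
    {
            struct inode *inode = file_inode(filp);
            ssize_t ret;

            if (filp->f_flags & O_DIRECT) {
                    /* sets CEPH_I_ODIRECT, waits out buffered I/O */
                    ceph_start_io_direct(inode);
                    ret = do_direct_read(filp, to);
                    ceph_end_io_direct(inode);
            } else {
                    /* clears CEPH_I_ODIRECT, waits out direct I/O */
                    ceph_start_io_read(inode);
                    ret = do_buffered_read(filp, to);
                    ceph_end_io_read(inode);
            }
            return ret;
    }
]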
diff --git a/fs/ceph/io.h b/fs/ceph/io.h
new file mode 100644 (file)
index 0000000..fa594cd
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _FS_CEPH_IO_H
+#define _FS_CEPH_IO_H
+
+void ceph_start_io_read(struct inode *inode);
+void ceph_end_io_read(struct inode *inode);
+void ceph_start_io_write(struct inode *inode);
+void ceph_end_io_write(struct inode *inode);
+void ceph_start_io_direct(struct inode *inode);
+void ceph_end_io_direct(struct inode *inode);
+
+#endif /* _FS_CEPH_IO_H */
index 5083e23..544e9e8 100644 (file)
@@ -32,14 +32,18 @@ void __init ceph_flock_init(void)
 
 static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
-       struct inode *inode = file_inode(src->fl_file);
+       struct ceph_file_info *fi = dst->fl_file->private_data;
+       struct inode *inode = file_inode(dst->fl_file);
        atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+       atomic_inc(&fi->num_locks);
 }
 
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
+       struct ceph_file_info *fi = fl->fl_file->private_data;
        struct inode *inode = file_inode(fl->fl_file);
        struct ceph_inode_info *ci = ceph_inode(inode);
+       atomic_dec(&fi->num_locks);
        if (atomic_dec_and_test(&ci->i_filelock_ref)) {
                /* clear error when all locks are released */
                spin_lock(&ci->i_ceph_lock);
@@ -73,7 +77,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
                 * window. Caller function will decrease the counter.
                 */
                fl->fl_ops = &ceph_fl_lock_ops;
-               atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+               fl->fl_ops->fl_copy_lock(fl, NULL);
        }
 
        if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
index 920e9f0..a516329 100644 (file)
@@ -384,8 +384,8 @@ static int parse_reply_info_readdir(void **p, void *end,
        }
 
 done:
-       if (*p != end)
-               goto bad;
+       /* Skip over any unrecognized fields */
+       *p = end;
        return 0;
 
 bad:
@@ -406,12 +406,10 @@ static int parse_reply_info_filelock(void **p, void *end,
                goto bad;
 
        info->filelock_reply = *p;
-       *p += sizeof(*info->filelock_reply);
 
-       if (unlikely(*p != end))
-               goto bad;
+       /* Skip over any unrecognized fields */
+       *p = end;
        return 0;
-
 bad:
        return -EIO;
 }
@@ -425,18 +423,21 @@ static int parse_reply_info_create(void **p, void *end,
 {
        if (features == (u64)-1 ||
            (features & CEPH_FEATURE_REPLY_CREATE_INODE)) {
+               /* Malformed reply? */
                if (*p == end) {
                        info->has_create_ino = false;
                } else {
                        info->has_create_ino = true;
-                       info->ino = ceph_decode_64(p);
+                       ceph_decode_64_safe(p, end, info->ino, bad);
                }
+       } else {
+               if (*p != end)
+                       goto bad;
        }
 
-       if (unlikely(*p != end))
-               goto bad;
+       /* Skip over any unrecognized fields */
+       *p = end;
        return 0;
-
 bad:
        return -EIO;
 }
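[Note: all three hunks above replace the strict `*p != end` check with the same forward-compatibility idiom: decode every field this client understands with the bounds-checked `_safe` helpers, then consume whatever a newer MDS may have appended. A minimal sketch of the shape:

    static int decode_example(void **p, void *end, u64 *out)
    {
            /* bounds-checked decode; jumps to bad on a short buffer */
            ceph_decode_64_safe(p, end, *out, bad);

            /* tolerate fields appended by newer protocol revisions */
            *p = end;
            return 0;
    bad:
            return -EIO;
    }
]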
@@ -639,7 +640,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
        s->s_renew_seq = 0;
        INIT_LIST_HEAD(&s->s_caps);
        s->s_nr_caps = 0;
-       s->s_trim_caps = 0;
        refcount_set(&s->s_ref, 1);
        INIT_LIST_HEAD(&s->s_waiting);
        INIT_LIST_HEAD(&s->s_unsafe);
@@ -1270,6 +1270,7 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
 {
        struct ceph_mds_request *req;
        struct rb_node *p;
+       struct ceph_inode_info *ci;
 
        dout("cleanup_session_requests mds%d\n", session->s_mds);
        mutex_lock(&mdsc->mutex);
@@ -1278,6 +1279,16 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
                                       struct ceph_mds_request, r_unsafe_item);
                pr_warn_ratelimited(" dropping unsafe request %llu\n",
                                    req->r_tid);
+               if (req->r_target_inode) {
+                       /* dropping unsafe change of inode's attributes */
+                       ci = ceph_inode(req->r_target_inode);
+                       errseq_set(&ci->i_meta_err, -EIO);
+               }
+               if (req->r_unsafe_dir) {
+                       /* dropping unsafe directory operation */
+                       ci = ceph_inode(req->r_unsafe_dir);
+                       errseq_set(&ci->i_meta_err, -EIO);
+               }
                __unregister_request(mdsc, req);
        }
        /* zero r_attempts, so kick_requests() will re-send requests */
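[Note: i_meta_err, initialized in ceph_alloc_inode() earlier in this series, is an errseq_t cursor, so each open file can observe a dropped-request error exactly once. The matching consumer side is outside these hunks; roughly, per include/linux/errseq.h, it would look like:

    /* at open: remember the current error sequence for this fd */
    fi->meta_err = errseq_sample(&ci->i_meta_err);

    /* at fsync: report an error recorded since this fd was opened,
     * advancing the fd's cursor so it is reported only once */
    err = errseq_check_and_advance(&ci->i_meta_err, &fi->meta_err);
]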
@@ -1370,7 +1381,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
        struct ceph_inode_info *ci = ceph_inode(inode);
        LIST_HEAD(to_remove);
-       bool drop = false;
+       bool dirty_dropped = false;
        bool invalidate = false;
 
        dout("removing cap %p, ci is %p, inode is %p\n",
@@ -1383,9 +1394,12 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                struct ceph_cap_flush *cf;
                struct ceph_mds_client *mdsc = fsc->mdsc;
 
-               if (ci->i_wrbuffer_ref > 0 &&
-                   READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
-                       invalidate = true;
+               if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+                       if (inode->i_data.nrpages > 0)
+                               invalidate = true;
+                       if (ci->i_wrbuffer_ref > 0)
+                               mapping_set_error(&inode->i_data, -EIO);
+               }
 
                while (!list_empty(&ci->i_cap_flush_list)) {
                        cf = list_first_entry(&ci->i_cap_flush_list,
@@ -1405,7 +1419,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                                inode, ceph_ino(inode));
                        ci->i_dirty_caps = 0;
                        list_del_init(&ci->i_dirty_item);
-                       drop = true;
+                       dirty_dropped = true;
                }
                if (!list_empty(&ci->i_flushing_item)) {
                        pr_warn_ratelimited(
@@ -1415,10 +1429,22 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        ci->i_flushing_caps = 0;
                        list_del_init(&ci->i_flushing_item);
                        mdsc->num_cap_flushing--;
-                       drop = true;
+                       dirty_dropped = true;
                }
                spin_unlock(&mdsc->cap_dirty_lock);
 
+               if (dirty_dropped) {
+                       errseq_set(&ci->i_meta_err, -EIO);
+
+                       if (ci->i_wrbuffer_ref_head == 0 &&
+                           ci->i_wr_ref == 0 &&
+                           ci->i_dirty_caps == 0 &&
+                           ci->i_flushing_caps == 0) {
+                               ceph_put_snap_context(ci->i_head_snapc);
+                               ci->i_head_snapc = NULL;
+                       }
+               }
+
                if (atomic_read(&ci->i_filelock_ref) > 0) {
                        /* make further file lock syscall return -EIO */
                        ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
@@ -1430,15 +1456,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
                        ci->i_prealloc_cap_flush = NULL;
                }
-
-               if (drop &&
-                  ci->i_wrbuffer_ref_head == 0 &&
-                  ci->i_wr_ref == 0 &&
-                  ci->i_dirty_caps == 0 &&
-                  ci->i_flushing_caps == 0) {
-                      ceph_put_snap_context(ci->i_head_snapc);
-                      ci->i_head_snapc = NULL;
-               }
        }
        spin_unlock(&ci->i_ceph_lock);
        while (!list_empty(&to_remove)) {
@@ -1452,7 +1469,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        wake_up_all(&ci->i_cap_wq);
        if (invalidate)
                ceph_queue_invalidate(inode);
-       if (drop)
+       if (dirty_dropped)
                iput(inode);
        return 0;
 }
@@ -1705,11 +1722,11 @@ out:
  */
 static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 {
-       struct ceph_mds_session *session = arg;
+       int *remaining = arg;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int used, wanted, oissued, mine;
 
-       if (session->s_trim_caps <= 0)
+       if (*remaining <= 0)
                return -1;
 
        spin_lock(&ci->i_ceph_lock);
@@ -1746,7 +1763,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        if (oissued) {
                /* we aren't the only cap.. just remove us */
                __ceph_remove_cap(cap, true);
-               session->s_trim_caps--;
+               (*remaining)--;
        } else {
                struct dentry *dentry;
                /* try dropping referring dentries */
@@ -1758,7 +1775,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
                        d_prune_aliases(inode);
                        count = atomic_read(&inode->i_count);
                        if (count == 1)
-                               session->s_trim_caps--;
+                               (*remaining)--;
                        dout("trim_caps_cb %p cap %p pruned, count now %d\n",
                             inode, cap, count);
                } else {
@@ -1784,12 +1801,12 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc,
        dout("trim_caps mds%d start: %d / %d, trim %d\n",
             session->s_mds, session->s_nr_caps, max_caps, trim_caps);
        if (trim_caps > 0) {
-               session->s_trim_caps = trim_caps;
-               ceph_iterate_session_caps(session, trim_caps_cb, session);
+               int remaining = trim_caps;
+
+               ceph_iterate_session_caps(session, trim_caps_cb, &remaining);
                dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
                     session->s_mds, session->s_nr_caps, max_caps,
-                       trim_caps - session->s_trim_caps);
-               session->s_trim_caps = 0;
+                       trim_caps - remaining);
        }
 
        ceph_flush_cap_releases(mdsc, session);
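[Note: the refactor drops per-session trim state in favor of a stack variable threaded through the iterator's opaque argument, which is the generic shape for stateful cap walks; count_cb and max_to_visit below are illustrative names:

    static int count_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
    {
            int *remaining = arg;

            if (--(*remaining) <= 0)
                    return -1;      /* negative return stops the iteration */
            return 0;
    }

    /* caller: the counter lives on the stack, so concurrent walks
     * over the same session no longer race on shared session state */
    int remaining = max_to_visit;
    ceph_iterate_session_caps(session, count_cb, &remaining);
]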
@@ -3015,18 +3032,23 @@ bad:
        pr_err("mdsc_handle_forward decode error err=%d\n", err);
 }
 
-static int __decode_and_drop_session_metadata(void **p, void *end)
+static int __decode_session_metadata(void **p, void *end,
+                                    bool *blacklisted)
 {
        /* map<string,string> */
        u32 n;
+       bool err_str;
        ceph_decode_32_safe(p, end, n, bad);
        while (n-- > 0) {
                u32 len;
                ceph_decode_32_safe(p, end, len, bad);
                ceph_decode_need(p, end, len, bad);
+               err_str = !strncmp(*p, "error_string", len);
                *p += len;
                ceph_decode_32_safe(p, end, len, bad);
                ceph_decode_need(p, end, len, bad);
+               if (err_str && strnstr(*p, "blacklisted", len))
+                       *blacklisted = true;
                *p += len;
        }
        return 0;
@@ -3050,6 +3072,7 @@ static void handle_session(struct ceph_mds_session *session,
        u64 seq;
        unsigned long features = 0;
        int wake = 0;
+       bool blacklisted = false;
 
        /* decode */
        ceph_decode_need(&p, end, sizeof(*h), bad);
@@ -3062,7 +3085,7 @@ static void handle_session(struct ceph_mds_session *session,
        if (msg_version >= 3) {
                u32 len;
                /* version >= 2, metadata */
-               if (__decode_and_drop_session_metadata(&p, end) < 0)
+               if (__decode_session_metadata(&p, end, &blacklisted) < 0)
                        goto bad;
                /* version >= 3, feature bits */
                ceph_decode_32_safe(&p, end, len, bad);
@@ -3149,6 +3172,8 @@ static void handle_session(struct ceph_mds_session *session,
                session->s_state = CEPH_MDS_SESSION_REJECTED;
                cleanup_session_requests(mdsc, session);
                remove_session_caps(session);
+               if (blacklisted)
+                       mdsc->fsc->blacklisted = true;
                wake = 2; /* for good measure */
                break;
 
@@ -3998,7 +4023,27 @@ static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
        mutex_unlock(&mdsc->mutex);
 }
 
+static void maybe_recover_session(struct ceph_mds_client *mdsc)
+{
+       struct ceph_fs_client *fsc = mdsc->fsc;
+
+       if (!ceph_test_mount_opt(fsc, CLEANRECOVER))
+               return;
+
+       if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED)
+               return;
+
+       if (!READ_ONCE(fsc->blacklisted))
+               return;
 
+       if (fsc->last_auto_reconnect &&
+           time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
+               return;
+
+       pr_info("auto reconnect after blacklisted\n");
+       fsc->last_auto_reconnect = jiffies;
+       ceph_force_reconnect(fsc->sb);
+}
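[Note: the backoff above is the standard jiffies idiom: time_before() does a wrap-safe comparison, so reconnects are attempted at most once every 30 minutes (HZ * 60 * 30 jiffies). As a hypothetical template, with names illustrative:

    /* run do_work() at most once per PERIOD jiffies */
    if (last_run && time_before(jiffies, last_run + PERIOD))
            return;                 /* still inside the backoff window */
    last_run = jiffies;
    do_work();
]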
 
 /*
  * delayed work -- periodically trim expired leases, renew caps with mds
@@ -4044,7 +4089,9 @@ static void delayed_work(struct work_struct *work)
                                pr_info("mds%d hung\n", s->s_mds);
                        }
                }
-               if (s->s_state < CEPH_MDS_SESSION_OPEN) {
+               if (s->s_state == CEPH_MDS_SESSION_NEW ||
+                   s->s_state == CEPH_MDS_SESSION_RESTARTING ||
+                   s->s_state == CEPH_MDS_SESSION_REJECTED) {
                        /* this mds is failed or recovering, just wait */
                        ceph_put_mds_session(s);
                        continue;
@@ -4072,6 +4119,8 @@ static void delayed_work(struct work_struct *work)
 
        ceph_trim_snapid_map(mdsc);
 
+       maybe_recover_session(mdsc);
+
        schedule_delayed(mdsc);
 }
 
@@ -4355,7 +4404,12 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
                session = __ceph_lookup_mds_session(mdsc, mds);
                if (!session)
                        continue;
+
+               if (session->s_state == CEPH_MDS_SESSION_REJECTED)
+                       __unregister_session(mdsc, session);
+               __wake_requests(mdsc, &session->s_waiting);
                mutex_unlock(&mdsc->mutex);
+
                mutex_lock(&session->s_mutex);
                __close_session(mdsc, session);
                if (session->s_state == CEPH_MDS_SESSION_CLOSING) {
@@ -4364,6 +4418,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
                }
                mutex_unlock(&session->s_mutex);
                ceph_put_mds_session(session);
+
                mutex_lock(&mdsc->mutex);
                kick_requests(mdsc, mds);
        }
index f7c8603..5cd131b 100644 (file)
@@ -148,9 +148,9 @@ enum {
        CEPH_MDS_SESSION_OPENING = 2,
        CEPH_MDS_SESSION_OPEN = 3,
        CEPH_MDS_SESSION_HUNG = 4,
-       CEPH_MDS_SESSION_CLOSING = 5,
-       CEPH_MDS_SESSION_RESTARTING = 6,
-       CEPH_MDS_SESSION_RECONNECTING = 7,
+       CEPH_MDS_SESSION_RESTARTING = 5,
+       CEPH_MDS_SESSION_RECONNECTING = 6,
+       CEPH_MDS_SESSION_CLOSING = 7,
        CEPH_MDS_SESSION_REJECTED = 8,
 };
 
@@ -176,7 +176,7 @@ struct ceph_mds_session {
        spinlock_t        s_cap_lock;
        struct list_head  s_caps;     /* all caps issued by this session */
        struct ceph_cap  *s_cap_iterator;
-       int               s_nr_caps, s_trim_caps;
+       int               s_nr_caps;
        int               s_num_cap_releases;
        int               s_cap_reconnect;
        int               s_readonly;
index 377fafc..edfd643 100644 (file)
@@ -143,6 +143,7 @@ enum {
        Opt_snapdirname,
        Opt_mds_namespace,
        Opt_fscache_uniq,
+       Opt_recover_session,
        Opt_last_string,
        /* string args above */
        Opt_dirstat,
@@ -184,6 +185,7 @@ static match_table_t fsopt_tokens = {
        /* int args above */
        {Opt_snapdirname, "snapdirname=%s"},
        {Opt_mds_namespace, "mds_namespace=%s"},
+       {Opt_recover_session, "recover_session=%s"},
        {Opt_fscache_uniq, "fsc=%s"},
        /* string args above */
        {Opt_dirstat, "dirstat"},
@@ -254,6 +256,17 @@ static int parse_fsopt_token(char *c, void *private)
                if (!fsopt->mds_namespace)
                        return -ENOMEM;
                break;
+       case Opt_recover_session:
+               if (!strncmp(argstr[0].from, "no",
+                            argstr[0].to - argstr[0].from)) {
+                       fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER;
+               } else if (!strncmp(argstr[0].from, "clean",
+                                   argstr[0].to - argstr[0].from)) {
+                       fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER;
+               } else {
+                       return -EINVAL;
+               }
+               break;
        case Opt_fscache_uniq:
                kfree(fsopt->fscache_uniq);
                fsopt->fscache_uniq = kstrndup(argstr[0].from,
@@ -576,6 +589,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 
        if (fsopt->mds_namespace)
                seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
+
+       if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
+               seq_show_option(m, "recover_session", "clean");
+
        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
                seq_printf(m, ",wsize=%d", fsopt->wsize);
        if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -664,6 +681,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 
        fsc->sb = NULL;
        fsc->mount_state = CEPH_MOUNT_MOUNTING;
+       fsc->filp_gen = 1;
 
        atomic_long_set(&fsc->writeback_count, 0);
 
@@ -713,6 +731,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
        dout("destroy_fs_client %p\n", fsc);
 
+       ceph_mdsc_destroy(fsc);
        destroy_workqueue(fsc->inode_wq);
        destroy_workqueue(fsc->cap_wq);
 
@@ -829,7 +848,7 @@ static void ceph_umount_begin(struct super_block *sb)
        fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
        ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
        ceph_mdsc_force_umount(fsc->mdsc);
-       return;
+       fsc->filp_gen++; /* invalidate open files */
 }
 
 static int ceph_remount(struct super_block *sb, int *flags, char *data)
@@ -1089,7 +1108,6 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
        }
 
        if (ceph_sb_to_client(sb) != fsc) {
-               ceph_mdsc_destroy(fsc);
                destroy_fs_client(fsc);
                fsc = ceph_sb_to_client(sb);
                dout("get_sb got existing client %p\n", fsc);
@@ -1115,7 +1133,6 @@ out_splat:
        goto out_final;
 
 out:
-       ceph_mdsc_destroy(fsc);
        destroy_fs_client(fsc);
 out_final:
        dout("ceph_mount fail %ld\n", PTR_ERR(res));
@@ -1139,8 +1156,6 @@ static void ceph_kill_sb(struct super_block *s)
 
        ceph_fscache_unregister_fs(fsc);
 
-       ceph_mdsc_destroy(fsc);
-
        destroy_fs_client(fsc);
        free_anon_bdev(dev);
 }
@@ -1154,6 +1169,33 @@ static struct file_system_type ceph_fs_type = {
 };
 MODULE_ALIAS_FS("ceph");
 
+int ceph_force_reconnect(struct super_block *sb)
+{
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+       int err = 0;
+
+       ceph_umount_begin(sb);
+
+       /* Make sure all page caches get invalidated.
+        * see remove_session_caps_cb() */
+       flush_workqueue(fsc->inode_wq);
+
+       /* In case we were blacklisted. This also resets
+        * all mon/osd connections. */
+       ceph_reset_client_addr(fsc->client);
+
+       ceph_osdc_clear_abort_err(&fsc->client->osdc);
+
+       fsc->blacklisted = false;
+       fsc->mount_state = CEPH_MOUNT_MOUNTED;
+
+       if (sb->s_root) {
+               err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
+                                       CEPH_STAT_CAP_INODE, true);
+       }
+       return err;
+}
+
 static int __init init_ceph(void)
 {
        int ret = init_caches();
index 6b9f1ee..f98d924 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/posix_acl.h>
 #include <linux/refcount.h>
+#include <linux/security.h>
 
 #include <linux/ceph/libceph.h>
 
@@ -31,6 +32,7 @@
 #define CEPH_BLOCK_SHIFT   22  /* 4 MB */
 #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
 
+#define CEPH_MOUNT_OPT_CLEANRECOVER    (1<<1) /* auto reconnect (clean mode) after being blacklisted */
 #define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
 #define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
 #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
@@ -101,6 +103,11 @@ struct ceph_fs_client {
        struct ceph_client *client;
 
        unsigned long mount_state;
+
+       unsigned long last_auto_reconnect;
+       bool blacklisted;
+
+       u32 filp_gen;
        loff_t max_file_size;
 
        struct ceph_mds_client *mdsc;
@@ -395,6 +402,8 @@ struct ceph_inode_info {
        struct fscache_cookie *fscache;
        u32 i_fscache_gen;
 #endif
+       errseq_t i_meta_err;
+
        struct inode vfs_inode; /* at end */
 };
 
@@ -499,17 +508,16 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_DIR_ORDERED     (1 << 0)  /* dentries in dir are ordered */
 #define CEPH_I_NODELAY         (1 << 1)  /* do not delay cap release */
 #define CEPH_I_FLUSH           (1 << 2)  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH         (1 << 3)  /* do not flush dirty caps */
-#define CEPH_I_POOL_PERM       (1 << 4)  /* pool rd/wr bits are valid */
-#define CEPH_I_POOL_RD         (1 << 5)  /* can read from pool */
-#define CEPH_I_POOL_WR         (1 << 6)  /* can write to pool */
-#define CEPH_I_SEC_INITED      (1 << 7)  /* security initialized */
-#define CEPH_I_CAP_DROPPED     (1 << 8)  /* caps were forcibly dropped */
-#define CEPH_I_KICK_FLUSH      (1 << 9)  /* kick flushing caps */
-#define CEPH_I_FLUSH_SNAPS     (1 << 10) /* need flush snapss */
-#define CEPH_I_ERROR_WRITE     (1 << 11) /* have seen write errors */
-#define CEPH_I_ERROR_FILELOCK  (1 << 12) /* have seen file lock errors */
-
+#define CEPH_I_POOL_PERM       (1 << 3)  /* pool rd/wr bits are valid */
+#define CEPH_I_POOL_RD         (1 << 4)  /* can read from pool */
+#define CEPH_I_POOL_WR         (1 << 5)  /* can write to pool */
+#define CEPH_I_SEC_INITED      (1 << 6)  /* security initialized */
+#define CEPH_I_CAP_DROPPED     (1 << 7)  /* caps were forcibly dropped */
+#define CEPH_I_KICK_FLUSH      (1 << 8)  /* kick flushing caps */
+#define CEPH_I_FLUSH_SNAPS     (1 << 9)  /* need flush snaps */
+#define CEPH_I_ERROR_WRITE     (1 << 10) /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK  (1 << 11) /* have seen file lock errors */
+#define CEPH_I_ODIRECT         (1 << 12) /* inode in direct I/O mode */
 
 /*
  * Masks of ceph inode work.
@@ -703,6 +711,10 @@ struct ceph_file_info {
 
        spinlock_t rw_contexts_lock;
        struct list_head rw_contexts;
+
+       errseq_t meta_err;
+       u32 filp_gen;
+       atomic_t num_locks;
 };
 
 struct ceph_dir_file_info {
@@ -842,7 +854,8 @@ static inline int default_congestion_kb(void)
 }
 
 
-
+/* super.c */
+extern int ceph_force_reconnect(struct super_block *sb);
 /* snap.c */
 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
                                               u64 ino);
@@ -959,7 +972,10 @@ static inline bool ceph_security_xattr_wanted(struct inode *in)
 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
 extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
                                     struct ceph_acl_sec_ctx *ctx);
-extern void ceph_security_invalidate_secctx(struct inode *inode);
+static inline void ceph_security_invalidate_secctx(struct inode *inode)
+{
+       security_inode_invalidate_secctx(inode);
+}
 #else
 static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
                                            struct ceph_acl_sec_ctx *ctx)
@@ -1039,7 +1055,6 @@ extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                                    struct ceph_mds_session *session);
 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
                                             int mds);
-extern int ceph_get_cap_mds(struct inode *inode);
 extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
@@ -1058,9 +1073,9 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
                                      struct inode *dir,
                                      int mds, int drop, int unless);
 
-extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+extern int ceph_get_caps(struct file *filp, int need, int want,
                         loff_t endoff, int *got, struct page **pinned_page);
-extern int ceph_try_get_caps(struct ceph_inode_info *ci,
+extern int ceph_try_get_caps(struct inode *inode,
                             int need, int want, bool nonblock, int *got);
 
 /* for counting open files by mode */
@@ -1071,7 +1086,7 @@ extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
 extern const struct address_space_operations ceph_aops;
 extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
-extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need);
+extern int ceph_pool_perm_check(struct inode *inode, int need);
 extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
 
 /* file.c */
index 939eab7..cb18ee6 100644 (file)
@@ -20,7 +20,8 @@ static int __remove_xattr(struct ceph_inode_info *ci,
 
 static bool ceph_is_valid_xattr(const char *name)
 {
-       return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
+       return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
+              !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
 }
@@ -892,7 +893,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
        memcpy(value, xattr->val, xattr->val_len);
 
        if (current->journal_info &&
-           !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
+           !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+           security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN))
                ci->i_ceph_flags |= CEPH_I_SEC_INITED;
 out:
        spin_unlock(&ci->i_ceph_lock);
@@ -903,11 +905,9 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 {
        struct inode *inode = d_inode(dentry);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
        bool len_only = (size == 0);
        u32 namelen;
        int err;
-       int i;
 
        spin_lock(&ci->i_ceph_lock);
        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
@@ -936,33 +936,6 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
                names = __copy_xattr_names(ci, names);
                size -= namelen;
        }
-
-
-       /* virtual xattr names, too */
-       if (vxattrs) {
-               for (i = 0; vxattrs[i].name; i++) {
-                       size_t this_len;
-
-                       if (vxattrs[i].flags & VXATTR_FLAG_HIDDEN)
-                               continue;
-                       if (vxattrs[i].exists_cb && !vxattrs[i].exists_cb(ci))
-                               continue;
-
-                       this_len = strlen(vxattrs[i].name) + 1;
-                       namelen += this_len;
-                       if (len_only)
-                               continue;
-
-                       if (this_len > size) {
-                               err = -ERANGE;
-                               goto out;
-                       }
-
-                       memcpy(names, vxattrs[i].name, this_len);
-                       names += this_len;
-                       size -= this_len;
-               }
-       }
        err = namelen;
 out:
        spin_unlock(&ci->i_ceph_lock);
@@ -1293,42 +1266,8 @@ out:
                ceph_pagelist_release(pagelist);
        return err;
 }
-
-void ceph_security_invalidate_secctx(struct inode *inode)
-{
-       security_inode_invalidate_secctx(inode);
-}
-
-static int ceph_xattr_set_security_label(const struct xattr_handler *handler,
-                                   struct dentry *unused, struct inode *inode,
-                                   const char *key, const void *buf,
-                                   size_t buflen, int flags)
-{
-       if (security_ismaclabel(key)) {
-               const char *name = xattr_full_name(handler, key);
-               return __ceph_setxattr(inode, name, buf, buflen, flags);
-       }
-       return  -EOPNOTSUPP;
-}
-
-static int ceph_xattr_get_security_label(const struct xattr_handler *handler,
-                                   struct dentry *unused, struct inode *inode,
-                                   const char *key, void *buf, size_t buflen)
-{
-       if (security_ismaclabel(key)) {
-               const char *name = xattr_full_name(handler, key);
-               return __ceph_getxattr(inode, name, buf, buflen);
-       }
-       return  -EOPNOTSUPP;
-}
-
-static const struct xattr_handler ceph_security_label_handler = {
-       .prefix = XATTR_SECURITY_PREFIX,
-       .get    = ceph_xattr_get_security_label,
-       .set    = ceph_xattr_set_security_label,
-};
-#endif
-#endif
+#endif /* CONFIG_CEPH_FS_SECURITY_LABEL */
+#endif /* CONFIG_SECURITY */
 
 void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
 {
@@ -1351,9 +1290,6 @@ const struct xattr_handler *ceph_xattr_handlers[] = {
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        &posix_acl_access_xattr_handler,
        &posix_acl_default_xattr_handler,
-#endif
-#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
-       &ceph_security_label_handler,
 #endif
        &ceph_other_xattr_handler,
        NULL,
index 6c3bd07..0f0dc1c 100644 (file)
@@ -57,9 +57,18 @@ struct smb_query_info {
        /* char buffer[]; */
 } __packed;
 
+struct smb3_key_debug_info {
+       __u64   Suid;
+       __u16   cipher_type;
+       __u8    auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */
+       __u8    smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8    smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+} __packed;
+
 #define CIFS_IOCTL_MAGIC       0xCF
 #define CIFS_IOC_COPYCHUNK_FILE        _IOW(CIFS_IOCTL_MAGIC, 3, int)
 #define CIFS_IOC_SET_INTEGRITY  _IO(CIFS_IOCTL_MAGIC, 4)
 #define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info)
 #define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
 #define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info)
+#define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info)
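[Note: a sketch of how userspace would exercise the new ioctl, assuming this header is available; the helper name, mount path handling, and output format are illustrative only:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int dump_keys(const char *path)
    {
            struct smb3_key_debug_info info = { 0 };
            int fd = open(path, O_RDONLY);

            if (fd < 0 || ioctl(fd, CIFS_DUMP_KEY, &info) < 0) {
                    perror("CIFS_DUMP_KEY");
                    if (fd >= 0)
                            close(fd);
                    return -1;
            }
            printf("session 0x%llx cipher 0x%x\n",
                   (unsigned long long)info.Suid, info.cipher_type);
            close(fd);
            return 0;
    }
]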
index eb42834..439b99c 100644 (file)
@@ -90,14 +90,93 @@ struct cifs_acl {
        __le32 num_aces;
 } __attribute__((packed));
 
+/* ACE types - see MS-DTYP 2.4.4.1 */
+#define ACCESS_ALLOWED_ACE_TYPE        0x00
+#define ACCESS_DENIED_ACE_TYPE 0x01
+#define SYSTEM_AUDIT_ACE_TYPE  0x02
+#define SYSTEM_ALARM_ACE_TYPE  0x03
+#define ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04
+#define ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05
+#define ACCESS_DENIED_OBJECT_ACE_TYPE  0x06
+#define SYSTEM_AUDIT_OBJECT_ACE_TYPE   0x07
+#define SYSTEM_ALARM_OBJECT_ACE_TYPE   0x08
+#define ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09
+#define ACCESS_DENIED_CALLBACK_ACE_TYPE        0x0A
+#define ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B
+#define ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE  0x0C
+#define SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D
+#define SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E /* Reserved */
+#define SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F
+#define SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 /* reserved */
+#define SYSTEM_MANDATORY_LABEL_ACE_TYPE        0x11
+#define SYSTEM_RESOURCE_ATTRIBUTE_ACE_TYPE 0x12
+#define SYSTEM_SCOPED_POLICY_ID_ACE_TYPE 0x13
+
+/* ACE flags */
+#define OBJECT_INHERIT_ACE     0x01
+#define CONTAINER_INHERIT_ACE  0x02
+#define NO_PROPAGATE_INHERIT_ACE 0x04
+#define INHERIT_ONLY_ACE       0x08
+#define INHERITED_ACE          0x10
+#define SUCCESSFUL_ACCESS_ACE_FLAG 0x40
+#define FAILED_ACCESS_ACE_FLAG 0x80
+
 struct cifs_ace {
-       __u8 type;
+       __u8 type; /* see above and MS-DTYP 2.4.4.1 */
        __u8 flags;
        __le16 size;
        __le32 access_req;
        struct cifs_sid sid; /* ie UUID of user or group who gets these perms */
 } __attribute__((packed));
 
+/*
+ * The current SMB3 form of security descriptor is similar to what was used for
+ * cifs (see above) but some fields are split, and the fields in the struct
+ * below match the names used in the spec, MS-DTYP (see sections 2.4.5 and
+ * 2.4.6). Note that "CamelCase" fields are used in this struct in order to
+ * match the MS-DTYP and MS-SMB2 specs which define the wire format.
+ */
+struct smb3_sd {
+       __u8 Revision; /* revision level, MUST be one */
+       __u8 Sbz1; /* only meaningful if 'RM' flag set below */
+       __le16 Control;
+       __le32 OffsetOwner;
+       __le32 OffsetGroup;
+       __le32 OffsetSacl;
+       __le32 OffsetDacl;
+} __packed;
+
+/* Meaning of 'Control' field flags */
+#define ACL_CONTROL_SR 0x0001  /* Self relative */
+#define ACL_CONTROL_RM 0x0002  /* Resource manager control bits */
+#define ACL_CONTROL_PS 0x0004  /* SACL protected from inherits */
+#define ACL_CONTROL_PD 0x0008  /* DACL protected from inherits */
+#define ACL_CONTROL_SI 0x0010  /* SACL Auto-Inherited */
+#define ACL_CONTROL_DI 0x0020  /* DACL Auto-Inherited */
+#define ACL_CONTROL_SC 0x0040  /* SACL computed through inheritance */
+#define ACL_CONTROL_DC 0x0080  /* DACL computed through inheritance */
+#define ACL_CONTROL_SS 0x0100  /* Create server ACL */
+#define ACL_CONTROL_DT 0x0200  /* DACL provided by trusted source */
+#define ACL_CONTROL_SD 0x0400  /* SACL defaulted */
+#define ACL_CONTROL_SP 0x0800  /* SACL is present on object */
+#define ACL_CONTROL_DD 0x1000  /* DACL defaulted */
+#define ACL_CONTROL_DP 0x2000  /* DACL is present on object */
+#define ACL_CONTROL_GD 0x4000  /* Group was defaulted */
+#define ACL_CONTROL_OD 0x8000  /* User was defaulted */
+
+/* Meaning of AclRevision flags */
+#define ACL_REVISION   0x02 /* See section 2.4.4.1 of MS-DTYP */
+#define ACL_REVISION_DS        0x04 /* Additional AceTypes allowed */
+
+struct smb3_acl {
+       u8 AclRevision; /* revision level */
+       u8 Sbz1; /* MBZ */
+       __le16 AclSize;
+       __le16 AceCount;
+       __le16 Sbz2; /* MBZ */
+} __packed;
+
+
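[Note: since the descriptor is self-relative, the Offset* fields are byte offsets from the start of the blob. A sketch of locating the DACL and stepping through its ACEs; length validation is omitted, and walk_dacl is an illustrative name, not part of this patch:

    static void walk_dacl(const u8 *sd_buf)
    {
            const struct smb3_sd *sd = (const struct smb3_sd *)sd_buf;
            const struct smb3_acl *dacl;
            const struct cifs_ace *ace;
            u16 i, count;

            if (!(le16_to_cpu(sd->Control) & ACL_CONTROL_DP))
                    return;         /* no DACL present on this object */

            dacl = (const struct smb3_acl *)(sd_buf +
                                             le32_to_cpu(sd->OffsetDacl));
            count = le16_to_cpu(dacl->AceCount);
            ace = (const struct cifs_ace *)(dacl + 1);  /* first ACE */
            for (i = 0; i < count; i++) {
                    /* ace->type is one of the ACE types defined above */
                    ace = (const struct cifs_ace *)((const u8 *)ace +
                                                    le16_to_cpu(ace->size));
            }
    }
]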
 /*
  * Minimum security identifier can be one for system defined Users
  * and Groups such as NULL SID and World or Built-in accounts such
index 2e9c7f4..1a135d1 100644 (file)
@@ -169,18 +169,32 @@ cifs_read_super(struct super_block *sb)
        else
                sb->s_maxbytes = MAX_NON_LFS;
 
-       /* BB FIXME fix time_gran to be larger for LANMAN sessions */
-       sb->s_time_gran = 100;
-
-       if (tcon->unix_ext) {
-               ts = cifs_NTtimeToUnix(0);
+       /*
+        * Some very old servers like DOS and OS/2 used 2 second granularity
+        * (while all current servers use 100ns granularity - see MS-DTYP)
+        * but 1 second is the maximum allowed granularity for the VFS,
+        * so for old servers set the time granularity to 1 second and for
+        * everything else (current servers) set it to 100ns.
+        */
+       if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) &&
+           ((tcon->ses->capabilities &
+             tcon->ses->server->vals->cap_nt_find) == 0) &&
+           !tcon->unix_ext) {
+               sb->s_time_gran = 1000000000; /* 1 second is max allowed gran */
+               ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0);
                sb->s_time_min = ts.tv_sec;
-               ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX));
+               ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX),
+                                   cpu_to_le16(SMB_TIME_MAX), 0);
                sb->s_time_max = ts.tv_sec;
        } else {
-               ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0);
+               /*
+                * Almost every server, including all SMB2+, uses DCE TIME
+                * i.e. 100-nanosecond units since 1601. See MS-DTYP and MS-FSCC.
+                */
+               sb->s_time_gran = 100;
+               ts = cifs_NTtimeToUnix(0);
                sb->s_time_min = ts.tv_sec;
-               ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), cpu_to_le16(SMB_TIME_MAX), 0);
+               ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX));
                sb->s_time_max = ts.tv_sec;
        }
 
index 54e2045..d78bfcc 100644 (file)
@@ -331,8 +331,9 @@ struct smb_version_operations {
                        umode_t mode, struct cifs_tcon *tcon,
                        const char *full_path,
                        struct cifs_sb_info *cifs_sb);
-       int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *,
-                    struct cifs_sb_info *);
+       int (*mkdir)(const unsigned int xid, struct inode *inode, umode_t mode,
+                    struct cifs_tcon *tcon, const char *name,
+                    struct cifs_sb_info *sb);
        /* set info on created directory */
        void (*mkdir_setinfo)(struct inode *, const char *,
                              struct cifs_sb_info *, struct cifs_tcon *,
@@ -1209,6 +1210,7 @@ struct cifs_search_info {
        bool smallBuf:1; /* so we know which buf_release function to call */
 };
 
+#define ACL_NO_MODE    ((umode_t)(-1))
 struct cifs_open_parms {
        struct cifs_tcon *tcon;
        struct cifs_sb_info *cifs_sb;
@@ -1389,6 +1391,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
 struct cifsInodeInfo {
        bool can_cache_brlcks;
        struct list_head llist; /* locks held by this inode */
+       /*
+        * NOTE: Some code paths call down_read(lock_sem) twice, so
+        * we must always use cifs_down_write() instead of down_write()
+        * for this semaphore to avoid deadlocks.
+        */
        struct rw_semaphore lock_sem;   /* protect the fields above */
        /* BB add in lists for dirty pages i.e. write caching info for oplock */
        struct list_head openFileList;
index 99b1b1e..fe597d3 100644 (file)
@@ -170,6 +170,7 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile,
                             struct file_lock *flock, const unsigned int xid);
 extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
 
+extern void cifs_down_write(struct rw_semaphore *sem);
 extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
                                              struct file *file,
                                              struct tcon_link *tlink,
@@ -372,7 +373,8 @@ extern int CIFSSMBUnixSetPathInfo(const unsigned int xid,
                                  const struct nls_table *nls_codepage,
                                  int remap);
 
-extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon,
+extern int CIFSSMBMkDir(const unsigned int xid, struct inode *inode,
+                       umode_t mode, struct cifs_tcon *tcon,
                        const char *name, struct cifs_sb_info *cifs_sb);
 extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon,
                        const char *name, struct cifs_sb_info *cifs_sb);
index dbee213..4f554f0 100644 (file)
@@ -1078,7 +1078,8 @@ RmDirRetry:
 }
 
 int
-CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+CIFSSMBMkDir(const unsigned int xid, struct inode *inode, umode_t mode,
+            struct cifs_tcon *tcon, const char *name,
             struct cifs_sb_info *cifs_sb)
 {
        int rc = 0;
index 2850c3c..ccaa8ba 100644 (file)
@@ -564,9 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
        spin_lock(&GlobalMid_Lock);
        list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
                mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+               kref_get(&mid_entry->refcount);
                if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
                        mid_entry->mid_state = MID_RETRY_NEEDED;
                list_move(&mid_entry->qhead, &retry_list);
+               mid_entry->mid_flags |= MID_DELETED;
        }
        spin_unlock(&GlobalMid_Lock);
        mutex_unlock(&server->srv_mutex);
@@ -576,6 +578,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
                list_del_init(&mid_entry->qhead);
                mid_entry->callback(mid_entry);
+               cifs_mid_q_entry_release(mid_entry);
        }
 
        if (cifs_rdma_enabled(server)) {
@@ -895,8 +898,10 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
        if (mid->mid_flags & MID_DELETED)
                printk_once(KERN_WARNING
                            "trying to dequeue a deleted mid\n");
-       else
+       else {
                list_del_init(&mid->qhead);
+               mid->mid_flags |= MID_DELETED;
+       }
        spin_unlock(&GlobalMid_Lock);
 }
 
@@ -966,8 +971,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
                list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
                        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
                        cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid);
+                       kref_get(&mid_entry->refcount);
                        mid_entry->mid_state = MID_SHUTDOWN;
                        list_move(&mid_entry->qhead, &dispose_list);
+                       mid_entry->mid_flags |= MID_DELETED;
                }
                spin_unlock(&GlobalMid_Lock);
 
@@ -977,6 +984,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
                        cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid);
                        list_del_init(&mid_entry->qhead);
                        mid_entry->callback(mid_entry);
+                       cifs_mid_q_entry_release(mid_entry);
                }
                /* 1/8th of sec is more than enough time for them to exit */
                msleep(125);
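[Note: both cleanup paths now follow the same pin-then-dispose shape: take a kref and mark the entry MID_DELETED while GlobalMid_Lock is held (so dequeue_mid() won't unlink it twice), run the callback off-list, then drop the reference. Generically, with entry and field names illustrative:

    spin_lock(&lock);
    list_for_each_entry_safe(e, n, &live, qhead) {
            kref_get(&e->refcount);         /* pin before leaving the list */
            e->flags |= DELETED;            /* marked under the lock */
            list_move(&e->qhead, &dispose);
    }
    spin_unlock(&lock);

    list_for_each_entry_safe(e, n, &dispose, qhead) {
            list_del_init(&e->qhead);
            e->callback(e);                 /* may sleep; no lock held */
            kref_put(&e->refcount, release_entry);
    }
]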
@@ -3882,8 +3890,12 @@ generic_ip_connect(struct TCP_Server_Info *server)
 
        rc = socket->ops->connect(socket, saddr, slen,
                                  server->noblockcnt ? O_NONBLOCK : 0);
-
-       if (rc == -EINPROGRESS)
+       /*
+        * When mounting SMB root file systems, we do not want to block in
+        * connect. Otherwise bail out and then let cifs_reconnect() perform
+        * reconnect failover - if possible.
+        */
+       if (server->noblockcnt && rc == -EINPROGRESS)
                rc = 0;
        if (rc < 0) {
                cifs_dbg(FYI, "Error %d connecting to server\n", rc);
@@ -4264,7 +4276,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
                server->ops->qfs_tcon(*xid, tcon);
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
                        if (tcon->fsDevInfo.DeviceCharacteristics &
-                           FILE_READ_ONLY_DEVICE)
+                           cpu_to_le32(FILE_READ_ONLY_DEVICE))
                                cifs_dbg(VFS, "mounted to read only share\n");
                        else if ((cifs_sb->mnt_cifs_flags &
                                  CIFS_MOUNT_RW_CACHE) == 0)
@@ -4445,7 +4457,7 @@ static int setup_dfs_tgt_conn(const char *path,
        int rc;
        struct dfs_info3_param ref = {0};
        char *mdata = NULL, *fake_devname = NULL;
-       struct smb_vol fake_vol = {0};
+       struct smb_vol fake_vol = {NULL};
 
        cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
 
index dd5ac84..7ce689d 100644 (file)
@@ -738,10 +738,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 static int
 cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
 {
+       struct inode *inode;
+
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
        if (d_really_is_positive(direntry)) {
+               inode = d_inode(direntry);
+               if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode)))
+                       CIFS_I(inode)->time = 0; /* force reval */
+
                if (cifs_revalidate_dentry(direntry))
                        return 0;
                else {
@@ -752,7 +758,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
                         * attributes will have been updated by
                         * cifs_revalidate_dentry().
                         */
-                       if (IS_AUTOMOUNT(d_inode(direntry)) &&
+                       if (IS_AUTOMOUNT(inode) &&
                           !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
                                spin_lock(&direntry->d_lock);
                                direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
index 4b95700..fa7b0fa 100644 (file)
@@ -253,6 +253,12 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);
 
+       if (rc) {
+               server->ops->close(xid, tcon, fid);
+               if (rc == -ESTALE)
+                       rc = -EOPENSTALE;
+       }
+
 out:
        kfree(buf);
        return rc;
@@ -275,6 +281,13 @@ cifs_has_mand_locks(struct cifsInodeInfo *cinode)
        return has_locks;
 }
 
+void
+cifs_down_write(struct rw_semaphore *sem)
+{
+       while (!down_write_trylock(sem))
+               msleep(10);
+}
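[Note: the poll-and-retry loop never queues this task as a blocked writer, so a thread that already holds lock_sem for read and takes it for read again (the double down_read() the NOTE in cifsglob.h warns about) cannot deadlock behind a queued writer. A hypothetical userspace analogue with POSIX rwlocks:

    #include <pthread.h>
    #include <unistd.h>

    static void down_write_polling(pthread_rwlock_t *rw)
    {
            /* back off and retry instead of queueing as a writer */
            while (pthread_rwlock_trywrlock(rw) != 0)
                    usleep(10 * 1000);      /* mirrors the kernel's msleep(10) */
    }
]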
+
 struct cifsFileInfo *
 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
@@ -300,7 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
-       down_write(&cinode->lock_sem);
+       cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);
 
@@ -399,10 +412,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
        bool oplock_break_cancelled;
 
        spin_lock(&tcon->open_file_lock);
-
+       spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
+               spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
@@ -415,9 +429,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
 
        /* remove it from the lists */
-       spin_lock(&cifsi->open_file_lock);
        list_del(&cifs_file->flist);
-       spin_unlock(&cifsi->open_file_lock);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);
 
@@ -434,6 +446,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
                cifs_set_oplock_level(cifsi, 0);
        }
 
+       spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);
 
        oplock_break_cancelled = wait_oplock_handler ?
@@ -458,7 +471,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
-       down_write(&cifsi->lock_sem);
+       cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
@@ -1021,7 +1034,7 @@ static void
 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 {
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
-       down_write(&cinode->lock_sem);
+       cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
 }
@@ -1043,7 +1056,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
 
 try_again:
        exist = false;
-       down_write(&cinode->lock_sem);
+       cifs_down_write(&cinode->lock_sem);
 
        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
@@ -1066,7 +1079,7 @@ try_again:
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
-               down_write(&cinode->lock_sem);
+               cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }
 
@@ -1119,7 +1132,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
                return rc;
 
 try_again:
-       down_write(&cinode->lock_sem);
+       cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
@@ -1325,7 +1338,7 @@ cifs_push_locks(struct cifsFileInfo *cfile)
        int rc = 0;
 
        /* we are going to update can_cache_brlcks here - need a write access */
-       down_write(&cinode->lock_sem);
+       cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
@@ -1516,7 +1529,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
        if (!buf)
                return -ENOMEM;
 
-       down_write(&cinode->lock_sem);
+       cifs_down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
@@ -1840,13 +1853,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 {
        struct cifsFileInfo *open_file = NULL;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
-       struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 
        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;
 
-       spin_lock(&tcon->open_file_lock);
+       spin_lock(&cifs_inode->open_file_lock);
        /* we could simply get the first_list_entry since write-only entries
           are always at the end of the list but since the first entry might
           have a close pending, we go through the whole list */
@@ -1858,7 +1870,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                                /* found a good file */
                                /* lock it so it will not be closed on us */
                                cifsFileInfo_get(open_file);
-                               spin_unlock(&tcon->open_file_lock);
+                               spin_unlock(&cifs_inode->open_file_lock);
                                return open_file;
                        } /* else might as well continue, and look for
                             another, or simply have the caller reopen it
@@ -1866,7 +1878,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                } else /* write only file */
                        break; /* write only files are last so must be done */
        }
-       spin_unlock(&tcon->open_file_lock);
+       spin_unlock(&cifs_inode->open_file_lock);
        return NULL;
 }
 
@@ -1877,7 +1889,6 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
 {
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
-       struct cifs_tcon *tcon;
        bool any_available = false;
        int rc = -EBADF;
        unsigned int refind = 0;
@@ -1897,16 +1908,15 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
        }
 
        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
-       tcon = cifs_sb_master_tcon(cifs_sb);
 
        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;
 
-       spin_lock(&tcon->open_file_lock);
+       spin_lock(&cifs_inode->open_file_lock);
 refind_writable:
        if (refind > MAX_REOPEN_ATT) {
-               spin_unlock(&tcon->open_file_lock);
+               spin_unlock(&cifs_inode->open_file_lock);
                return rc;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
@@ -1918,7 +1928,7 @@ refind_writable:
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get(open_file);
-                               spin_unlock(&tcon->open_file_lock);
+                               spin_unlock(&cifs_inode->open_file_lock);
                                *ret_file = open_file;
                                return 0;
                        } else {
@@ -1938,7 +1948,7 @@ refind_writable:
                cifsFileInfo_get(inv_file);
        }
 
-       spin_unlock(&tcon->open_file_lock);
+       spin_unlock(&cifs_inode->open_file_lock);
 
        if (inv_file) {
                rc = cifs_reopen_file(inv_file, false);
@@ -1953,7 +1963,7 @@ refind_writable:
                cifsFileInfo_put(inv_file);
                ++refind;
                inv_file = NULL;
-               spin_lock(&tcon->open_file_lock);
+               spin_lock(&cifs_inode->open_file_lock);
                goto refind_writable;
        }
 
@@ -4461,17 +4471,15 @@ static int cifs_readpage(struct file *file, struct page *page)
 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
 {
        struct cifsFileInfo *open_file;
-       struct cifs_tcon *tcon =
-               cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
 
-       spin_lock(&tcon->open_file_lock);
+       spin_lock(&cifs_inode->open_file_lock);
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
-                       spin_unlock(&tcon->open_file_lock);
+                       spin_unlock(&cifs_inode->open_file_lock);
                        return 1;
                }
        }
-       spin_unlock(&tcon->open_file_lock);
+       spin_unlock(&cifs_inode->open_file_lock);
        return 0;
 }
 
index 26cdfbf..df93778 100644 (file)
@@ -414,6 +414,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
                /* if uniqueid is different, return error */
                if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
                    CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) {
+                       CIFS_I(*pinode)->time = 0; /* force reval */
                        rc = -ESTALE;
                        goto cgiiu_exit;
                }
@@ -421,6 +422,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
                /* if filetype is different, return error */
                if (unlikely(((*pinode)->i_mode & S_IFMT) !=
                    (fattr.cf_mode & S_IFMT))) {
+                       CIFS_I(*pinode)->time = 0; /* force reval */
                        rc = -ESTALE;
                        goto cgiiu_exit;
                }
@@ -933,6 +935,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                /* if uniqueid is different, return error */
                if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
                    CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+                       CIFS_I(*inode)->time = 0; /* force reval */
                        rc = -ESTALE;
                        goto cgii_exit;
                }
@@ -940,6 +943,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                /* if filetype is different, return error */
                if (unlikely(((*inode)->i_mode & S_IFMT) !=
                    (fattr.cf_mode & S_IFMT))) {
+                       CIFS_I(*inode)->time = 0; /* force reval */
                        rc = -ESTALE;
                        goto cgii_exit;
                }
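
Zeroing CIFS_I(...)->time before returning -ESTALE marks the cached inode
attributes as never validated, so the next revalidation goes back to the
server instead of trusting metadata that is known to be stale. The same
invalidate-by-zero-timestamp idiom in a self-contained sketch (struct and
helper names are illustrative, not from the kernel):

#include <stdbool.h>
#include <time.h>

struct cached_attrs {
        time_t fetched;         /* 0 means "never validated" */
        /* ... cached attributes ... */
};

/* Force the next freshness check to fail, triggering a server round trip. */
static void force_revalidate(struct cached_attrs *c)
{
        c->fetched = 0;
}

static bool is_fresh(const struct cached_attrs *c, time_t ttl)
{
        return c->fetched != 0 && time(NULL) - c->fetched < ttl;
}
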
@@ -1622,13 +1626,14 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
        }
 
        /* BB add setting the equivalent of mode via CreateX w/ACLs */
-       rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb);
+       rc = server->ops->mkdir(xid, inode, mode, tcon, full_path, cifs_sb);
        if (rc) {
                cifs_dbg(FYI, "cifs_mkdir returned 0x%x\n", rc);
                d_drop(direntry);
                goto mkdir_out;
        }
 
+       /* TODO: skip this for smb2/smb3 */
        rc = cifs_mkdir_qinfo(inode, direntry, mode, full_path, cifs_sb, tcon,
                              xid);
 mkdir_out:
@@ -2470,9 +2475,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
                        rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid);
                        cifsFileInfo_put(wfile);
                        if (rc)
-                               return rc;
+                               goto cifs_setattr_exit;
                } else if (rc != -EBADF)
-                       return rc;
+                       goto cifs_setattr_exit;
                else
                        rc = 0;
        }
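
The two early returns above skipped the common exit label of
cifs_setattr_nounix(), leaking the path buffer and xid acquired at the top
of the function; routing every failure through the exit path releases them
on all paths. The single-exit idiom, shown standalone (function names here
are illustrative):

#include <stdlib.h>
#include <string.h>

static int step_that_may_fail(const char *buf) { return buf[0] ? 0 : -5; }

static int do_operation(void)
{
        int rc;
        char *buf = malloc(64);

        if (!buf)
                return -12;     /* nothing acquired yet: early return is safe */
        strcpy(buf, "x");

        rc = step_that_may_fail(buf);
        if (rc)
                goto out;       /* not "return rc": buf would leak */

        /* ... further steps, all exiting through the same label ... */
out:
        free(buf);
        return rc;
}
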
index 76ddd98..1a01e10 100644 (file)
@@ -164,6 +164,7 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon,
 long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 {
        struct inode *inode = file_inode(filep);
+       struct smb3_key_debug_info pkey_inf;
        int rc = -ENOTTY; /* strange error - but the precedent */
        unsigned int xid;
        struct cifsFileInfo *pSMBFile = filep->private_data;
@@ -270,6 +271,34 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        else
                                rc = -EOPNOTSUPP;
                        break;
+               case CIFS_DUMP_KEY:
+                       if (pSMBFile == NULL)
+                               break;
+                       if (!capable(CAP_SYS_ADMIN)) {
+                               rc = -EACCES;
+                               break;
+                       }
+
+                       tcon = tlink_tcon(pSMBFile->tlink);
+                       if (!smb3_encryption_required(tcon)) {
+                               rc = -EOPNOTSUPP;
+                               break;
+                       }
+                       pkey_inf.cipher_type =
+                               le16_to_cpu(tcon->ses->server->cipher_type);
+                       pkey_inf.Suid = tcon->ses->Suid;
+                       memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response,
+                                       16 /* SMB2_NTLMV2_SESSKEY_SIZE */);
+                       memcpy(pkey_inf.smb3decryptionkey,
+                             tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+                       memcpy(pkey_inf.smb3encryptionkey,
+                             tcon->ses->smb3encryptionkey, SMB3_SIGN_KEY_SIZE);
+                       if (copy_to_user((void __user *)arg, &pkey_inf,
+                                       sizeof(struct smb3_key_debug_info)))
+                               rc = -EFAULT;
+                       else
+                               rc = 0;
+                       break;
                default:
                        cifs_dbg(FYI, "unsupported ioctl\n");
                        break;
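
The new CIFS_DUMP_KEY case hands a fixed-size struct smb3_key_debug_info
back to a CAP_SYS_ADMIN caller via copy_to_user(), and only for encrypted
tree connections. A user-space consumer would look roughly like the sketch
below; the struct layout and request code are placeholders here, since the
authoritative definitions live in the cifs UAPI header:

#include <stdint.h>
#include <sys/ioctl.h>

/* Illustrative layout only -- check the kernel's cifs_ioctl.h. */
struct smb3_key_debug_info_sketch {
        uint64_t Suid;
        uint16_t cipher_type;
        uint8_t  auth_key[16];
        uint8_t  smb3decryptionkey[16];
        uint8_t  smb3encryptionkey[16];
};

#define CIFS_DUMP_KEY_SKETCH \
        _IOWR(0xCF, 8, struct smb3_key_debug_info_sketch)  /* placeholder */

static int dump_key(int fd, struct smb3_key_debug_info_sketch *out)
{
        /* EACCES without CAP_SYS_ADMIN; EOPNOTSUPP if the tcon is not
         * encrypted -- both per the handler above. */
        return ioctl(fd, CIFS_DUMP_KEY_SKETCH, out);
}
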
index 49c17ee..9b41436 100644 (file)
@@ -117,10 +117,6 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
        {0, 0}
 };
 
-static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
-       {0, 0}
-};
-
 /*
  * Convert a string containing text IPv4 or IPv6 address to binary form.
  *
index 4c764ff..85bd644 100644 (file)
@@ -698,7 +698,6 @@ sess_auth_lanman(struct sess_data *sess_data)
        char *bcc_ptr;
        struct cifs_ses *ses = sess_data->ses;
        char lnm_session_key[CIFS_AUTH_RESP_SIZE];
-       __u32 capabilities;
        __u16 bytes_remaining;
 
        /* lanman 2 style sessionsetup */
@@ -709,7 +708,7 @@ sess_auth_lanman(struct sess_data *sess_data)
 
        pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
        bcc_ptr = sess_data->iov[2].iov_base;
-       capabilities = cifs_ssetup_hdr(ses, pSMB);
+       (void)cifs_ssetup_hdr(ses, pSMB);
 
        pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
 
index b7421a0..5148106 100644 (file)
@@ -171,6 +171,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
        /* we do not want to loop forever */
        last_mid = cur_mid;
        cur_mid++;
+       /* avoid 0xFFFF MID */
+       if (cur_mid == 0xffff)
+               cur_mid++;
 
        /*
         * This nested loop looks more expensive than it is.
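
MID 0xFFFF is reserved by the MS-CIFS specification and must not be used by
the client, so the multiplex ID generator now steps over it when the 16-bit
counter wraps around. The skip logic as a runnable test:

#include <stdint.h>
#include <stdio.h>

/* Hand out 16-bit multiplex IDs, never yielding the reserved 0xFFFF. */
static uint16_t next_mid(uint16_t *cur)
{
        (*cur)++;
        if (*cur == 0xFFFF)
                (*cur)++;               /* uint16_t wrap: 0xFFFF + 1 == 0 */
        return *cur;
}

int main(void)
{
        uint16_t mid = 0xFFFD;

        for (int i = 0; i < 4; i++)
                printf("0x%04X\n", next_mid(&mid)); /* FFFE, 0000, 0001, 0002 */
        return 0;
}
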
index e6a1fc7..8b0b512 100644 (file)
@@ -145,7 +145,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 
        cur = buf;
 
-       down_write(&cinode->lock_sem);
+       cifs_down_write(&cinode->lock_sem);
        list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                if (flock->fl_start > li->offset ||
                    (flock->fl_start + length) <
index d2a3fb7..4121ac1 100644 (file)
@@ -51,7 +51,7 @@ static int
 smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                 struct cifs_sb_info *cifs_sb, const char *full_path,
                 __u32 desired_access, __u32 create_disposition,
-                __u32 create_options, void *ptr, int command,
+                __u32 create_options, umode_t mode, void *ptr, int command,
                 struct cifsFileInfo *cfile)
 {
        int rc;
@@ -103,6 +103,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
        oparms.fid = &fid;
        oparms.reconnect = false;
+       oparms.mode = mode;
 
        memset(&open_iov, 0, sizeof(open_iov));
        rqst[num_rqst].rq_iov = open_iov;
@@ -478,7 +479,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
        cifs_get_readable_path(tcon, full_path, &cfile);
        rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
                              FILE_READ_ATTRIBUTES, FILE_OPEN, create_options,
-                             smb2_data, SMB2_OP_QUERY_INFO, cfile);
+                             ACL_NO_MODE, smb2_data, SMB2_OP_QUERY_INFO, cfile);
        if (rc == -EOPNOTSUPP) {
                *symlink = true;
                create_options |= OPEN_REPARSE_POINT;
@@ -486,8 +487,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                /* Failed on a symbolic link - query a reparse point info */
                rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
                                      FILE_READ_ATTRIBUTES, FILE_OPEN,
-                                     create_options, smb2_data,
-                                     SMB2_OP_QUERY_INFO, NULL);
+                                     create_options, ACL_NO_MODE,
+                                     smb2_data, SMB2_OP_QUERY_INFO, NULL);
        }
        if (rc)
                goto out;
@@ -499,12 +500,14 @@ out:
 }
 
 int
-smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode,
+          struct cifs_tcon *tcon, const char *name,
           struct cifs_sb_info *cifs_sb)
 {
        return smb2_compound_op(xid, tcon, cifs_sb, name,
                                FILE_WRITE_ATTRIBUTES, FILE_CREATE,
-                               CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR, NULL);
+                               CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR,
+                               NULL);
 }
 
 void
@@ -525,8 +528,8 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,
        cifs_get_writable_path(tcon, name, &cfile);
        tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,
                                 FILE_WRITE_ATTRIBUTES, FILE_CREATE,
-                                CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO,
-                                cfile);
+                                CREATE_NOT_FILE, ACL_NO_MODE,
+                                &data, SMB2_OP_SET_INFO, cfile);
        if (tmprc == 0)
                cifs_i->cifsAttrs = dosattrs;
 }
@@ -536,7 +539,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
           struct cifs_sb_info *cifs_sb)
 {
        return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
-                               CREATE_NOT_FILE,
+                               CREATE_NOT_FILE, ACL_NO_MODE,
                                NULL, SMB2_OP_RMDIR, NULL);
 }
 
@@ -546,7 +549,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 {
        return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
                                CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
-                               NULL, SMB2_OP_DELETE, NULL);
+                               ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL);
 }
 
 static int
@@ -564,7 +567,8 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
                goto smb2_rename_path;
        }
        rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
-                             FILE_OPEN, 0, smb2_to_name, command, cfile);
+                             FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name,
+                             command, cfile);
 smb2_rename_path:
        kfree(smb2_to_name);
        return rc;
@@ -601,8 +605,8 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
        __le64 eof = cpu_to_le64(size);
 
        return smb2_compound_op(xid, tcon, cifs_sb, full_path,
-                               FILE_WRITE_DATA, FILE_OPEN, 0, &eof,
-                               SMB2_OP_SET_EOF, NULL);
+                               FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE,
+                               &eof, SMB2_OP_SET_EOF, NULL);
 }
 
 int
@@ -623,8 +627,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
                return PTR_ERR(tlink);
 
        rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path,
-                             FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
-                             SMB2_OP_SET_INFO, NULL);
+                             FILE_WRITE_ATTRIBUTES, FILE_OPEN,
+                             0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, NULL);
        cifs_put_tlink(tlink);
        return rc;
 }
index eaed180..cd55af9 100644 (file)
@@ -751,6 +751,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
                goto oshr_exit;
        }
 
+       atomic_inc(&tcon->num_remote_opens);
+
        o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
        oparms.fid->persistent_fid = o_rsp->PersistentFileId;
        oparms.fid->volatile_fid = o_rsp->VolatileFileId;
@@ -1176,6 +1178,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
 
        rc = compound_send_recv(xid, ses, flags, 3, rqst,
                                resp_buftype, rsp_iov);
+       /* no need to bump num_remote_opens because handle immediately closed */
 
  sea_exit:
        kfree(ea);
@@ -1518,6 +1521,8 @@ smb2_ioctl_query_info(const unsigned int xid,
                                resp_buftype, rsp_iov);
        if (rc)
                goto iqinf_exit;
+
+       /* No need to bump num_remote_opens since handle immediately closed */
        if (qi.flags & PASSTHRU_FSCTL) {
                pqi = (struct smb_query_info __user *)arg;
                io_rsp = (struct smb2_ioctl_rsp *)rsp_iov[1].iov_base;
@@ -3328,6 +3333,11 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
        if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
                return;
 
+       /* Check if the server granted an oplock rather than a lease */
+       if (oplock & SMB2_OPLOCK_LEVEL_EXCLUSIVE)
+               return smb2_set_oplock_level(cinode, oplock, epoch,
+                                            purge_cache);
+
        if (oplock & SMB2_LEASE_READ_CACHING_HE) {
                new_oplock |= CIFS_CACHE_READ_FLG;
                strcat(message, "R");
@@ -4074,6 +4084,7 @@ free_pages:
 
        kfree(dw->ppages);
        cifs_small_buf_release(dw->buf);
+       kfree(dw);
 }
 
 
@@ -4147,7 +4158,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
                dw->server = server;
                dw->ppages = pages;
                dw->len = len;
-               queue_work(cifsiod_wq, &dw->decrypt);
+               queue_work(decrypt_wq, &dw->decrypt);
                *num_mids = 0; /* worker thread takes care of finding mid */
                return -1;
        }
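
Moving the offloaded decryption from the shared cifsiod_wq onto a dedicated
decrypt_wq gives large encrypted reads their own worker pool, so they can
neither starve nor deadlock behind unrelated cifsiod work items. A hedged
kernel-style sketch of the pattern (names are illustrative; the queue flags
mirror common practice, not necessarily the exact cifs setup):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static struct workqueue_struct *decrypt_wq_sketch;

struct decrypt_work {
        struct work_struct work;
        /* ... server, pages, length ... */
};

static void decrypt_fn(struct work_struct *w)
{
        struct decrypt_work *dw = container_of(w, struct decrypt_work, work);

        /* ... decrypt the offloaded pages, complete the mid ... */
        kfree(dw);      /* matches the kfree(dw) added a few hunks up */
}

static int setup(void)
{
        decrypt_wq_sketch = alloc_workqueue("smb3decrypt-sketch",
                                            WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
        return decrypt_wq_sketch ? 0 : -ENOMEM;
}
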
index 87066f1..0514986 100644 (file)
@@ -751,6 +751,8 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
        unsigned int num = *num_iovec;
 
        iov[num].iov_base = create_posix_buf(mode);
+       if (mode == ACL_NO_MODE)
+               cifs_dbg(FYI, "illegal mode\n");
        if (iov[num].iov_base == NULL)
                return -ENOMEM;
        iov[num].iov_len = sizeof(struct create_posix);
@@ -2352,6 +2354,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
        rqst.rq_iov = iov;
        rqst.rq_nvec = n_iov;
 
+       /* no need to inc num_remote_opens because we close it just below */
        trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, CREATE_NOT_FILE,
                                    FILE_WRITE_ATTRIBUTES);
        /* resource #4: response buffer */
@@ -2416,6 +2419,7 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
        /* File attributes ignored on open (used in create though) */
        req->FileAttributes = cpu_to_le32(file_attributes);
        req->ShareAccess = FILE_SHARE_ALL_LE;
+
        req->CreateDisposition = cpu_to_le32(oparms->disposition);
        req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK);
        req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req));
@@ -2517,6 +2521,20 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
                        return rc;
        }
 
+       if ((oparms->disposition == FILE_CREATE) &&
+           (oparms->mode != ACL_NO_MODE)) {
+               if (n_iov > 2) {
+                       struct create_context *ccontext =
+                           (struct create_context *)iov[n_iov-1].iov_base;
+                       ccontext->Next =
+                               cpu_to_le32(iov[n_iov-1].iov_len);
+               }
+
+               /* rc = add_sd_context(iov, &n_iov, oparms->mode); */
+               if (rc)
+                       return rc;
+       }
+
        if (n_iov > 2) {
                struct create_context *ccontext =
                        (struct create_context *)iov[n_iov-1].iov_base;
@@ -3180,7 +3198,7 @@ SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
  * See MS-SMB2 2.2.35 and 2.2.36
  */
 
-int
+static int
 SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst,
                struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid,
                u32 completion_filter, bool watch_tree)
@@ -3196,7 +3214,8 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst,
 
        req->PersistentFileId = persistent_fid;
        req->VolatileFileId = volatile_fid;
-       req->OutputBufferLength = SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE;
+       req->OutputBufferLength =
+               cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE);
        req->CompletionFilter = cpu_to_le32(completion_filter);
        if (watch_tree)
                req->Flags = cpu_to_le16(SMB2_WATCH_TREE);
index 67a91b1..71b2930 100644 (file)
@@ -84,7 +84,8 @@ extern int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
                               umode_t mode, struct cifs_tcon *tcon,
                               const char *full_path,
                               struct cifs_sb_info *cifs_sb);
-extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon,
+extern int smb2_mkdir(const unsigned int xid, struct inode *inode,
+                     umode_t mode, struct cifs_tcon *tcon,
                      const char *name, struct cifs_sb_info *cifs_sb);
 extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
                               struct cifs_sb_info *cifs_sb,
@@ -149,6 +150,10 @@ extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
                           bool is_fsctl, char *in_data, u32 indatalen,
                           __u32 max_response_size);
 extern void SMB2_ioctl_free(struct smb_rqst *rqst);
+extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
+                       u64 persistent_fid, u64 volatile_fid, bool watch_tree,
+                       u32 completion_filter);
+
 extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
                      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
index 08628e6..1ff2852 100644 (file)
 #define IO_REPARSE_APPXSTREAM       0xC0000014
 /* NFS symlinks, Win 8/SMB3 and later */
 #define IO_REPARSE_TAG_NFS           0x80000014
+/*
+ * AzureFileSync - see
+ * https://docs.microsoft.com/en-us/azure/storage/files/storage-sync-cloud-tiering
+ */
+#define IO_REPARSE_TAG_AZ_FILE_SYNC  0x8000001e
+/* WSL reparse tags */
+#define IO_REPARSE_TAG_LX_SYMLINK    0xA000001D
+#define IO_REPARSE_TAG_AF_UNIX      0x80000023
+#define IO_REPARSE_TAG_LX_FIFO      0x80000024
+#define IO_REPARSE_TAG_LX_CHR       0x80000025
+#define IO_REPARSE_TAG_LX_BLK       0x80000026
 
 /* fsctl flags */
 /* If Flags is set to this value, the request is an FSCTL not ioctl request */
index 308ad0f..ca3de62 100644 (file)
@@ -86,22 +86,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
 
 static void _cifs_mid_q_entry_release(struct kref *refcount)
 {
-       struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
-                                              refcount);
-
-       mempool_free(mid, cifs_mid_poolp);
-}
-
-void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
-{
-       spin_lock(&GlobalMid_Lock);
-       kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
-       spin_unlock(&GlobalMid_Lock);
-}
-
-void
-DeleteMidQEntry(struct mid_q_entry *midEntry)
-{
+       struct mid_q_entry *midEntry =
+                       container_of(refcount, struct mid_q_entry, refcount);
 #ifdef CONFIG_CIFS_STATS2
        __le16 command = midEntry->server->vals->lock_cmd;
        __u16 smb_cmd = le16_to_cpu(midEntry->command);
@@ -166,6 +152,19 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
                }
        }
 #endif
+
+       mempool_free(midEntry, cifs_mid_poolp);
+}
+
+void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
+{
+       spin_lock(&GlobalMid_Lock);
+       kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
+       spin_unlock(&GlobalMid_Lock);
+}
+
+void DeleteMidQEntry(struct mid_q_entry *midEntry)
+{
        cifs_mid_q_entry_release(midEntry);
 }
 
@@ -173,8 +172,10 @@ void
 cifs_delete_mid(struct mid_q_entry *mid)
 {
        spin_lock(&GlobalMid_Lock);
-       list_del_init(&mid->qhead);
-       mid->mid_flags |= MID_DELETED;
+       if (!(mid->mid_flags & MID_DELETED)) {
+               list_del_init(&mid->qhead);
+               mid->mid_flags |= MID_DELETED;
+       }
        spin_unlock(&GlobalMid_Lock);
 
        DeleteMidQEntry(mid);
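
cifs_delete_mid() here and the invalid-state path in the next hunk can race
to unlink the same mid from the pending queue; making both test and set
MID_DELETED under GlobalMid_Lock guarantees the list_del_init() happens
exactly once. The flag-guarded single-unlink idiom in a self-contained
sketch:

#include <pthread.h>
#include <stdbool.h>

struct mid_entry {
        struct mid_entry *prev, *next;
        bool deleted;
};

static pthread_mutex_t mid_lock = PTHREAD_MUTEX_INITIALIZER;

/* Unlink at most once, no matter how many paths race to delete. */
static void mid_unlink_once(struct mid_entry *mid)
{
        pthread_mutex_lock(&mid_lock);
        if (!mid->deleted) {
                mid->prev->next = mid->next;
                mid->next->prev = mid->prev;
                mid->next = mid->prev = mid;    /* like list_del_init() */
                mid->deleted = true;
        }
        pthread_mutex_unlock(&mid_lock);
}
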
@@ -872,7 +873,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
                rc = -EHOSTDOWN;
                break;
        default:
-               list_del_init(&mid->qhead);
+               if (!(mid->mid_flags & MID_DELETED)) {
+                       list_del_init(&mid->qhead);
+                       mid->mid_flags |= MID_DELETED;
+               }
                cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
                         __func__, mid->mid, mid->mid_state);
                rc = -EIO;
index 9076150..db4ba8f 100644 (file)
@@ -31,7 +31,7 @@
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
 
-#define MAX_EA_VALUE_SIZE 65535
+#define MAX_EA_VALUE_SIZE CIFSMaxBufSize
 #define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
 #define CIFS_XATTR_ATTRIB "cifs.dosattrib"  /* full name: user.cifs.dosattrib */
 #define CIFS_XATTR_CREATETIME "cifs.creationtime"  /* user.cifs.creationtime */
index 6bf81f9..2cc43cd 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -220,10 +220,11 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
 
        for (;;) {
                entry = xas_find_conflict(xas);
+               if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
+                       return entry;
                if (dax_entry_order(entry) < order)
                        return XA_RETRY_ENTRY;
-               if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
-                               !dax_is_locked(entry))
+               if (!dax_is_locked(entry))
                        return entry;
 
                wq = dax_entry_waitqueue(xas, entry, &ewait.key);
index 93e4ca6..87846aa 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/poll.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -136,6 +137,25 @@ void debugfs_file_put(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_file_put);
 
+/*
+ * Only permit access to world-readable files when the kernel is locked down.
+ * We also need to exclude any file that has ways to write or alter it as root
+ * can bypass the permissions check.
+ */
+static bool debugfs_is_locked_down(struct inode *inode,
+                                  struct file *filp,
+                                  const struct file_operations *real_fops)
+{
+       if ((inode->i_mode & 07777) == 0444 &&
+           !(filp->f_mode & FMODE_WRITE) &&
+           !real_fops->unlocked_ioctl &&
+           !real_fops->compat_ioctl &&
+           !real_fops->mmap)
+               return false;
+
+       return security_locked_down(LOCKDOWN_DEBUGFS);
+}
+
 static int open_proxy_open(struct inode *inode, struct file *filp)
 {
        struct dentry *dentry = F_DENTRY(filp);
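
debugfs_is_locked_down() above only exempts a file from lockdown when it is
world-readable (permission bits exactly 0444), opened without FMODE_WRITE,
and exposes no ioctl or mmap handlers that could mutate state; everything
else is referred to security_locked_down(). The mode test on its own, as a
runnable check (helper name is illustrative):

#include <stdbool.h>
#include <sys/stat.h>

/* True only if every permission digit is "read": no write/execute bits,
 * and no setuid/setgid/sticky bits hiding in the upper octal digit. */
static bool world_readable_only(mode_t mode)
{
        return (mode & 07777) == 0444;
}

/*
 * world_readable_only(0444)  -> true
 * world_readable_only(0644)  -> false (owner-writable)
 * world_readable_only(04444) -> false (setuid)
 */
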
@@ -147,6 +167,11 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
                return r == -EIO ? -ENOENT : r;
 
        real_fops = debugfs_real_fops(filp);
+
+       r = debugfs_is_locked_down(inode, filp, real_fops);
+       if (r)
+               goto out;
+
        real_fops = fops_get(real_fops);
        if (!real_fops) {
                /* Huh? Module did not clean up after itself at exit? */
@@ -272,6 +297,11 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
                return r == -EIO ? -ENOENT : r;
 
        real_fops = debugfs_real_fops(filp);
+
+       r = debugfs_is_locked_down(inode, filp, real_fops);
+       if (r)
+               goto out;
+
        real_fops = fops_get(real_fops);
        if (!real_fops) {
                /* Huh? Module did not cleanup after itself at exit? */
index 042b688..7b975db 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -35,6 +36,32 @@ static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
 
+/*
+ * Don't allow access attributes to be changed whilst the kernel is locked down
+ * so that we can use the file mode as part of a heuristic to determine whether
+ * to lock down individual files.
+ */
+static int debugfs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+       int ret = security_locked_down(LOCKDOWN_DEBUGFS);
+
+       if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
+               return ret;
+       return simple_setattr(dentry, ia);
+}
+
+static const struct inode_operations debugfs_file_inode_operations = {
+       .setattr        = debugfs_setattr,
+};
+static const struct inode_operations debugfs_dir_inode_operations = {
+       .lookup         = simple_lookup,
+       .setattr        = debugfs_setattr,
+};
+static const struct inode_operations debugfs_symlink_inode_operations = {
+       .get_link       = simple_get_link,
+       .setattr        = debugfs_setattr,
+};
+
 static struct inode *debugfs_get_inode(struct super_block *sb)
 {
        struct inode *inode = new_inode(sb);
@@ -369,6 +396,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
        inode->i_mode = mode;
        inode->i_private = data;
 
+       inode->i_op = &debugfs_file_inode_operations;
        inode->i_fop = proxy_fops;
        dentry->d_fsdata = (void *)((unsigned long)real_fops |
                                DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
@@ -532,7 +560,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
        }
 
        inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-       inode->i_op = &simple_dir_inode_operations;
+       inode->i_op = &debugfs_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
 
        /* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -632,7 +660,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
                return failed_creating(dentry);
        }
        inode->i_mode = S_IFLNK | S_IRWXUGO;
-       inode->i_op = &simple_symlink_inode_operations;
+       inode->i_op = &debugfs_symlink_inode_operations;
        inode->i_link = link;
        d_instantiate(dentry, inode);
        return end_creating(dentry);
index ae19678..9329ced 100644 (file)
@@ -241,9 +241,8 @@ void dio_warn_stale_pagecache(struct file *filp)
        }
 }
 
-/**
+/*
  * dio_complete() - called when all DIO BIO I/O has been completed
- * @offset: the byte offset in the file of the completed operation
  *
  * This drops i_dio_count, lets interested parties know that a DIO operation
  * has completed, and calculates the resulting return code for the operation.
index 8a9fcbd..fc3a8d8 100644 (file)
@@ -34,11 +34,15 @@ static void erofs_readendio(struct bio *bio)
 
 struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr)
 {
-       struct inode *const bd_inode = sb->s_bdev->bd_inode;
-       struct address_space *const mapping = bd_inode->i_mapping;
+       struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
+       struct page *page;
 
-       return read_cache_page_gfp(mapping, blkaddr,
+       page = read_cache_page_gfp(mapping, blkaddr,
                                   mapping_gfp_constraint(mapping, ~__GFP_FS));
+       /* should already be PageUptodate */
+       if (!IS_ERR(page))
+               lock_page(page);
+       return page;
 }
 
 static int erofs_map_blocks_flatmode(struct inode *inode,
index caf9a95..0e36949 100644 (file)
@@ -105,9 +105,9 @@ static int erofs_read_superblock(struct super_block *sb)
        int ret;
 
        page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL);
-       if (!page) {
+       if (IS_ERR(page)) {
                erofs_err(sb, "cannot read erofs superblock");
-               return -EIO;
+               return PTR_ERR(page);
        }
 
        sbi = EROFS_SB(sb);
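
read_mapping_page() reports failure as an ERR_PTR-encoded errno, never as
NULL, so the old "if (!page)" test could not fire; the fix checks IS_ERR()
and propagates PTR_ERR(). The kernel's pointer-encoded-errno convention is
easy to reproduce in a standalone sketch:

#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Same trick as the kernel: errnos live in the top page of the address
 * space, which no valid pointer can occupy. */
static void *err_ptr(long err)       { return (void *)err; }
static long  ptr_err(const void *p)  { return (long)p; }
static int   is_err(const void *p)
{
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

int main(void)
{
        void *page = err_ptr(-5);       /* think -EIO */

        if (page == NULL)               /* the old, always-false test */
                puts("never reached");
        if (is_err(page))
                printf("error %ld\n", ptr_err(page));   /* prints: error -5 */
        return 0;
}
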
index 96e34c9..fad80c9 100644 (file)
@@ -575,7 +575,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
        struct erofs_map_blocks *const map = &fe->map;
        struct z_erofs_collector *const clt = &fe->clt;
        const loff_t offset = page_offset(page);
-       bool tight = (clt->mode >= COLLECT_PRIMARY_HOOKED);
+       bool tight = true;
 
        enum z_erofs_cache_alloctype cache_strategy;
        enum z_erofs_page_type page_type;
@@ -628,8 +628,16 @@ restart_now:
        preload_compressed_pages(clt, MNGD_MAPPING(sbi),
                                 cache_strategy, pagepool);
 
-       tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED);
 hitted:
+       /*
+        * Ensure the current partial page belongs to this submit chain rather
+        * than other concurrent submit chains or the noio(bypass) chain since
+        * those chains are handled asynchronously thus the page cannot be used
+        * for inplace I/O or pagevec (should be processed in strict order.)
+        */
+       tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
+                 clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+
        cur = end - min_t(unsigned int, offset + end - map->m_la, end);
        if (!(map->m_flags & EROFS_MAP_MAPPED)) {
                zero_user_segment(page, cur, end);
index f7f6a14..555e93c 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1033,6 +1033,7 @@ static int exec_mmap(struct mm_struct *mm)
        }
        task_lock(tsk);
        active_mm = tsk->active_mm;
+       membarrier_exec_mmap(mm);
        tsk->mm = mm;
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
@@ -1825,7 +1826,6 @@ static int __do_execve_file(int fd, struct filename *filename,
        /* execve succeeded */
        current->fs->in_exec = 0;
        current->in_execve = 0;
-       membarrier_execve(current);
        rseq_execve(current);
        acct_update_integrals(current);
        task_numa_free(current, false);
index 123e3de..516faa2 100644 (file)
@@ -4551,6 +4551,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
        struct buffer_head      *bh;
        struct super_block      *sb = inode->i_sb;
        ext4_fsblk_t            block;
+       struct blk_plug         plug;
        int                     inodes_per_block, inode_offset;
 
        iloc->bh = NULL;
@@ -4639,6 +4640,7 @@ make_io:
                 * If we need to do any I/O, try to pre-readahead extra
                 * blocks from the inode table.
                 */
+               blk_start_plug(&plug);
                if (EXT4_SB(sb)->s_inode_readahead_blks) {
                        ext4_fsblk_t b, end, table;
                        unsigned num;
@@ -4669,6 +4671,7 @@ make_io:
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
                submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+               blk_finish_plug(&plug);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        EXT4_ERROR_INODE_BLOCK(inode, block,
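
Wrapping the inode-table readahead in blk_start_plug()/blk_finish_plug()
lets the block layer batch the stream of per-block submit_bh() calls and
merge adjacent reads into larger requests before the code waits on the one
buffer it actually needs. A hedged kernel-style sketch of the plugging
pattern (function name is illustrative):

#include <linux/blkdev.h>
#include <linux/buffer_head.h>

/* Submit a run of metadata reads under one plug so adjacent blocks can
 * be merged before anything is dispatched to the device. */
static void read_run(struct buffer_head **bhs, int nr)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);
        for (i = 0; i < nr; i++)
                submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bhs[i]);
        blk_finish_plug(&plug);         /* flushes the batched requests */

        for (i = 0; i < nr; i++)
                wait_on_buffer(bhs[i]);
}
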
index 1bda2ab..054acd9 100644 (file)
@@ -88,9 +88,7 @@ static int fat__get_entry(struct inode *dir, loff_t *pos,
        int err, offset;
 
 next:
-       if (*bh)
-               brelse(*bh);
-
+       brelse(*bh);
        *bh = NULL;
        iblock = *pos >> sb->s_blocksize_bits;
        err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0, false);
@@ -1100,8 +1098,11 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
                        err = -ENOMEM;
                        goto error;
                }
+               /* Avoid race with userspace read via bdev */
+               lock_buffer(bhs[n]);
                memset(bhs[n]->b_data, 0, sb->s_blocksize);
                set_buffer_uptodate(bhs[n]);
+               unlock_buffer(bhs[n]);
                mark_buffer_dirty_inode(bhs[n], dir);
 
                n++;
@@ -1158,6 +1159,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
        fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
 
        de = (struct msdos_dir_entry *)bhs[0]->b_data;
+       /* Avoid race with userspace read via bdev */
+       lock_buffer(bhs[0]);
        /* filling the new directory slots ("." and ".." entries) */
        memcpy(de[0].name, MSDOS_DOT, MSDOS_NAME);
        memcpy(de[1].name, MSDOS_DOTDOT, MSDOS_NAME);
@@ -1180,6 +1183,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
        de[0].size = de[1].size = 0;
        memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
        set_buffer_uptodate(bhs[0]);
+       unlock_buffer(bhs[0]);
        mark_buffer_dirty_inode(bhs[0], dir);
 
        err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
@@ -1237,11 +1241,14 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
 
                        /* fill the directory entry */
                        copy = min(size, sb->s_blocksize);
+                       /* Avoid race with userspace read via bdev */
+                       lock_buffer(bhs[n]);
                        memcpy(bhs[n]->b_data, slots, copy);
-                       slots += copy;
-                       size -= copy;
                        set_buffer_uptodate(bhs[n]);
+                       unlock_buffer(bhs[n]);
                        mark_buffer_dirty_inode(bhs[n], dir);
+                       slots += copy;
+                       size -= copy;
                        if (!size)
                                break;
                        n++;
index 2659836..3647c65 100644 (file)
@@ -388,8 +388,11 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
                                err = -ENOMEM;
                                goto error;
                        }
+                       /* Avoid race with userspace read via bdev */
+                       lock_buffer(c_bh);
                        memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
                        set_buffer_uptodate(c_bh);
+                       unlock_buffer(c_bh);
                        mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
                        if (sb->s_flags & SB_SYNCHRONOUS)
                                err = sync_dirty_buffer(c_bh);
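
Each site that rewrites a whole FAT block now brackets the memset()/memcpy()
with lock_buffer()/unlock_buffer(), so a concurrent read of the underlying
block device cannot observe a half-updated buffer. The publish-under-lock
pattern in a kernel-style sketch (function name is illustrative):

#include <linux/buffer_head.h>
#include <linux/string.h>

/* Publish new contents atomically with respect to bdev readers. */
static void fill_block(struct buffer_head *bh, const void *src, size_t len)
{
        lock_buffer(bh);
        memcpy(bh->b_data, src, len);
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
        /* caller marks it dirty, e.g. mark_buffer_dirty_inode(bh, dir) */
}
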
index 0ee7274..01263ff 100644 (file)
@@ -246,7 +246,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
  * sys_open_by_handle_at: Open the file handle
  * @mountdirfd: directory file descriptor
  * @handle: file handle to be opened
- * @flag: open flags.
+ * @flags: open flags.
  *
  * @mountdirfd indicate the directory file descriptor
  * of the mount point. file handle is decoded relative
index b07b53f..30d55c9 100644 (file)
@@ -327,6 +327,7 @@ void flush_delayed_fput(void)
 {
        delayed_fput(NULL);
 }
+EXPORT_SYMBOL_GPL(flush_delayed_fput);
 
 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
 
index 8aaa7ee..8461a63 100644 (file)
@@ -164,8 +164,13 @@ static void finish_writeback_work(struct bdi_writeback *wb,
 
        if (work->auto_free)
                kfree(work);
-       if (done && atomic_dec_and_test(&done->cnt))
-               wake_up_all(done->waitq);
+       if (done) {
+               wait_queue_head_t *waitq = done->waitq;
+
+               /* @done can't be accessed after the following dec */
+               if (atomic_dec_and_test(&done->cnt))
+                       wake_up_all(waitq);
+       }
 }
 
 static void wb_queue_work(struct bdi_writeback *wb,
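
Once atomic_dec_and_test() drops the last reference, the waiter that owns
the wb_completion may wake, finish, and free it immediately, so
finish_writeback_work() has to load done->waitq before the decrement. The
same read-then-release ordering in a runnable sketch using C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

struct completion_sketch {
        atomic_int cnt;
        int *waitq;             /* stand-in for wait_queue_head_t * */
};

static void finish(struct completion_sketch *done)
{
        /* Read everything needed from *done first ... */
        int *waitq = done->waitq;

        /* ... because once this decrement hits zero, the waiter may free
         * done under us; only the cached pointer is safe to use. */
        if (atomic_fetch_sub(&done->cnt, 1) == 1)
                printf("wake waiters at %p\n", (void *)waitq);
}
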
@@ -900,7 +905,7 @@ restart:
  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
  * @bdi_id: target bdi id
  * @memcg_id: target memcg css id
- * @nr_pages: number of pages to write, 0 for best-effort dirty flushing
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
  * @reason: reason why some writeback work initiated
  * @done: target wb_completion
  *
index 87c2c96..138b5b4 100644 (file)
@@ -504,7 +504,6 @@ void put_fs_context(struct fs_context *fc)
        put_net(fc->net_ns);
        put_user_ns(fc->user_ns);
        put_cred(fc->cred);
-       kfree(fc->subtype);
        put_fc_log(fc);
        put_filesystem(fc->fs_type);
        kfree(fc->source);
@@ -571,17 +570,6 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
                return 0;
        }
 
-       if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
-           strcmp(param->key, "subtype") == 0) {
-               if (param->type != fs_value_is_string)
-                       return invalf(fc, "VFS: Legacy: Non-string subtype");
-               if (fc->subtype)
-                       return invalf(fc, "VFS: Legacy: Multiple subtype");
-               fc->subtype = param->string;
-               param->string = NULL;
-               return 0;
-       }
-
        if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
                return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
 
@@ -738,8 +726,6 @@ void vfs_clean_context(struct fs_context *fc)
        fc->s_fs_info = NULL;
        fc->sb_flags = 0;
        security_free_mnt_opts(&fc->security);
-       kfree(fc->subtype);
-       fc->subtype = NULL;
        kfree(fc->source);
        fc->source = NULL;
 
index 24fc5a5..0635cba 100644 (file)
@@ -27,3 +27,14 @@ config CUSE
 
          If you want to develop or use a userspace character device
          based on CUSE, answer Y or M.
+
+config VIRTIO_FS
+       tristate "Virtio Filesystem"
+       depends on FUSE_FS
+       select VIRTIO
+       help
+         The Virtio Filesystem allows guests to mount file systems from the
+         host.
+
+         If you want to share files between guests or with the host, answer Y
+         or M.
index 9485019..3e8cebf 100644 (file)
@@ -5,5 +5,7 @@
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
 obj-$(CONFIG_CUSE) += cuse.o
+obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+virtiofs-y += virtio_fs.o
index bab7a0d..00015d8 100644 (file)
@@ -142,11 +142,10 @@ static int cuse_open(struct inode *inode, struct file *file)
 
 static int cuse_release(struct inode *inode, struct file *file)
 {
-       struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
 
-       fuse_sync_release(fi, ff, file->f_flags);
+       fuse_sync_release(NULL, ff, file->f_flags);
        fuse_conn_put(fc);
 
        return 0;
@@ -299,6 +298,14 @@ static void cuse_gendev_release(struct device *dev)
        kfree(dev);
 }
 
+struct cuse_init_args {
+       struct fuse_args_pages ap;
+       struct cuse_init_in in;
+       struct cuse_init_out out;
+       struct page *page;
+       struct fuse_page_desc desc;
+};
+
 /**
  * cuse_process_init_reply - finish initializing CUSE channel
  *
@@ -306,21 +313,22 @@ static void cuse_gendev_release(struct device *dev)
  * required data structures for it.  Please read the comment at the
  * top of this file for high level overview.
  */
-static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+static void cuse_process_init_reply(struct fuse_conn *fc,
+                                   struct fuse_args *args, int error)
 {
+       struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
+       struct fuse_args_pages *ap = &ia->ap;
        struct cuse_conn *cc = fc_to_cc(fc), *pos;
-       struct cuse_init_out *arg = req->out.args[0].value;
-       struct page *page = req->pages[0];
+       struct cuse_init_out *arg = &ia->out;
+       struct page *page = ap->pages[0];
        struct cuse_devinfo devinfo = { };
        struct device *dev;
        struct cdev *cdev;
        dev_t devt;
        int rc, i;
 
-       if (req->out.h.error ||
-           arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+       if (error || arg->major != FUSE_KERNEL_VERSION || arg->minor < 11)
                goto err;
-       }
 
        fc->minor = arg->minor;
        fc->max_read = max_t(unsigned, arg->max_read, 4096);
@@ -329,7 +337,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
        /* parse init reply */
        cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
 
-       rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+       rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
                                &devinfo);
        if (rc)
                goto err;
@@ -396,7 +404,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
        dev_set_uevent_suppress(dev, 0);
        kobject_uevent(&dev->kobj, KOBJ_ADD);
 out:
-       kfree(arg);
+       kfree(ia);
        __free_page(page);
        return;
 
@@ -415,55 +423,49 @@ err:
 static int cuse_send_init(struct cuse_conn *cc)
 {
        int rc;
-       struct fuse_req *req;
        struct page *page;
        struct fuse_conn *fc = &cc->fc;
-       struct cuse_init_in *arg;
-       void *outarg;
+       struct cuse_init_args *ia;
+       struct fuse_args_pages *ap;
 
        BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
 
-       req = fuse_get_req_for_background(fc, 1);
-       if (IS_ERR(req)) {
-               rc = PTR_ERR(req);
-               goto err;
-       }
-
        rc = -ENOMEM;
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
-               goto err_put_req;
+               goto err;
 
-       outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
-       if (!outarg)
+       ia = kzalloc(sizeof(*ia), GFP_KERNEL);
+       if (!ia)
                goto err_free_page;
 
-       arg = &req->misc.cuse_init_in;
-       arg->major = FUSE_KERNEL_VERSION;
-       arg->minor = FUSE_KERNEL_MINOR_VERSION;
-       arg->flags |= CUSE_UNRESTRICTED_IOCTL;
-       req->in.h.opcode = CUSE_INIT;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(struct cuse_init_in);
-       req->in.args[0].value = arg;
-       req->out.numargs = 2;
-       req->out.args[0].size = sizeof(struct cuse_init_out);
-       req->out.args[0].value = outarg;
-       req->out.args[1].size = CUSE_INIT_INFO_MAX;
-       req->out.argvar = 1;
-       req->out.argpages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = req->out.args[1].size;
-       req->num_pages = 1;
-       req->end = cuse_process_init_reply;
-       fuse_request_send_background(fc, req);
-
-       return 0;
-
+       ap = &ia->ap;
+       ia->in.major = FUSE_KERNEL_VERSION;
+       ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
+       ia->in.flags |= CUSE_UNRESTRICTED_IOCTL;
+       ap->args.opcode = CUSE_INIT;
+       ap->args.in_numargs = 1;
+       ap->args.in_args[0].size = sizeof(ia->in);
+       ap->args.in_args[0].value = &ia->in;
+       ap->args.out_numargs = 2;
+       ap->args.out_args[0].size = sizeof(ia->out);
+       ap->args.out_args[0].value = &ia->out;
+       ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
+       ap->args.out_argvar = 1;
+       ap->args.out_pages = 1;
+       ap->num_pages = 1;
+       ap->pages = &ia->page;
+       ap->descs = &ia->desc;
+       ia->page = page;
+       ia->desc.length = ap->args.out_args[1].size;
+       ap->args.end = cuse_process_init_reply;
+
+       rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
+       if (rc) {
+               kfree(ia);
 err_free_page:
-       __free_page(page);
-err_put_req:
-       fuse_put_request(fc, req);
+               __free_page(page);
+       }
 err:
        return rc;
 }
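
cuse_send_init() now bundles the request descriptor, the init in/out
structs, the page pointer and its descriptor into one struct cuse_init_args,
and cuse_process_init_reply() recovers the whole bundle from the embedded
fuse_args with container_of(). That embed-and-recover idiom in a standalone
sketch:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct args { int opcode; };

struct init_args {
        struct args a;          /* embedded; its address is what the core sees */
        int out_value;
};

/* The core layer only knows about struct args ... */
static void complete(struct args *a, int error)
{
        /* ... but the owner gets its wrapper back for free. */
        struct init_args *ia = container_of(a, struct init_args, a);

        printf("opcode=%d out=%d err=%d\n", a->opcode, ia->out_value, error);
}

int main(void)
{
        struct init_args ia = { .a = { .opcode = 1 }, .out_value = 7 };

        complete(&ia.a, 0);
        return 0;
}
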
@@ -504,9 +506,9 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
         * Limit the cuse channel to requests that can
         * be represented in file->f_cred->user_ns.
         */
-       fuse_conn_init(&cc->fc, file->f_cred->user_ns);
+       fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
 
-       fud = fuse_dev_alloc(&cc->fc);
+       fud = fuse_dev_alloc_install(&cc->fc);
        if (!fud) {
                kfree(cc);
                return -ENOMEM;
@@ -519,6 +521,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
        rc = cuse_send_init(cc);
        if (rc) {
                fuse_dev_free(fud);
+               fuse_conn_put(&cc->fc);
                return rc;
        }
        file->private_data = fud;
index ea82375..ed1abc9 100644 (file)
@@ -40,107 +40,30 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
        return READ_ONCE(file->private_data);
 }
 
-static void fuse_request_init(struct fuse_req *req, struct page **pages,
-                             struct fuse_page_desc *page_descs,
-                             unsigned npages)
+static void fuse_request_init(struct fuse_req *req)
 {
        INIT_LIST_HEAD(&req->list);
        INIT_LIST_HEAD(&req->intr_entry);
        init_waitqueue_head(&req->waitq);
        refcount_set(&req->count, 1);
-       req->pages = pages;
-       req->page_descs = page_descs;
-       req->max_pages = npages;
        __set_bit(FR_PENDING, &req->flags);
 }
 
-static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
-                                         struct fuse_page_desc **desc)
-{
-       struct page **pages;
-
-       pages = kzalloc(npages * (sizeof(struct page *) +
-                                 sizeof(struct fuse_page_desc)), flags);
-       *desc = (void *) pages + npages * sizeof(struct page *);
-
-       return pages;
-}
-
-static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
+static struct fuse_req *fuse_request_alloc(gfp_t flags)
 {
        struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
-       if (req) {
-               struct page **pages = NULL;
-               struct fuse_page_desc *page_descs = NULL;
-
-               WARN_ON(npages > FUSE_MAX_MAX_PAGES);
-               if (npages > FUSE_REQ_INLINE_PAGES) {
-                       pages = fuse_req_pages_alloc(npages, flags,
-                                                    &page_descs);
-                       if (!pages) {
-                               kmem_cache_free(fuse_req_cachep, req);
-                               return NULL;
-                       }
-               } else if (npages) {
-                       pages = req->inline_pages;
-                       page_descs = req->inline_page_descs;
-               }
+       if (req)
+               fuse_request_init(req);
 
-               fuse_request_init(req, pages, page_descs, npages);
-       }
        return req;
 }
 
-struct fuse_req *fuse_request_alloc(unsigned npages)
-{
-       return __fuse_request_alloc(npages, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(fuse_request_alloc);
-
-struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
-{
-       return __fuse_request_alloc(npages, GFP_NOFS);
-}
-
-static void fuse_req_pages_free(struct fuse_req *req)
-{
-       if (req->pages != req->inline_pages)
-               kfree(req->pages);
-}
-
-bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
-                           gfp_t flags)
-{
-       struct page **pages;
-       struct fuse_page_desc *page_descs;
-       unsigned int npages = min_t(unsigned int,
-                                   max_t(unsigned int, req->max_pages * 2,
-                                         FUSE_DEFAULT_MAX_PAGES_PER_REQ),
-                                   fc->max_pages);
-       WARN_ON(npages <= req->max_pages);
-
-       pages = fuse_req_pages_alloc(npages, flags, &page_descs);
-       if (!pages)
-               return false;
-
-       memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages);
-       memcpy(page_descs, req->page_descs,
-              sizeof(struct fuse_page_desc) * req->max_pages);
-       fuse_req_pages_free(req);
-       req->pages = pages;
-       req->page_descs = page_descs;
-       req->max_pages = npages;
-
-       return true;
-}
-
-void fuse_request_free(struct fuse_req *req)
+static void fuse_request_free(struct fuse_req *req)
 {
-       fuse_req_pages_free(req);
        kmem_cache_free(fuse_req_cachep, req);
 }
 
-void __fuse_get_request(struct fuse_req *req)
+static void __fuse_get_request(struct fuse_req *req)
 {
        refcount_inc(&req->count);
 }
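
The deleted fuse_req_pages_alloc() carved the page-pointer array and the
fuse_page_desc array out of a single kzalloc(), deriving the descriptor
pointer by stepping past the pages; pages now travel in caller-owned
fuse_args_pages instead, so the helper is gone. The co-allocation idiom
itself, shown standalone (the desc fields are illustrative):

#include <stdlib.h>

struct page;                            /* opaque here */
struct page_desc { unsigned int length, offset; };

/* One allocation backs two parallel arrays: pages[0..n) followed
 * immediately by descs[0..n); freeing 'pages' frees both. */
static struct page **alloc_pages_and_descs(unsigned int n,
                                           struct page_desc **descs)
{
        struct page **pages;

        pages = calloc(1, n * (sizeof(struct page *) +
                               sizeof(struct page_desc)));
        if (pages)
                *descs = (struct page_desc *)(pages + n);
        return pages;
}
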
@@ -177,8 +100,9 @@ static void fuse_drop_waiting(struct fuse_conn *fc)
        }
 }
 
-static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
-                                      bool for_background)
+static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
 {
        struct fuse_req *req;
        int err;
@@ -201,7 +125,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
        if (fc->conn_error)
                goto out;
 
-       req = fuse_request_alloc(npages);
+       req = fuse_request_alloc(GFP_KERNEL);
        err = -ENOMEM;
        if (!req) {
                if (for_background)
@@ -229,101 +153,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
        return ERR_PTR(err);
 }
 
-struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
-{
-       return __fuse_get_req(fc, npages, false);
-}
-EXPORT_SYMBOL_GPL(fuse_get_req);
-
-struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
-                                            unsigned npages)
-{
-       return __fuse_get_req(fc, npages, true);
-}
-EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
-
-/*
- * Return request in fuse_file->reserved_req.  However that may
- * currently be in use.  If that is the case, wait for it to become
- * available.
- */
-static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
-                                        struct file *file)
-{
-       struct fuse_req *req = NULL;
-       struct fuse_inode *fi = get_fuse_inode(file_inode(file));
-       struct fuse_file *ff = file->private_data;
-
-       do {
-               wait_event(fc->reserved_req_waitq, ff->reserved_req);
-               spin_lock(&fi->lock);
-               if (ff->reserved_req) {
-                       req = ff->reserved_req;
-                       ff->reserved_req = NULL;
-                       req->stolen_file = get_file(file);
-               }
-               spin_unlock(&fi->lock);
-       } while (!req);
-
-       return req;
-}
-
-/*
- * Put stolen request back into fuse_file->reserved_req
- */
-static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
-{
-       struct file *file = req->stolen_file;
-       struct fuse_inode *fi = get_fuse_inode(file_inode(file));
-       struct fuse_file *ff = file->private_data;
-
-       WARN_ON(req->max_pages);
-       spin_lock(&fi->lock);
-       memset(req, 0, sizeof(*req));
-       fuse_request_init(req, NULL, NULL, 0);
-       BUG_ON(ff->reserved_req);
-       ff->reserved_req = req;
-       wake_up_all(&fc->reserved_req_waitq);
-       spin_unlock(&fi->lock);
-       fput(file);
-}
-
-/*
- * Gets a request for a file operation; always succeeds
- *
- * This is used for sending the FLUSH request, which must get to
- * userspace, due to POSIX locks which may need to be unlocked.
- *
- * If allocation fails due to OOM, use the reserved request in
- * fuse_file.
- *
- * This is very unlikely to deadlock accidentally, since the
- * filesystem should not have its own file open.  If deadlock is
- * intentional, it can still be broken by "aborting" the filesystem.
- */
-struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
-                                            struct file *file)
-{
-       struct fuse_req *req;
-
-       atomic_inc(&fc->num_waiting);
-       wait_event(fc->blocked_waitq, fc->initialized);
-       /* Matches smp_wmb() in fuse_set_initialized() */
-       smp_rmb();
-       req = fuse_request_alloc(0);
-       if (!req)
-               req = get_reserved_req(fc, file);
-
-       req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
-       req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
-       req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
-
-       __set_bit(FR_WAITING, &req->flags);
-       __clear_bit(FR_BACKGROUND, &req->flags);
-       return req;
-}
-
-void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 {
        if (refcount_dec_and_test(&req->count)) {
                if (test_bit(FR_BACKGROUND, &req->flags)) {
@@ -342,15 +172,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
                        fuse_drop_waiting(fc);
                }
 
-               if (req->stolen_file)
-                       put_reserved_req(fc, req);
-               else
-                       fuse_request_free(req);
+               fuse_request_free(req);
        }
 }
-EXPORT_SYMBOL_GPL(fuse_put_request);
 
-static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
 {
        unsigned nbytes = 0;
        unsigned i;
@@ -360,25 +186,47 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 
        return nbytes;
 }
+EXPORT_SYMBOL_GPL(fuse_len_args);
 
-static u64 fuse_get_unique(struct fuse_iqueue *fiq)
+u64 fuse_get_unique(struct fuse_iqueue *fiq)
 {
        fiq->reqctr += FUSE_REQ_ID_STEP;
        return fiq->reqctr;
 }
+EXPORT_SYMBOL_GPL(fuse_get_unique);
 
 static unsigned int fuse_req_hash(u64 unique)
 {
        return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
 }
 
-static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
+/*
+ * A new request is available; wake fiq->waitq
+ */
+static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       wake_up(&fiq->waitq);
+       kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+       spin_unlock(&fiq->lock);
+}
+
+const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
+       .wake_forget_and_unlock         = fuse_dev_wake_and_unlock,
+       .wake_interrupt_and_unlock      = fuse_dev_wake_and_unlock,
+       .wake_pending_and_unlock        = fuse_dev_wake_and_unlock,
+};
+EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
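
The fuse_iqueue_ops indirection introduced here decouples input-queue wake-ups from /dev/fuse, so an alternative transport can plug in its own notification path. A minimal sketch under that assumption (the example_* names are hypothetical, not part of this patch); each callback is entered with fiq->lock held and must release it, exactly like fuse_dev_wake_and_unlock() above:

/* hypothetical transport: signal its own queue instead of fiq->waitq */
static void example_wake_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/* e.g. kick a worker or a device queue here */
	spin_unlock(&fiq->lock);
}

static const struct fuse_iqueue_ops example_fiq_ops = {
	.wake_forget_and_unlock		= example_wake_and_unlock,
	.wake_interrupt_and_unlock	= example_wake_and_unlock,
	.wake_pending_and_unlock	= example_wake_and_unlock,
};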
+
+static void queue_request_and_unlock(struct fuse_iqueue *fiq,
+                                    struct fuse_req *req)
+__releases(fiq->lock)
 {
        req->in.h.len = sizeof(struct fuse_in_header) +
-               len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+               fuse_len_args(req->args->in_numargs,
+                             (struct fuse_arg *) req->args->in_args);
        list_add_tail(&req->list, &fiq->pending);
-       wake_up_locked(&fiq->waitq);
-       kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+       fiq->ops->wake_pending_and_unlock(fiq);
 }
 
 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
@@ -389,16 +237,15 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
        forget->forget_one.nodeid = nodeid;
        forget->forget_one.nlookup = nlookup;
 
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        if (fiq->connected) {
                fiq->forget_list_tail->next = forget;
                fiq->forget_list_tail = forget;
-               wake_up_locked(&fiq->waitq);
-               kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+               fiq->ops->wake_forget_and_unlock(fiq);
        } else {
                kfree(forget);
+               spin_unlock(&fiq->lock);
        }
-       spin_unlock(&fiq->waitq.lock);
 }
 
 static void flush_bg_queue(struct fuse_conn *fc)
@@ -412,10 +259,9 @@ static void flush_bg_queue(struct fuse_conn *fc)
                req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                req->in.h.unique = fuse_get_unique(fiq);
-               queue_request(fiq, req);
-               spin_unlock(&fiq->waitq.lock);
+               queue_request_and_unlock(fiq, req);
        }
 }
 
@@ -427,21 +273,24 @@ static void flush_bg_queue(struct fuse_conn *fc)
  * the 'end' callback is called if given, else the reference to the
  * request is released
  */
-static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
 {
        struct fuse_iqueue *fiq = &fc->iq;
+       bool async;
 
        if (test_and_set_bit(FR_FINISHED, &req->flags))
                goto put_request;
+
+       async = req->args->end;
        /*
         * test_and_set_bit() implies smp_mb() between bit
         * changing and below intr_entry check. Pairs with
         * smp_mb() from queue_interrupt().
         */
        if (!list_empty(&req->intr_entry)) {
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                list_del_init(&req->intr_entry);
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
        }
        WARN_ON(test_bit(FR_PENDING, &req->flags));
        WARN_ON(test_bit(FR_SENT, &req->flags));
@@ -475,18 +324,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
                wake_up(&req->waitq);
        }
 
-       if (req->end)
-               req->end(fc, req);
+       if (async)
+               req->args->end(fc, req->args, req->out.h.error);
 put_request:
        fuse_put_request(fc, req);
 }
+EXPORT_SYMBOL_GPL(fuse_request_end);
 
 static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        /* Check whether we've already sent a request to interrupt this req */
        if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
                return -EINVAL;
        }
 
@@ -499,13 +349,13 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
                smp_mb();
                if (test_bit(FR_FINISHED, &req->flags)) {
                        list_del_init(&req->intr_entry);
-                       spin_unlock(&fiq->waitq.lock);
+                       spin_unlock(&fiq->lock);
                        return 0;
                }
-               wake_up_locked(&fiq->waitq);
-               kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+               fiq->ops->wake_interrupt_and_unlock(fiq);
+       } else {
+               spin_unlock(&fiq->lock);
        }
-       spin_unlock(&fiq->waitq.lock);
        return 0;
 }
 
@@ -535,16 +385,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
                if (!err)
                        return;
 
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                /* Request is not yet in userspace, bail out */
                if (test_bit(FR_PENDING, &req->flags)) {
                        list_del(&req->list);
-                       spin_unlock(&fiq->waitq.lock);
+                       spin_unlock(&fiq->lock);
                        __fuse_put_request(req);
                        req->out.h.error = -EINTR;
                        return;
                }
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
        }
 
        /*
@@ -559,101 +409,110 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
        struct fuse_iqueue *fiq = &fc->iq;
 
        BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        if (!fiq->connected) {
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
                req->out.h.error = -ENOTCONN;
        } else {
                req->in.h.unique = fuse_get_unique(fiq);
-               queue_request(fiq, req);
                /* acquire extra reference, since request is still needed
-                  after request_end() */
+                  after fuse_request_end() */
                __fuse_get_request(req);
-               spin_unlock(&fiq->waitq.lock);
+               queue_request_and_unlock(fiq, req);
 
                request_wait_answer(fc, req);
-               /* Pairs with smp_wmb() in request_end() */
+               /* Pairs with smp_wmb() in fuse_request_end() */
                smp_rmb();
        }
 }
 
-void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
-{
-       __set_bit(FR_ISREPLY, &req->flags);
-       if (!test_bit(FR_WAITING, &req->flags)) {
-               __set_bit(FR_WAITING, &req->flags);
-               atomic_inc(&fc->num_waiting);
-       }
-       __fuse_request_send(fc, req);
-}
-EXPORT_SYMBOL_GPL(fuse_request_send);
-
 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
 {
-       if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
-               args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
+       if (fc->minor < 4 && args->opcode == FUSE_STATFS)
+               args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
 
        if (fc->minor < 9) {
-               switch (args->in.h.opcode) {
+               switch (args->opcode) {
                case FUSE_LOOKUP:
                case FUSE_CREATE:
                case FUSE_MKNOD:
                case FUSE_MKDIR:
                case FUSE_SYMLINK:
                case FUSE_LINK:
-                       args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+                       args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
                        break;
                case FUSE_GETATTR:
                case FUSE_SETATTR:
-                       args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+                       args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
                        break;
                }
        }
        if (fc->minor < 12) {
-               switch (args->in.h.opcode) {
+               switch (args->opcode) {
                case FUSE_CREATE:
-                       args->in.args[0].size = sizeof(struct fuse_open_in);
+                       args->in_args[0].size = sizeof(struct fuse_open_in);
                        break;
                case FUSE_MKNOD:
-                       args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
+                       args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
                        break;
                }
        }
 }
 
+static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req)
+{
+       req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
+       req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
+       req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+}
+
+static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
+{
+       req->in.h.opcode = args->opcode;
+       req->in.h.nodeid = args->nodeid;
+       req->args = args;
+}
+
 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
 {
        struct fuse_req *req;
        ssize_t ret;
 
-       req = fuse_get_req(fc, 0);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       if (args->force) {
+               atomic_inc(&fc->num_waiting);
+               req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL);
+
+               if (!args->nocreds)
+                       fuse_force_creds(fc, req);
+
+               __set_bit(FR_WAITING, &req->flags);
+               __set_bit(FR_FORCE, &req->flags);
+       } else {
+               WARN_ON(args->nocreds);
+               req = fuse_get_req(fc, false);
+               if (IS_ERR(req))
+                       return PTR_ERR(req);
+       }
 
        /* Needs to be done after fuse_get_req() so that fc->minor is valid */
        fuse_adjust_compat(fc, args);
+       fuse_args_to_req(req, args);
 
-       req->in.h.opcode = args->in.h.opcode;
-       req->in.h.nodeid = args->in.h.nodeid;
-       req->in.numargs = args->in.numargs;
-       memcpy(req->in.args, args->in.args,
-              args->in.numargs * sizeof(struct fuse_in_arg));
-       req->out.argvar = args->out.argvar;
-       req->out.numargs = args->out.numargs;
-       memcpy(req->out.args, args->out.args,
-              args->out.numargs * sizeof(struct fuse_arg));
-       fuse_request_send(fc, req);
+       if (!args->noreply)
+               __set_bit(FR_ISREPLY, &req->flags);
+       __fuse_request_send(fc, req);
        ret = req->out.h.error;
-       if (!ret && args->out.argvar) {
-               BUG_ON(args->out.numargs != 1);
-               ret = req->out.args[0].size;
+       if (!ret && args->out_argvar) {
+               BUG_ON(args->out_numargs == 0);
+               ret = args->out_args[args->out_numargs - 1].size;
        }
        fuse_put_request(fc, req);
 
        return ret;
 }
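
With struct fuse_args flattened, a synchronous caller now fills opcode, nodeid and the in/out argument arrays directly and calls fuse_simple_request(). A minimal sketch of the new calling convention, mirroring the fuse_do_getattr() conversion further down in this patch (fc and inode are the caller's):

	FUSE_ARGS(args);
	struct fuse_getattr_in inarg;
	struct fuse_attr_out outarg;
	ssize_t err;

	memset(&inarg, 0, sizeof(inarg));
	args.opcode = FUSE_GETATTR;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fc, &args);	/* 0 or negative errno */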
 
-bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
+static bool fuse_request_queue_background(struct fuse_conn *fc,
+                                         struct fuse_req *req)
 {
        bool queued = false;
 
@@ -681,56 +540,63 @@ bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
        return queued;
 }
 
-void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
+                           gfp_t gfp_flags)
 {
-       WARN_ON(!req->end);
+       struct fuse_req *req;
+
+       if (args->force) {
+               WARN_ON(!args->nocreds);
+               req = fuse_request_alloc(gfp_flags);
+               if (!req)
+                       return -ENOMEM;
+               __set_bit(FR_BACKGROUND, &req->flags);
+       } else {
+               WARN_ON(args->nocreds);
+               req = fuse_get_req(fc, true);
+               if (IS_ERR(req))
+                       return PTR_ERR(req);
+       }
+
+       fuse_args_to_req(req, args);
+
        if (!fuse_request_queue_background(fc, req)) {
-               req->out.h.error = -ENOTCONN;
-               req->end(fc, req);
                fuse_put_request(fc, req);
+               return -ENOTCONN;
        }
+
+       return 0;
 }
-EXPORT_SYMBOL_GPL(fuse_request_send_background);
+EXPORT_SYMBOL_GPL(fuse_simple_background);
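
Background submitters embed struct fuse_args inside their own allocation so the optional args->end callback can recover the containing state with container_of(); on a queueing failure the caller invokes the callback itself, as fuse_retrieve() and fuse_file_put() do below. A sketch with hypothetical example_* names:

struct example_async {				/* caller-owned state */
	struct fuse_args args;
	struct fuse_notify_retrieve_in inarg;
};

static void example_end(struct fuse_conn *fc, struct fuse_args *args,
			int error)
{
	struct example_async *ea = container_of(args, typeof(*ea), args);

	kfree(ea);				/* request is finished */
}

	/* ... in the submitting function ... */
	ea->args.end = example_end;
	if (fuse_simple_background(fc, &ea->args, GFP_KERNEL))
		example_end(fc, &ea->args, -ENOTCONN);	/* never queued */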
 
-static int fuse_request_send_notify_reply(struct fuse_conn *fc,
-                                         struct fuse_req *req, u64 unique)
+static int fuse_simple_notify_reply(struct fuse_conn *fc,
+                                   struct fuse_args *args, u64 unique)
 {
-       int err = -ENODEV;
+       struct fuse_req *req;
        struct fuse_iqueue *fiq = &fc->iq;
+       int err = 0;
+
+       req = fuse_get_req(fc, false);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
 
        __clear_bit(FR_ISREPLY, &req->flags);
        req->in.h.unique = unique;
-       spin_lock(&fiq->waitq.lock);
+
+       fuse_args_to_req(req, args);
+
+       spin_lock(&fiq->lock);
        if (fiq->connected) {
-               queue_request(fiq, req);
-               err = 0;
+               queue_request_and_unlock(fiq, req);
+       } else {
+               err = -ENODEV;
+               spin_unlock(&fiq->lock);
+               fuse_put_request(fc, req);
        }
-       spin_unlock(&fiq->waitq.lock);
 
        return err;
 }
 
-void fuse_force_forget(struct file *file, u64 nodeid)
-{
-       struct inode *inode = file_inode(file);
-       struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
-       struct fuse_forget_in inarg;
-
-       memset(&inarg, 0, sizeof(inarg));
-       inarg.nlookup = 1;
-       req = fuse_get_req_nofail_nopages(fc, file);
-       req->in.h.opcode = FUSE_FORGET;
-       req->in.h.nodeid = nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(inarg);
-       req->in.args[0].value = &inarg;
-       __clear_bit(FR_ISREPLY, &req->flags);
-       __fuse_request_send(fc, req);
-       /* ignore errors */
-       fuse_put_request(fc, req);
-}
-
 /*
  * Lock the request.  Up to the next unlock_request() there mustn't be
  * anything that could cause a page-fault.  If the request was already
@@ -1084,14 +950,15 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 {
        unsigned i;
        struct fuse_req *req = cs->req;
+       struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
 
-       for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+       for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
                int err;
-               unsigned offset = req->page_descs[i].offset;
-               unsigned count = min(nbytes, req->page_descs[i].length);
+               unsigned int offset = ap->descs[i].offset;
+               unsigned int count = min(nbytes, ap->descs[i].length);
 
-               err = fuse_copy_page(cs, &req->pages[i], offset, count,
-                                    zeroing);
+               err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
                if (err)
                        return err;
 
@@ -1149,12 +1016,12 @@ static int request_pending(struct fuse_iqueue *fiq)
  * Unlike other requests this is assembled on demand, without a need
  * to allocate a separate fuse_req structure.
  *
- * Called with fiq->waitq.lock held, releases it
+ * Called with fiq->lock held, releases it
  */
 static int fuse_read_interrupt(struct fuse_iqueue *fiq,
                               struct fuse_copy_state *cs,
                               size_t nbytes, struct fuse_req *req)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        struct fuse_in_header ih;
        struct fuse_interrupt_in arg;
@@ -1169,7 +1036,7 @@ __releases(fiq->waitq.lock)
        ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
        arg.unique = req->in.h.unique;
 
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
        if (nbytes < reqsize)
                return -EINVAL;
 
@@ -1181,9 +1048,9 @@ __releases(fiq->waitq.lock)
        return err ? err : reqsize;
 }
 
-static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
-                                              unsigned max,
-                                              unsigned *countp)
+struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
+                                            unsigned int max,
+                                            unsigned int *countp)
 {
        struct fuse_forget_link *head = fiq->forget_list_head.next;
        struct fuse_forget_link **newhead = &head;
@@ -1202,14 +1069,15 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
 
        return head;
 }
+EXPORT_SYMBOL(fuse_dequeue_forget);
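
Now that it is exported, a transport other than /dev/fuse can drain the forget list itself. The locking contract is unchanged from fuse_read_batch_forget() below: the caller enters with fiq->lock held. A minimal sketch:

	unsigned int count;
	struct fuse_forget_link *head;

	spin_lock(&fiq->lock);
	head = fuse_dequeue_forget(fiq, max_forgets, &count);
	spin_unlock(&fiq->lock);
	/* walk 'head', hand each forget_one to the transport, kfree() links */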
 
 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
                                   struct fuse_copy_state *cs,
                                   size_t nbytes)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        int err;
-       struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
+       struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
        struct fuse_forget_in arg = {
                .nlookup = forget->forget_one.nlookup,
        };
@@ -1220,7 +1088,7 @@ __releases(fiq->waitq.lock)
                .len = sizeof(ih) + sizeof(arg),
        };
 
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
        kfree(forget);
        if (nbytes < ih.len)
                return -EINVAL;
@@ -1238,7 +1106,7 @@ __releases(fiq->waitq.lock)
 
 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
                                   struct fuse_copy_state *cs, size_t nbytes)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        int err;
        unsigned max_forgets;
@@ -1252,13 +1120,13 @@ __releases(fiq->waitq.lock)
        };
 
        if (nbytes < ih.len) {
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
                return -EINVAL;
        }
 
        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
-       head = dequeue_forget(fiq, max_forgets, &count);
-       spin_unlock(&fiq->waitq.lock);
+       head = fuse_dequeue_forget(fiq, max_forgets, &count);
+       spin_unlock(&fiq->lock);
 
        arg.count = count;
        ih.len += count * sizeof(struct fuse_forget_one);
@@ -1288,7 +1156,7 @@ __releases(fiq->waitq.lock)
 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
                            struct fuse_copy_state *cs,
                            size_t nbytes)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
                return fuse_read_single_forget(fiq, cs, nbytes);
@@ -1302,7 +1170,7 @@ __releases(fiq->waitq.lock)
  * the pending list and copies request data to userspace buffer.  If
  * no reply is needed (FORGET) or request has been aborted or there
  * was an error during the copying then it's finished by calling
- * request_end().  Otherwise add it to the processing list, and set
+ * fuse_request_end().  Otherwise add it to the processing list, and set
  * the 'sent' flag.
  */
 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
@@ -1313,21 +1181,42 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
        struct fuse_iqueue *fiq = &fc->iq;
        struct fuse_pqueue *fpq = &fud->pq;
        struct fuse_req *req;
-       struct fuse_in *in;
+       struct fuse_args *args;
        unsigned reqsize;
        unsigned int hash;
 
+       /*
+        * Require a sane minimum read buffer - one with capacity for the
+        * fixed part of any request header plus negotiated max_write room
+        * for data.
+        *
+        * Historically libfuse reserves 4K for fixed header room, but e.g.
+        * GlusterFS reserves only 80 bytes
+        *
+        *      = `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
+        *
+        * which is the absolute minimum any sane filesystem should be using
+        * for header room.
+        */
+       if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
+                          sizeof(struct fuse_in_header) +
+                          sizeof(struct fuse_write_in) +
+                          fc->max_write))
+               return -EINVAL;
+
  restart:
-       spin_lock(&fiq->waitq.lock);
-       err = -EAGAIN;
-       if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
-           !request_pending(fiq))
-               goto err_unlock;
+       for (;;) {
+               spin_lock(&fiq->lock);
+               if (!fiq->connected || request_pending(fiq))
+                       break;
+               spin_unlock(&fiq->lock);
 
-       err = wait_event_interruptible_exclusive_locked(fiq->waitq,
+               if (file->f_flags & O_NONBLOCK)
+                       return -EAGAIN;
+               err = wait_event_interruptible_exclusive(fiq->waitq,
                                !fiq->connected || request_pending(fiq));
-       if (err)
-               goto err_unlock;
+               if (err)
+                       return err;
+       }
 
        if (!fiq->connected) {
                err = fc->aborted ? -ECONNABORTED : -ENODEV;
@@ -1351,28 +1240,28 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
        req = list_entry(fiq->pending.next, struct fuse_req, list);
        clear_bit(FR_PENDING, &req->flags);
        list_del_init(&req->list);
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
 
-       in = &req->in;
-       reqsize = in->h.len;
+       args = req->args;
+       reqsize = req->in.h.len;
 
        /* If request is too large, reply with an error and restart the read */
        if (nbytes < reqsize) {
                req->out.h.error = -EIO;
                /* SETXATTR is special, since its data may be too large */
-               if (in->h.opcode == FUSE_SETXATTR)
+               if (args->opcode == FUSE_SETXATTR)
                        req->out.h.error = -E2BIG;
-               request_end(fc, req);
+               fuse_request_end(fc, req);
                goto restart;
        }
        spin_lock(&fpq->lock);
        list_add(&req->list, &fpq->io);
        spin_unlock(&fpq->lock);
        cs->req = req;
-       err = fuse_copy_one(cs, &in->h, sizeof(in->h));
+       err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
        if (!err)
-               err = fuse_copy_args(cs, in->numargs, in->argpages,
-                                    (struct fuse_arg *) in->args, 0);
+               err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
+                                    (struct fuse_arg *) args->in_args, 0);
        fuse_copy_finish(cs);
        spin_lock(&fpq->lock);
        clear_bit(FR_LOCKED, &req->flags);
@@ -1405,11 +1294,11 @@ out_end:
        if (!test_bit(FR_PRIVATE, &req->flags))
                list_del_init(&req->list);
        spin_unlock(&fpq->lock);
-       request_end(fc, req);
+       fuse_request_end(fc, req);
        return err;
 
  err_unlock:
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
        return err;
 }
 
@@ -1728,9 +1617,19 @@ out_finish:
        return err;
 }
 
-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+struct fuse_retrieve_args {
+       struct fuse_args_pages ap;
+       struct fuse_notify_retrieve_in inarg;
+};
+
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args,
+                             int error)
 {
-       release_pages(req->pages, req->num_pages);
+       struct fuse_retrieve_args *ra =
+               container_of(args, typeof(*ra), ap.args);
+
+       release_pages(ra->ap.pages, ra->ap.num_pages);
+       kfree(ra);
 }
 
 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
@@ -1738,13 +1637,16 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 {
        int err;
        struct address_space *mapping = inode->i_mapping;
-       struct fuse_req *req;
        pgoff_t index;
        loff_t file_size;
        unsigned int num;
        unsigned int offset;
        size_t total_len = 0;
        unsigned int num_pages;
+       struct fuse_retrieve_args *ra;
+       size_t args_size = sizeof(*ra);
+       struct fuse_args_pages *ap;
+       struct fuse_args *args;
 
        offset = outarg->offset & ~PAGE_MASK;
        file_size = i_size_read(inode);
@@ -1758,19 +1660,26 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
        num_pages = min(num_pages, fc->max_pages);
 
-       req = fuse_get_req(fc, num_pages);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
 
-       req->in.h.opcode = FUSE_NOTIFY_REPLY;
-       req->in.h.nodeid = outarg->nodeid;
-       req->in.numargs = 2;
-       req->in.argpages = 1;
-       req->end = fuse_retrieve_end;
+       ra = kzalloc(args_size, GFP_KERNEL);
+       if (!ra)
+               return -ENOMEM;
+
+       ap = &ra->ap;
+       ap->pages = (void *) (ra + 1);
+       ap->descs = (void *) (ap->pages + num_pages);
+
+       args = &ap->args;
+       args->nodeid = outarg->nodeid;
+       args->opcode = FUSE_NOTIFY_REPLY;
+       args->in_numargs = 2;
+       args->in_pages = true;
+       args->end = fuse_retrieve_end;
 
        index = outarg->offset >> PAGE_SHIFT;
 
-       while (num && req->num_pages < num_pages) {
+       while (num && ap->num_pages < num_pages) {
                struct page *page;
                unsigned int this_num;
 
@@ -1779,27 +1688,25 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
                        break;
 
                this_num = min_t(unsigned, num, PAGE_SIZE - offset);
-               req->pages[req->num_pages] = page;
-               req->page_descs[req->num_pages].offset = offset;
-               req->page_descs[req->num_pages].length = this_num;
-               req->num_pages++;
+               ap->pages[ap->num_pages] = page;
+               ap->descs[ap->num_pages].offset = offset;
+               ap->descs[ap->num_pages].length = this_num;
+               ap->num_pages++;
 
                offset = 0;
                num -= this_num;
                total_len += this_num;
                index++;
        }
-       req->misc.retrieve_in.offset = outarg->offset;
-       req->misc.retrieve_in.size = total_len;
-       req->in.args[0].size = sizeof(req->misc.retrieve_in);
-       req->in.args[0].value = &req->misc.retrieve_in;
-       req->in.args[1].size = total_len;
+       ra->inarg.offset = outarg->offset;
+       ra->inarg.size = total_len;
+       args->in_args[0].size = sizeof(ra->inarg);
+       args->in_args[0].value = &ra->inarg;
+       args->in_args[1].size = total_len;
 
-       err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-       if (err) {
-               fuse_retrieve_end(fc, req);
-               fuse_put_request(fc, req);
-       }
+       err = fuse_simple_notify_reply(fc, args, outarg->notify_unique);
+       if (err)
+               fuse_retrieve_end(fc, args, err);
 
        return err;
 }
@@ -1885,27 +1792,25 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
        return NULL;
 }
 
-static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
                         unsigned nbytes)
 {
        unsigned reqsize = sizeof(struct fuse_out_header);
 
-       if (out->h.error)
-               return nbytes != reqsize ? -EINVAL : 0;
-
-       reqsize += len_args(out->numargs, out->args);
+       reqsize += fuse_len_args(args->out_numargs, args->out_args);
 
-       if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+       if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
                return -EINVAL;
        else if (reqsize > nbytes) {
-               struct fuse_arg *lastarg = &out->args[out->numargs-1];
+               struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
                unsigned diffsize = reqsize - nbytes;
+
                if (diffsize > lastarg->size)
                        return -EINVAL;
                lastarg->size -= diffsize;
        }
-       return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
-                             out->page_zeroing);
+       return fuse_copy_args(cs, args->out_numargs, args->out_pages,
+                             args->out_args, args->page_zeroing);
 }
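
The out_argvar contract is preserved by this conversion: the size of the last out argument is an upper bound, copy_out_args() trims it to what the daemon actually wrote, and fuse_simple_request() returns the trimmed size. A sketch of a variable-size reply, mirroring fuse_readlink_page() in the dir.c hunks below:

	args.out_argvar = true;			/* last out arg may shrink */
	args.out_numargs = 1;
	args.out_args[0].size = PAGE_SIZE - 1;	/* maximum accepted */
	res = fuse_simple_request(fc, &args);	/* >= 0: actual size */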
 
 /*
@@ -1913,7 +1818,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
  * the write buffer.  The request is then searched on the processing
  * list by the unique ID found in the header.  If found, then remove
  * it from the list and copy the rest of the buffer to the request.
- * The request is finished by calling request_end()
+ * The request is finished by calling fuse_request_end().
  */
 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
                                 struct fuse_copy_state *cs, size_t nbytes)
@@ -1984,10 +1889,13 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
        set_bit(FR_LOCKED, &req->flags);
        spin_unlock(&fpq->lock);
        cs->req = req;
-       if (!req->out.page_replace)
+       if (!req->args->page_replace)
                cs->move_pages = 0;
 
-       err = copy_out_args(cs, &req->out, nbytes);
+       if (oh.error)
+               err = nbytes != sizeof(oh) ? -EINVAL : 0;
+       else
+               err = copy_out_args(cs, req->args, nbytes);
        fuse_copy_finish(cs);
 
        spin_lock(&fpq->lock);
@@ -2000,7 +1908,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
                list_del_init(&req->list);
        spin_unlock(&fpq->lock);
 
-       request_end(fc, req);
+       fuse_request_end(fc, req);
 out:
        return err ? err : nbytes;
 
@@ -2121,12 +2029,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
        fiq = &fud->fc->iq;
        poll_wait(file, &fiq->waitq, wait);
 
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        if (!fiq->connected)
                mask = EPOLLERR;
        else if (request_pending(fiq))
                mask |= EPOLLIN | EPOLLRDNORM;
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
 
        return mask;
 }
@@ -2140,7 +2048,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
                req->out.h.error = -ECONNABORTED;
                clear_bit(FR_SENT, &req->flags);
                list_del_init(&req->list);
-               request_end(fc, req);
+               fuse_request_end(fc, req);
        }
 }
 
@@ -2221,15 +2129,15 @@ void fuse_abort_conn(struct fuse_conn *fc)
                flush_bg_queue(fc);
                spin_unlock(&fc->bg_lock);
 
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                fiq->connected = 0;
                list_for_each_entry(req, &fiq->pending, list)
                        clear_bit(FR_PENDING, &req->flags);
                list_splice_tail_init(&fiq->pending, &to_end);
                while (forget_pending(fiq))
-                       kfree(dequeue_forget(fiq, 1, NULL));
-               wake_up_all_locked(&fiq->waitq);
-               spin_unlock(&fiq->waitq.lock);
+                       kfree(fuse_dequeue_forget(fiq, 1, NULL));
+               wake_up_all(&fiq->waitq);
+               spin_unlock(&fiq->lock);
                kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
                end_polls(fc);
                wake_up_all(&fc->blocked_waitq);
@@ -2296,7 +2204,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
        if (new->private_data)
                return -EINVAL;
 
-       fud = fuse_dev_alloc(fc);
+       fud = fuse_dev_alloc_install(fc);
        if (!fud)
                return -ENOMEM;
 
index dd0f64f..54d638f 100644 (file)
@@ -24,20 +24,54 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
        set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
 }
 
+#if BITS_PER_LONG >= 64
+static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+       entry->d_fsdata = (void *) time;
+}
+
+static inline u64 fuse_dentry_time(const struct dentry *entry)
+{
+       return (u64)entry->d_fsdata;
+}
+
+#else
 union fuse_dentry {
        u64 time;
        struct rcu_head rcu;
 };
 
-static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
+static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
 {
-       ((union fuse_dentry *) entry->d_fsdata)->time = time;
+       ((union fuse_dentry *) dentry->d_fsdata)->time = time;
 }
 
-static inline u64 fuse_dentry_time(struct dentry *entry)
+static inline u64 fuse_dentry_time(const struct dentry *entry)
 {
        return ((union fuse_dentry *) entry->d_fsdata)->time;
 }
+#endif
+
+static void fuse_dentry_settime(struct dentry *dentry, u64 time)
+{
+       struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
+       bool delete = !time && fc->delete_stale;
+       /*
+        * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
+        * Don't care about races; either way it's just an optimization.
+        */
+       if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
+           (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
+               spin_lock(&dentry->d_lock);
+               if (!delete)
+                       dentry->d_flags &= ~DCACHE_OP_DELETE;
+               else
+                       dentry->d_flags |= DCACHE_OP_DELETE;
+               spin_unlock(&dentry->d_lock);
+       }
+
+       __fuse_dentry_settime(dentry, time);
+}
 
 /*
  * FUSE caches dentries and attributes with separate timeout.  The
@@ -139,14 +173,14 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
                             struct fuse_entry_out *outarg)
 {
        memset(outarg, 0, sizeof(struct fuse_entry_out));
-       args->in.h.opcode = FUSE_LOOKUP;
-       args->in.h.nodeid = nodeid;
-       args->in.numargs = 1;
-       args->in.args[0].size = name->len + 1;
-       args->in.args[0].value = name->name;
-       args->out.numargs = 1;
-       args->out.args[0].size = sizeof(struct fuse_entry_out);
-       args->out.args[0].value = outarg;
+       args->opcode = FUSE_LOOKUP;
+       args->nodeid = nodeid;
+       args->in_numargs = 1;
+       args->in_args[0].size = name->len + 1;
+       args->in_args[0].value = name->name;
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(struct fuse_entry_out);
+       args->out_args[0].value = outarg;
 }
 
 /*
@@ -242,9 +276,11 @@ invalid:
        goto out;
 }
 
+#if BITS_PER_LONG < 64
 static int fuse_dentry_init(struct dentry *dentry)
 {
-       dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
+       dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
+                                  GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
 
        return dentry->d_fsdata ? 0 : -ENOMEM;
 }
@@ -254,16 +290,27 @@ static void fuse_dentry_release(struct dentry *dentry)
 
        kfree_rcu(fd, rcu);
 }
+#endif
+
+static int fuse_dentry_delete(const struct dentry *dentry)
+{
+       return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
+}
 
 const struct dentry_operations fuse_dentry_operations = {
        .d_revalidate   = fuse_dentry_revalidate,
+       .d_delete       = fuse_dentry_delete,
+#if BITS_PER_LONG < 64
        .d_init         = fuse_dentry_init,
        .d_release      = fuse_dentry_release,
+#endif
 };
 
 const struct dentry_operations fuse_root_dentry_operations = {
+#if BITS_PER_LONG < 64
        .d_init         = fuse_dentry_init,
        .d_release      = fuse_dentry_release,
+#endif
 };
 
 int fuse_valid_type(int m)
@@ -358,7 +405,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
        else
                fuse_invalidate_entry_cache(entry);
 
-       fuse_advise_use_readdirplus(dir);
+       if (inode)
+               fuse_advise_use_readdirplus(dir);
        return newent;
 
  out_iput:
@@ -410,18 +458,18 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        inarg.flags = flags;
        inarg.mode = mode;
        inarg.umask = current_umask();
-       args.in.h.opcode = FUSE_CREATE;
-       args.in.h.nodeid = get_node_id(dir);
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = entry->d_name.len + 1;
-       args.in.args[1].value = entry->d_name.name;
-       args.out.numargs = 2;
-       args.out.args[0].size = sizeof(outentry);
-       args.out.args[0].value = &outentry;
-       args.out.args[1].size = sizeof(outopen);
-       args.out.args[1].value = &outopen;
+       args.opcode = FUSE_CREATE;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = entry->d_name.len + 1;
+       args.in_args[1].value = entry->d_name.name;
+       args.out_numargs = 2;
+       args.out_args[0].size = sizeof(outentry);
+       args.out_args[0].value = &outentry;
+       args.out_args[1].size = sizeof(outopen);
+       args.out_args[1].value = &outopen;
        err = fuse_simple_request(fc, &args);
        if (err)
                goto out_free_ff;
@@ -526,10 +574,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
                return -ENOMEM;
 
        memset(&outarg, 0, sizeof(outarg));
-       args->in.h.nodeid = get_node_id(dir);
-       args->out.numargs = 1;
-       args->out.args[0].size = sizeof(outarg);
-       args->out.args[0].value = &outarg;
+       args->nodeid = get_node_id(dir);
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(outarg);
+       args->out_args[0].value = &outarg;
        err = fuse_simple_request(fc, args);
        if (err)
                goto out_put_forget_req;
@@ -582,12 +630,12 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
        inarg.mode = mode;
        inarg.rdev = new_encode_dev(rdev);
        inarg.umask = current_umask();
-       args.in.h.opcode = FUSE_MKNOD;
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = entry->d_name.len + 1;
-       args.in.args[1].value = entry->d_name.name;
+       args.opcode = FUSE_MKNOD;
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = entry->d_name.len + 1;
+       args.in_args[1].value = entry->d_name.name;
        return create_new_entry(fc, &args, dir, entry, mode);
 }
 
@@ -609,12 +657,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
        memset(&inarg, 0, sizeof(inarg));
        inarg.mode = mode;
        inarg.umask = current_umask();
-       args.in.h.opcode = FUSE_MKDIR;
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = entry->d_name.len + 1;
-       args.in.args[1].value = entry->d_name.name;
+       args.opcode = FUSE_MKDIR;
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = entry->d_name.len + 1;
+       args.in_args[1].value = entry->d_name.name;
        return create_new_entry(fc, &args, dir, entry, S_IFDIR);
 }
 
@@ -625,12 +673,12 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
        unsigned len = strlen(link) + 1;
        FUSE_ARGS(args);
 
-       args.in.h.opcode = FUSE_SYMLINK;
-       args.in.numargs = 2;
-       args.in.args[0].size = entry->d_name.len + 1;
-       args.in.args[0].value = entry->d_name.name;
-       args.in.args[1].size = len;
-       args.in.args[1].value = link;
+       args.opcode = FUSE_SYMLINK;
+       args.in_numargs = 2;
+       args.in_args[0].size = entry->d_name.len + 1;
+       args.in_args[0].value = entry->d_name.name;
+       args.in_args[1].size = len;
+       args.in_args[1].value = link;
        return create_new_entry(fc, &args, dir, entry, S_IFLNK);
 }
 
@@ -648,11 +696,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
        struct fuse_conn *fc = get_fuse_conn(dir);
        FUSE_ARGS(args);
 
-       args.in.h.opcode = FUSE_UNLINK;
-       args.in.h.nodeid = get_node_id(dir);
-       args.in.numargs = 1;
-       args.in.args[0].size = entry->d_name.len + 1;
-       args.in.args[0].value = entry->d_name.name;
+       args.opcode = FUSE_UNLINK;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 1;
+       args.in_args[0].size = entry->d_name.len + 1;
+       args.in_args[0].value = entry->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                struct inode *inode = d_inode(entry);
@@ -684,11 +732,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
        struct fuse_conn *fc = get_fuse_conn(dir);
        FUSE_ARGS(args);
 
-       args.in.h.opcode = FUSE_RMDIR;
-       args.in.h.nodeid = get_node_id(dir);
-       args.in.numargs = 1;
-       args.in.args[0].size = entry->d_name.len + 1;
-       args.in.args[0].value = entry->d_name.name;
+       args.opcode = FUSE_RMDIR;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 1;
+       args.in_args[0].size = entry->d_name.len + 1;
+       args.in_args[0].value = entry->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                clear_nlink(d_inode(entry));
@@ -711,15 +759,15 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
        memset(&inarg, 0, argsize);
        inarg.newdir = get_node_id(newdir);
        inarg.flags = flags;
-       args.in.h.opcode = opcode;
-       args.in.h.nodeid = get_node_id(olddir);
-       args.in.numargs = 3;
-       args.in.args[0].size = argsize;
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = oldent->d_name.len + 1;
-       args.in.args[1].value = oldent->d_name.name;
-       args.in.args[2].size = newent->d_name.len + 1;
-       args.in.args[2].value = newent->d_name.name;
+       args.opcode = opcode;
+       args.nodeid = get_node_id(olddir);
+       args.in_numargs = 3;
+       args.in_args[0].size = argsize;
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = oldent->d_name.len + 1;
+       args.in_args[1].value = oldent->d_name.name;
+       args.in_args[2].size = newent->d_name.len + 1;
+       args.in_args[2].value = newent->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                /* ctime changes */
@@ -796,12 +844,12 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.oldnodeid = get_node_id(inode);
-       args.in.h.opcode = FUSE_LINK;
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = newent->d_name.len + 1;
-       args.in.args[1].value = newent->d_name.name;
+       args.opcode = FUSE_LINK;
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = newent->d_name.len + 1;
+       args.in_args[1].value = newent->d_name.name;
        err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
        /* Contrary to "normal" filesystems it can happen that link
           makes two "logical" inodes point to the same "physical"
@@ -884,14 +932,14 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
                inarg.getattr_flags |= FUSE_GETATTR_FH;
                inarg.fh = ff->fh;
        }
-       args.in.h.opcode = FUSE_GETATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_GETATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
@@ -1056,11 +1104,11 @@ static int fuse_access(struct inode *inode, int mask)
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
-       args.in.h.opcode = FUSE_ACCESS;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = FUSE_ACCESS;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_access = 1;
@@ -1152,38 +1200,36 @@ static int fuse_permission(struct inode *inode, int mask)
 static int fuse_readlink_page(struct inode *inode, struct page *page)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
-       int err;
+       struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
+       struct fuse_args_pages ap = {
+               .num_pages = 1,
+               .pages = &page,
+               .descs = &desc,
+       };
+       char *link;
+       ssize_t res;
+
+       ap.args.opcode = FUSE_READLINK;
+       ap.args.nodeid = get_node_id(inode);
+       ap.args.out_pages = true;
+       ap.args.out_argvar = true;
+       ap.args.page_zeroing = true;
+       ap.args.out_numargs = 1;
+       ap.args.out_args[0].size = desc.length;
+       res = fuse_simple_request(fc, &ap.args);
 
-       req = fuse_get_req(fc, 1);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->out.page_zeroing = 1;
-       req->out.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = PAGE_SIZE - 1;
-       req->in.h.opcode = FUSE_READLINK;
-       req->in.h.nodeid = get_node_id(inode);
-       req->out.argvar = 1;
-       req->out.numargs = 1;
-       req->out.args[0].size = PAGE_SIZE - 1;
-       fuse_request_send(fc, req);
-       err = req->out.h.error;
+       fuse_invalidate_atime(inode);
 
-       if (!err) {
-               char *link = page_address(page);
-               size_t len = req->out.args[0].size;
+       if (res < 0)
+               return res;
 
-               BUG_ON(len >= PAGE_SIZE);
-               link[len] = '\0';
-       }
+       if (WARN_ON(res >= PAGE_SIZE))
+               return -EIO;
 
-       fuse_put_request(fc, req);
-       fuse_invalidate_atime(inode);
+       link = page_address(page);
+       link[res] = '\0';
 
-       return err;
+       return 0;
 }
 
 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
@@ -1383,14 +1429,14 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
                              struct fuse_setattr_in *inarg_p,
                              struct fuse_attr_out *outarg_p)
 {
-       args->in.h.opcode = FUSE_SETATTR;
-       args->in.h.nodeid = get_node_id(inode);
-       args->in.numargs = 1;
-       args->in.args[0].size = sizeof(*inarg_p);
-       args->in.args[0].value = inarg_p;
-       args->out.numargs = 1;
-       args->out.args[0].size = sizeof(*outarg_p);
-       args->out.args[0].value = outarg_p;
+       args->opcode = FUSE_SETATTR;
+       args->nodeid = get_node_id(inode);
+       args->in_numargs = 1;
+       args->in_args[0].size = sizeof(*inarg_p);
+       args->in_args[0].value = inarg_p;
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(*outarg_p);
+       args->out_args[0].value = outarg_p;
 }
 
 /*
@@ -1476,6 +1522,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
                is_truncate = true;
        }
 
+       /* Flush dirty data/metadata before non-truncate SETATTR */
+       if (is_wb && S_ISREG(inode->i_mode) &&
+           attr->ia_valid &
+                       (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
+                        ATTR_TIMES_SET)) {
+               err = write_inode_now(inode, true);
+               if (err)
+                       return err;
+
+               fuse_set_nowrite(inode);
+               fuse_release_nowrite(inode);
+       }
+
        if (is_truncate) {
                fuse_set_nowrite(inode);
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
index 5ae2828..db48a5c 100644 (file)
 #include <linux/falloc.h>
 #include <linux/uio.h>
 
+static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
+                                     struct fuse_page_desc **desc)
+{
+       struct page **pages;
+
+       pages = kzalloc(npages * (sizeof(struct page *) +
+                                 sizeof(struct fuse_page_desc)), flags);
+       *desc = (void *) (pages + npages);
+
+       return pages;
+}
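
A usage sketch for the helper above: a single kzalloc() carries both arrays, with the descriptor array starting immediately after the page-pointer array, so freeing the pages pointer releases both (ap is a caller-owned struct fuse_args_pages):

	ap->pages = fuse_pages_alloc(npages, GFP_KERNEL, &ap->descs);
	if (!ap->pages)
		return -ENOMEM;
	/* ... fill ap->pages[] / ap->descs[], send the request ... */
	kfree(ap->pages);		/* frees ap->descs as well */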
+
 static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
                          int opcode, struct fuse_open_out *outargp)
 {
@@ -29,29 +41,36 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
        inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
        if (!fc->atomic_o_trunc)
                inarg.flags &= ~O_TRUNC;
-       args.in.h.opcode = opcode;
-       args.in.h.nodeid = nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(*outargp);
-       args.out.args[0].value = outargp;
+       args.opcode = opcode;
+       args.nodeid = nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(*outargp);
+       args.out_args[0].value = outargp;
 
        return fuse_simple_request(fc, &args);
 }
 
+struct fuse_release_args {
+       struct fuse_args args;
+       struct fuse_release_in inarg;
+       struct inode *inode;
+};
+
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 {
        struct fuse_file *ff;
 
-       ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
+       ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
        if (unlikely(!ff))
                return NULL;
 
        ff->fc = fc;
-       ff->reserved_req = fuse_request_alloc(0);
-       if (unlikely(!ff->reserved_req)) {
+       ff->release_args = kzalloc(sizeof(*ff->release_args),
+                                  GFP_KERNEL_ACCOUNT);
+       if (!ff->release_args) {
                kfree(ff);
                return NULL;
        }
@@ -69,7 +88,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 
 void fuse_file_free(struct fuse_file *ff)
 {
-       fuse_request_free(ff->reserved_req);
+       kfree(ff->release_args);
        mutex_destroy(&ff->readdir.lock);
        kfree(ff);
 }
@@ -80,34 +99,31 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
        return ff;
 }
 
-static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args,
+                            int error)
 {
-       iput(req->misc.release.inode);
+       struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
+
+       iput(ra->inode);
+       kfree(ra);
 }
 
 static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
 {
        if (refcount_dec_and_test(&ff->count)) {
-               struct fuse_req *req = ff->reserved_req;
+               struct fuse_args *args = &ff->release_args->args;
 
                if (isdir ? ff->fc->no_opendir : ff->fc->no_open) {
-                       /*
-                        * Drop the release request when client does not
-                        * implement 'open'
-                        */
-                       __clear_bit(FR_BACKGROUND, &req->flags);
-                       iput(req->misc.release.inode);
-                       fuse_put_request(ff->fc, req);
+                       /* Do nothing when client does not implement 'open' */
+                       fuse_release_end(ff->fc, args, 0);
                } else if (sync) {
-                       __set_bit(FR_FORCE, &req->flags);
-                       __clear_bit(FR_BACKGROUND, &req->flags);
-                       fuse_request_send(ff->fc, req);
-                       iput(req->misc.release.inode);
-                       fuse_put_request(ff->fc, req);
+                       fuse_simple_request(ff->fc, args);
+                       fuse_release_end(ff->fc, args, 0);
                } else {
-                       req->end = fuse_release_end;
-                       __set_bit(FR_BACKGROUND, &req->flags);
-                       fuse_request_send_background(ff->fc, req);
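+                       /* async RELEASE; fuse_release_end() does the cleanup */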
+                       args->end = fuse_release_end;
+                       if (fuse_simple_background(ff->fc, args,
+                                                  GFP_KERNEL | __GFP_NOFAIL))
+                               fuse_release_end(ff->fc, args, -ENOTCONN);
                }
                kfree(ff);
        }
@@ -201,7 +217,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        int err;
-       bool lock_inode = (file->f_flags & O_TRUNC) &&
+       bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
                          fc->atomic_o_trunc &&
                          fc->writeback_cache;
 
@@ -209,16 +225,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
        if (err)
                return err;
 
-       if (lock_inode)
+       if (is_wb_truncate) {
                inode_lock(inode);
+               fuse_set_nowrite(inode);
+       }
 
        err = fuse_do_open(fc, get_node_id(inode), file, isdir);
 
        if (!err)
                fuse_finish_open(inode, file);
 
-       if (lock_inode)
+       if (is_wb_truncate) {
+               fuse_release_nowrite(inode);
                inode_unlock(inode);
+       }
 
        return err;
 }
@@ -227,8 +247,7 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
                                 int flags, int opcode)
 {
        struct fuse_conn *fc = ff->fc;
-       struct fuse_req *req = ff->reserved_req;
-       struct fuse_release_in *inarg = &req->misc.release.in;
+       struct fuse_release_args *ra = ff->release_args;
 
        /* Inode is NULL on error path of fuse_create_open() */
        if (likely(fi)) {
@@ -243,32 +262,33 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
 
        wake_up_interruptible_all(&ff->poll_wait);
 
-       inarg->fh = ff->fh;
-       inarg->flags = flags;
-       req->in.h.opcode = opcode;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(struct fuse_release_in);
-       req->in.args[0].value = inarg;
+       ra->inarg.fh = ff->fh;
+       ra->inarg.flags = flags;
+       ra->args.in_numargs = 1;
+       ra->args.in_args[0].size = sizeof(struct fuse_release_in);
+       ra->args.in_args[0].value = &ra->inarg;
+       ra->args.opcode = opcode;
+       ra->args.nodeid = ff->nodeid;
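+       /* send even if interrupted (force) and without credentials (nocreds) */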
+       ra->args.force = true;
+       ra->args.nocreds = true;
 }
 
 void fuse_release_common(struct file *file, bool isdir)
 {
        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
        struct fuse_file *ff = file->private_data;
-       struct fuse_req *req = ff->reserved_req;
+       struct fuse_release_args *ra = ff->release_args;
        int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
 
        fuse_prepare_release(fi, ff, file->f_flags, opcode);
 
        if (ff->flock) {
-               struct fuse_release_in *inarg = &req->misc.release.in;
-               inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
-               inarg->lock_owner = fuse_lock_owner_id(ff->fc,
-                                                      (fl_owner_t) file);
+               ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+               ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc,
+                                                         (fl_owner_t) file);
        }
        /* Hold inode until release is finished */
-       req->misc.release.inode = igrab(file_inode(file));
+       ra->inode = igrab(file_inode(file));
 
        /*
         * Normally this will send the RELEASE request, however if
@@ -279,7 +299,7 @@ void fuse_release_common(struct file *file, bool isdir)
         * synchronous RELEASE is allowed (and desirable) in this case
         * because the server can be trusted not to screw up.
         */
-       fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
+       fuse_file_put(ff, ff->fc->destroy, isdir);
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
@@ -335,19 +355,27 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
        return (u64) v0 + ((u64) v1 << 32);
 }
 
-static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi,
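+/*
+ * In-flight writeback request: one temporary-page WRITE plus any
+ * auxiliary requests for overlapping pages chained via ->next.
+ */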
+struct fuse_writepage_args {
+       struct fuse_io_args ia;
+       struct list_head writepages_entry;
+       struct list_head queue_entry;
+       struct fuse_writepage_args *next;
+       struct inode *inode;
+};
+
+static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
                                            pgoff_t idx_from, pgoff_t idx_to)
 {
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
 
-       list_for_each_entry(req, &fi->writepages, writepages_entry) {
+       list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
                pgoff_t curr_index;
 
-               WARN_ON(get_fuse_inode(req->inode) != fi);
-               curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
-               if (idx_from < curr_index + req->num_pages &&
+               WARN_ON(get_fuse_inode(wpa->inode) != fi);
+               curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
+               if (idx_from < curr_index + wpa->ia.ap.num_pages &&
                    curr_index <= idx_to) {
-                       return req;
+                       return wpa;
                }
        }
        return NULL;
@@ -383,12 +411,11 @@ static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
  * Since fuse doesn't rely on the VM writeback tracking, this has to
  * use some other means.
  */
-static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
+static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
 {
        struct fuse_inode *fi = get_fuse_inode(inode);
 
        wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
-       return 0;
 }
 
 /*
@@ -411,8 +438,8 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        struct inode *inode = file_inode(file);
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_file *ff = file->private_data;
-       struct fuse_req *req;
        struct fuse_flush_in inarg;
+       FUSE_ARGS(args);
        int err;
 
        if (is_bad_inode(inode))
@@ -433,19 +460,17 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        if (err)
                return err;
 
-       req = fuse_get_req_nofail_nopages(fc, file);
        memset(&inarg, 0, sizeof(inarg));
        inarg.fh = ff->fh;
        inarg.lock_owner = fuse_lock_owner_id(fc, id);
-       req->in.h.opcode = FUSE_FLUSH;
-       req->in.h.nodeid = get_node_id(inode);
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(inarg);
-       req->in.args[0].value = &inarg;
-       __set_bit(FR_FORCE, &req->flags);
-       fuse_request_send(fc, req);
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
+       args.opcode = FUSE_FLUSH;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.force = true;
+
+       err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_flush = 1;
                err = 0;
@@ -465,11 +490,11 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
        memset(&inarg, 0, sizeof(inarg));
        inarg.fh = ff->fh;
        inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
-       args.in.h.opcode = opcode;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = opcode;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        return fuse_simple_request(fc, &args);
 }
 
@@ -523,35 +548,35 @@ out:
        return err;
 }
 
-void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
-                   size_t count, int opcode)
+void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
+                        size_t count, int opcode)
 {
-       struct fuse_read_in *inarg = &req->misc.read.in;
        struct fuse_file *ff = file->private_data;
+       struct fuse_args *args = &ia->ap.args;
 
-       inarg->fh = ff->fh;
-       inarg->offset = pos;
-       inarg->size = count;
-       inarg->flags = file->f_flags;
-       req->in.h.opcode = opcode;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(struct fuse_read_in);
-       req->in.args[0].value = inarg;
-       req->out.argvar = 1;
-       req->out.numargs = 1;
-       req->out.args[0].size = count;
+       ia->read.in.fh = ff->fh;
+       ia->read.in.offset = pos;
+       ia->read.in.size = count;
+       ia->read.in.flags = file->f_flags;
+       args->opcode = opcode;
+       args->nodeid = ff->nodeid;
+       args->in_numargs = 1;
+       args->in_args[0].size = sizeof(ia->read.in);
+       args->in_args[0].value = &ia->read.in;
+       args->out_argvar = true;
+       args->out_numargs = 1;
+       args->out_args[0].size = count;
 }
 
-static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
+static void fuse_release_user_pages(struct fuse_args_pages *ap,
+                                   bool should_dirty)
 {
-       unsigned i;
+       unsigned int i;
 
-       for (i = 0; i < req->num_pages; i++) {
-               struct page *page = req->pages[i];
+       for (i = 0; i < ap->num_pages; i++) {
                if (should_dirty)
-                       set_page_dirty_lock(page);
-               put_page(page);
+                       set_page_dirty_lock(ap->pages[i]);
+               put_page(ap->pages[i]);
        }
 }
 
@@ -621,64 +646,94 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
        kref_put(&io->refcnt, fuse_io_release);
 }
 
-static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
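+/* Allocate async I/O state together with a page array sized for npages */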
+static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
+                                         unsigned int npages)
 {
-       struct fuse_io_priv *io = req->io;
+       struct fuse_io_args *ia;
+
+       ia = kzalloc(sizeof(*ia), GFP_KERNEL);
+       if (ia) {
+               ia->io = io;
+               ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
+                                               &ia->ap.descs);
+               if (!ia->ap.pages) {
+                       kfree(ia);
+                       ia = NULL;
+               }
+       }
+       return ia;
+}
+
+static void fuse_io_free(struct fuse_io_args *ia)
+{
+       kfree(ia->ap.pages);
+       kfree(ia);
+}
+
+static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args,
+                                 int err)
+{
+       struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
+       struct fuse_io_priv *io = ia->io;
        ssize_t pos = -1;
 
-       fuse_release_user_pages(req, io->should_dirty);
+       fuse_release_user_pages(&ia->ap, io->should_dirty);
 
-       if (io->write) {
-               if (req->misc.write.in.size != req->misc.write.out.size)
-                       pos = req->misc.write.in.offset - io->offset +
-                               req->misc.write.out.size;
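+       /* check for short transfers; a write larger than requested is an error */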
+       if (err) {
+               /* Nothing */
+       } else if (io->write) {
+               if (ia->write.out.size > ia->write.in.size) {
+                       err = -EIO;
+               } else if (ia->write.in.size != ia->write.out.size) {
+                       pos = ia->write.in.offset - io->offset +
+                               ia->write.out.size;
+               }
        } else {
-               if (req->misc.read.in.size != req->out.args[0].size)
-                       pos = req->misc.read.in.offset - io->offset +
-                               req->out.args[0].size;
+               u32 outsize = args->out_args[0].size;
+
+               if (ia->read.in.size != outsize)
+                       pos = ia->read.in.offset - io->offset + outsize;
        }
 
-       fuse_aio_complete(io, req->out.h.error, pos);
+       fuse_aio_complete(io, err, pos);
+       fuse_io_free(ia);
 }
 
-static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
-               size_t num_bytes, struct fuse_io_priv *io)
+static ssize_t fuse_async_req_send(struct fuse_conn *fc,
+                                  struct fuse_io_args *ia, size_t num_bytes)
 {
+       ssize_t err;
+       struct fuse_io_priv *io = ia->io;
+
        spin_lock(&io->lock);
        kref_get(&io->refcnt);
        io->size += num_bytes;
        io->reqs++;
        spin_unlock(&io->lock);
 
-       req->io = io;
-       req->end = fuse_aio_complete_req;
+       ia->ap.args.end = fuse_aio_complete_req;
+       err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
 
-       __fuse_get_request(req);
-       fuse_request_send_background(fc, req);
-
-       return num_bytes;
+       return err ?: num_bytes;
 }
 
-static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
-                            loff_t pos, size_t count, fl_owner_t owner)
+static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
+                             fl_owner_t owner)
 {
-       struct file *file = io->iocb->ki_filp;
+       struct file *file = ia->io->iocb->ki_filp;
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
 
-       fuse_read_fill(req, file, pos, count, FUSE_READ);
+       fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
        if (owner != NULL) {
-               struct fuse_read_in *inarg = &req->misc.read.in;
-
-               inarg->read_flags |= FUSE_READ_LOCKOWNER;
-               inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+               ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
+               ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner);
        }
 
-       if (io->async)
-               return fuse_async_req_send(fc, req, count, io);
+       if (ia->io->async)
+               return fuse_async_req_send(fc, ia, count);
 
-       fuse_request_send(fc, req);
-       return req->out.args[0].size;
+       return fuse_simple_request(fc, &ia->ap.args);
 }
 
 static void fuse_read_update_size(struct inode *inode, loff_t size,
@@ -696,10 +751,9 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
        spin_unlock(&fi->lock);
 }
 
-static void fuse_short_read(struct fuse_req *req, struct inode *inode,
-                           u64 attr_ver)
+static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
+                           struct fuse_args_pages *ap)
 {
-       size_t num_read = req->out.args[0].size;
        struct fuse_conn *fc = get_fuse_conn(inode);
 
        if (fc->writeback_cache) {
@@ -712,28 +766,31 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
                int start_idx = num_read >> PAGE_SHIFT;
                size_t off = num_read & (PAGE_SIZE - 1);
 
-               for (i = start_idx; i < req->num_pages; i++) {
-                       zero_user_segment(req->pages[i], off, PAGE_SIZE);
+               for (i = start_idx; i < ap->num_pages; i++) {
+                       zero_user_segment(ap->pages[i], off, PAGE_SIZE);
                        off = 0;
                }
        } else {
-               loff_t pos = page_offset(req->pages[0]) + num_read;
+               loff_t pos = page_offset(ap->pages[0]) + num_read;
                fuse_read_update_size(inode, pos, attr_ver);
        }
 }
 
 static int fuse_do_readpage(struct file *file, struct page *page)
 {
-       struct kiocb iocb;
-       struct fuse_io_priv io;
        struct inode *inode = page->mapping->host;
        struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
-       size_t num_read;
        loff_t pos = page_offset(page);
-       size_t count = PAGE_SIZE;
+       struct fuse_page_desc desc = { .length = PAGE_SIZE };
+       struct fuse_io_args ia = {
+               .ap.args.page_zeroing = true,
+               .ap.args.out_pages = true,
+               .ap.num_pages = 1,
+               .ap.pages = &page,
+               .ap.descs = &desc,
+       };
+       ssize_t res;
        u64 attr_ver;
-       int err;
 
        /*
         * Page writeback can extend beyond the lifetime of the
@@ -742,35 +799,21 @@ static int fuse_do_readpage(struct file *file, struct page *page)
         */
        fuse_wait_on_page_writeback(inode, page->index);
 
-       req = fuse_get_req(fc, 1);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
        attr_ver = fuse_get_attr_version(fc);
 
-       req->out.page_zeroing = 1;
-       req->out.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = count;
-       init_sync_kiocb(&iocb, file);
-       io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb);
-       num_read = fuse_send_read(req, &io, pos, count, NULL);
-       err = req->out.h.error;
-
-       if (!err) {
-               /*
-                * Short read means EOF.  If file size is larger, truncate it
-                */
-               if (num_read < count)
-                       fuse_short_read(req, inode, attr_ver);
-
-               SetPageUptodate(page);
-       }
+       fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
+       res = fuse_simple_request(fc, &ia.ap.args);
+       if (res < 0)
+               return res;
+       /*
+        * Short read means EOF.  If file size is larger, truncate it
+        */
+       if (res < desc.length)
+               fuse_short_read(inode, attr_ver, res, &ia.ap);
 
-       fuse_put_request(fc, req);
+       SetPageUptodate(page);
 
-       return err;
+       return 0;
 }
 
 static int fuse_readpage(struct file *file, struct page *page)
@@ -789,15 +832,18 @@ static int fuse_readpage(struct file *file, struct page *page)
        return err;
 }
 
-static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args,
+                              int err)
 {
        int i;
-       size_t count = req->misc.read.in.size;
-       size_t num_read = req->out.args[0].size;
+       struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
+       struct fuse_args_pages *ap = &ia->ap;
+       size_t count = ia->read.in.size;
+       size_t num_read = args->out_args[0].size;
        struct address_space *mapping = NULL;
 
-       for (i = 0; mapping == NULL && i < req->num_pages; i++)
-               mapping = req->pages[i]->mapping;
+       for (i = 0; mapping == NULL && i < ap->num_pages; i++)
+               mapping = ap->pages[i]->mapping;
 
        if (mapping) {
                struct inode *inode = mapping->host;
@@ -805,93 +851,97 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
                /*
                 * Short read means EOF. If file size is larger, truncate it
                 */
-               if (!req->out.h.error && num_read < count)
-                       fuse_short_read(req, inode, req->misc.read.attr_ver);
+               if (!err && num_read < count)
+                       fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
 
                fuse_invalidate_atime(inode);
        }
 
-       for (i = 0; i < req->num_pages; i++) {
-               struct page *page = req->pages[i];
-               if (!req->out.h.error)
+       for (i = 0; i < ap->num_pages; i++) {
+               struct page *page = ap->pages[i];
+
+               if (!err)
                        SetPageUptodate(page);
                else
                        SetPageError(page);
                unlock_page(page);
                put_page(page);
        }
-       if (req->ff)
-               fuse_file_put(req->ff, false, false);
+       if (ia->ff)
+               fuse_file_put(ia->ff, false, false);
+
+       fuse_io_free(ia);
 }
 
-static void fuse_send_readpages(struct fuse_req *req, struct file *file)
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
 {
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
-       loff_t pos = page_offset(req->pages[0]);
-       size_t count = req->num_pages << PAGE_SHIFT;
-
-       req->out.argpages = 1;
-       req->out.page_zeroing = 1;
-       req->out.page_replace = 1;
-       fuse_read_fill(req, file, pos, count, FUSE_READ);
-       req->misc.read.attr_ver = fuse_get_attr_version(fc);
+       struct fuse_args_pages *ap = &ia->ap;
+       loff_t pos = page_offset(ap->pages[0]);
+       size_t count = ap->num_pages << PAGE_SHIFT;
+       int err;
+
+       ap->args.out_pages = true;
+       ap->args.page_zeroing = true;
+       ap->args.page_replace = true;
+       fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
+       ia->read.attr_ver = fuse_get_attr_version(fc);
        if (fc->async_read) {
-               req->ff = fuse_file_get(ff);
-               req->end = fuse_readpages_end;
-               fuse_request_send_background(fc, req);
+               ia->ff = fuse_file_get(ff);
+               ap->args.end = fuse_readpages_end;
+               err = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
+               if (!err)
+                       return;
        } else {
-               fuse_request_send(fc, req);
-               fuse_readpages_end(fc, req);
-               fuse_put_request(fc, req);
+               err = fuse_simple_request(fc, &ap->args);
        }
+       fuse_readpages_end(fc, &ap->args, err);
 }
 
 struct fuse_fill_data {
-       struct fuse_req *req;
+       struct fuse_io_args *ia;
        struct file *file;
        struct inode *inode;
-       unsigned nr_pages;
+       unsigned int nr_pages;
+       unsigned int max_pages;
 };
 
 static int fuse_readpages_fill(void *_data, struct page *page)
 {
        struct fuse_fill_data *data = _data;
-       struct fuse_req *req = data->req;
+       struct fuse_io_args *ia = data->ia;
+       struct fuse_args_pages *ap = &ia->ap;
        struct inode *inode = data->inode;
        struct fuse_conn *fc = get_fuse_conn(inode);
 
        fuse_wait_on_page_writeback(inode, page->index);
 
-       if (req->num_pages &&
-           (req->num_pages == fc->max_pages ||
-            (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
-            req->pages[req->num_pages - 1]->index + 1 != page->index)) {
-               unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
-                                             fc->max_pages);
-               fuse_send_readpages(req, data->file);
-               if (fc->async_read)
-                       req = fuse_get_req_for_background(fc, nr_alloc);
-               else
-                       req = fuse_get_req(fc, nr_alloc);
-
-               data->req = req;
-               if (IS_ERR(req)) {
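+       /* send off the current batch if full, over max_read, or non-contiguous */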
+       if (ap->num_pages &&
+           (ap->num_pages == fc->max_pages ||
+            (ap->num_pages + 1) * PAGE_SIZE > fc->max_read ||
+            ap->pages[ap->num_pages - 1]->index + 1 != page->index)) {
+               data->max_pages = min_t(unsigned int, data->nr_pages,
+                                       fc->max_pages);
+               fuse_send_readpages(ia, data->file);
+               data->ia = ia = fuse_io_alloc(NULL, data->max_pages);
+               if (!ia) {
                        unlock_page(page);
-                       return PTR_ERR(req);
+                       return -ENOMEM;
                }
+               ap = &ia->ap;
        }
 
-       if (WARN_ON(req->num_pages >= req->max_pages)) {
+       if (WARN_ON(ap->num_pages >= data->max_pages)) {
                unlock_page(page);
-               fuse_put_request(fc, req);
+               fuse_io_free(ia);
                return -EIO;
        }
 
        get_page(page);
-       req->pages[req->num_pages] = page;
-       req->page_descs[req->num_pages].length = PAGE_SIZE;
-       req->num_pages++;
+       ap->pages[ap->num_pages] = page;
+       ap->descs[ap->num_pages].length = PAGE_SIZE;
+       ap->num_pages++;
        data->nr_pages--;
        return 0;
 }
@@ -903,7 +953,6 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_fill_data data;
        int err;
-       unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
 
        err = -EIO;
        if (is_bad_inode(inode))
@@ -911,21 +960,20 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 
        data.file = file;
        data.inode = inode;
-       if (fc->async_read)
-               data.req = fuse_get_req_for_background(fc, nr_alloc);
-       else
-               data.req = fuse_get_req(fc, nr_alloc);
        data.nr_pages = nr_pages;
-       err = PTR_ERR(data.req);
-       if (IS_ERR(data.req))
+       data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages);
+       data.ia = fuse_io_alloc(NULL, data.max_pages);
+       err = -ENOMEM;
+       if (!data.ia)
                goto out;
 
        err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
        if (!err) {
-               if (data.req->num_pages)
-                       fuse_send_readpages(data.req, file);
+               if (data.ia->ap.num_pages)
+                       fuse_send_readpages(data.ia, file);
                else
-                       fuse_put_request(fc, data.req);
+                       fuse_io_free(data.ia);
        }
 out:
        return err;
@@ -952,54 +1000,65 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return generic_file_read_iter(iocb, to);
 }
 
-static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
-                           loff_t pos, size_t count)
+static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
+                                loff_t pos, size_t count)
 {
-       struct fuse_write_in *inarg = &req->misc.write.in;
-       struct fuse_write_out *outarg = &req->misc.write.out;
+       struct fuse_args *args = &ia->ap.args;
 
-       inarg->fh = ff->fh;
-       inarg->offset = pos;
-       inarg->size = count;
-       req->in.h.opcode = FUSE_WRITE;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 2;
+       ia->write.in.fh = ff->fh;
+       ia->write.in.offset = pos;
+       ia->write.in.size = count;
+       args->opcode = FUSE_WRITE;
+       args->nodeid = ff->nodeid;
+       args->in_numargs = 2;
        if (ff->fc->minor < 9)
-               req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
+               args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
        else
-               req->in.args[0].size = sizeof(struct fuse_write_in);
-       req->in.args[0].value = inarg;
-       req->in.args[1].size = count;
-       req->out.numargs = 1;
-       req->out.args[0].size = sizeof(struct fuse_write_out);
-       req->out.args[0].value = outarg;
+               args->in_args[0].size = sizeof(ia->write.in);
+       args->in_args[0].value = &ia->write.in;
+       args->in_args[1].size = count;
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(ia->write.out);
+       args->out_args[0].value = &ia->write.out;
+}
+
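+/* Build the WRITE flags field: f_flags plus per-iocb sync flags */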
+static unsigned int fuse_write_flags(struct kiocb *iocb)
+{
+       unsigned int flags = iocb->ki_filp->f_flags;
+
+       if (iocb->ki_flags & IOCB_DSYNC)
+               flags |= O_DSYNC;
+       if (iocb->ki_flags & IOCB_SYNC)
+               flags |= O_SYNC;
+
+       return flags;
 }
 
-static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
-                             loff_t pos, size_t count, fl_owner_t owner)
+static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
+                              size_t count, fl_owner_t owner)
 {
-       struct kiocb *iocb = io->iocb;
+       struct kiocb *iocb = ia->io->iocb;
        struct file *file = iocb->ki_filp;
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
-       struct fuse_write_in *inarg = &req->misc.write.in;
+       struct fuse_write_in *inarg = &ia->write.in;
+       ssize_t err;
 
-       fuse_write_fill(req, ff, pos, count);
-       inarg->flags = file->f_flags;
-       if (iocb->ki_flags & IOCB_DSYNC)
-               inarg->flags |= O_DSYNC;
-       if (iocb->ki_flags & IOCB_SYNC)
-               inarg->flags |= O_SYNC;
+       fuse_write_args_fill(ia, ff, pos, count);
+       inarg->flags = fuse_write_flags(iocb);
        if (owner != NULL) {
                inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
                inarg->lock_owner = fuse_lock_owner_id(fc, owner);
        }
 
-       if (io->async)
-               return fuse_async_req_send(fc, req, count, io);
+       if (ia->io->async)
+               return fuse_async_req_send(fc, ia, count);
+
+       err = fuse_simple_request(fc, &ia->ap.args);
+       if (!err && ia->write.out.size > count)
+               err = -EIO;
 
-       fuse_request_send(fc, req);
-       return req->misc.write.out.size;
+       return err ?: ia->write.out.size;
 }
 
 bool fuse_write_update_size(struct inode *inode, loff_t pos)
@@ -1019,26 +1078,31 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos)
        return ret;
 }
 
-static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
-                                   struct inode *inode, loff_t pos,
-                                   size_t count)
+static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
+                                    struct kiocb *iocb, struct inode *inode,
+                                    loff_t pos, size_t count)
 {
-       size_t res;
-       unsigned offset;
-       unsigned i;
-       struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
+       struct fuse_args_pages *ap = &ia->ap;
+       struct file *file = iocb->ki_filp;
+       struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = ff->fc;
+       unsigned int offset, i;
+       int err;
 
-       for (i = 0; i < req->num_pages; i++)
-               fuse_wait_on_page_writeback(inode, req->pages[i]->index);
+       for (i = 0; i < ap->num_pages; i++)
+               fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
 
-       res = fuse_send_write(req, &io, pos, count, NULL);
+       fuse_write_args_fill(ia, ff, pos, count);
+       ia->write.in.flags = fuse_write_flags(iocb);
 
-       offset = req->page_descs[0].offset;
-       count = res;
-       for (i = 0; i < req->num_pages; i++) {
-               struct page *page = req->pages[i];
+       err = fuse_simple_request(fc, &ap->args);
 
-               if (!req->out.h.error && !offset && count >= PAGE_SIZE)
+       offset = ap->descs[0].offset;
+       count = ia->write.out.size;
+       for (i = 0; i < ap->num_pages; i++) {
+               struct page *page = ap->pages[i];
+
+               if (!err && !offset && count >= PAGE_SIZE)
                        SetPageUptodate(page);
 
                if (count > PAGE_SIZE - offset)
@@ -1051,20 +1115,21 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
                put_page(page);
        }
 
-       return res;
+       return err;
 }
 
-static ssize_t fuse_fill_write_pages(struct fuse_req *req,
-                              struct address_space *mapping,
-                              struct iov_iter *ii, loff_t pos)
+static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
+                                    struct address_space *mapping,
+                                    struct iov_iter *ii, loff_t pos,
+                                    unsigned int max_pages)
 {
        struct fuse_conn *fc = get_fuse_conn(mapping->host);
        unsigned offset = pos & (PAGE_SIZE - 1);
        size_t count = 0;
        int err;
 
-       req->in.argpages = 1;
-       req->page_descs[0].offset = offset;
+       ap->args.in_pages = true;
+       ap->descs[0].offset = offset;
 
        do {
                size_t tmp;
@@ -1100,9 +1165,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                }
 
                err = 0;
-               req->pages[req->num_pages] = page;
-               req->page_descs[req->num_pages].length = tmp;
-               req->num_pages++;
+               ap->pages[ap->num_pages] = page;
+               ap->descs[ap->num_pages].length = tmp;
+               ap->num_pages++;
 
                count += tmp;
                pos += tmp;
@@ -1113,7 +1178,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                if (!fc->big_writes)
                        break;
        } while (iov_iter_count(ii) && count < fc->max_write &&
-                req->num_pages < req->max_pages && offset == 0);
+                ap->num_pages < max_pages && offset == 0);
 
        return count > 0 ? count : err;
 }
@@ -1141,27 +1206,27 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
        do {
-               struct fuse_req *req;
                ssize_t count;
+               struct fuse_io_args ia = {};
+               struct fuse_args_pages *ap = &ia.ap;
                unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
                                                      fc->max_pages);
 
-               req = fuse_get_req(fc, nr_pages);
-               if (IS_ERR(req)) {
-                       err = PTR_ERR(req);
+               ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
+               if (!ap->pages) {
+                       err = -ENOMEM;
                        break;
                }
 
-               count = fuse_fill_write_pages(req, mapping, ii, pos);
+               count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
                if (count <= 0) {
                        err = count;
                } else {
-                       size_t num_written;
-
-                       num_written = fuse_send_write_pages(req, iocb, inode,
-                                                           pos, count);
-                       err = req->out.h.error;
+                       err = fuse_send_write_pages(&ia, iocb, inode,
+                                                   pos, count);
                        if (!err) {
+                               size_t num_written = ia.write.out.size;
+
                                res += num_written;
                                pos += num_written;
 
@@ -1170,7 +1235,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
                                        err = -EIO;
                        }
                }
-               fuse_put_request(fc, req);
+               kfree(ap->pages);
        } while (!err && iov_iter_count(ii));
 
        if (res > 0)
@@ -1258,14 +1323,14 @@ out:
        return written ? written : err;
 }
 
-static inline void fuse_page_descs_length_init(struct fuse_req *req,
-               unsigned index, unsigned nr_pages)
+static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
+                                              unsigned int index,
+                                              unsigned int nr_pages)
 {
        int i;
 
        for (i = index; i < index + nr_pages; i++)
-               req->page_descs[i].length = PAGE_SIZE -
-                       req->page_descs[i].offset;
+               descs[i].length = PAGE_SIZE - descs[i].offset;
 }
 
 static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
@@ -1279,8 +1344,9 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
        return min(iov_iter_single_seg_count(ii), max_size);
 }
 
-static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
-                              size_t *nbytesp, int write)
+static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
+                              size_t *nbytesp, int write,
+                              unsigned int max_pages)
 {
        size_t nbytes = 0;  /* # bytes already packed in req */
        ssize_t ret = 0;
@@ -1291,21 +1357,21 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
                size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
                if (write)
-                       req->in.args[1].value = (void *) user_addr;
+                       ap->args.in_args[1].value = (void *) user_addr;
                else
-                       req->out.args[0].value = (void *) user_addr;
+                       ap->args.out_args[0].value = (void *) user_addr;
 
                iov_iter_advance(ii, frag_size);
                *nbytesp = frag_size;
                return 0;
        }
 
-       while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
+       while (nbytes < *nbytesp && ap->num_pages < max_pages) {
                unsigned npages;
                size_t start;
-               ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
+               ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
                                        *nbytesp - nbytes,
-                                       req->max_pages - req->num_pages,
+                                       max_pages - ap->num_pages,
                                        &start);
                if (ret < 0)
                        break;
@@ -1316,18 +1382,18 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
                ret += start;
                npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
 
-               req->page_descs[req->num_pages].offset = start;
-               fuse_page_descs_length_init(req, req->num_pages, npages);
+               ap->descs[ap->num_pages].offset = start;
+               fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
 
-               req->num_pages += npages;
-               req->page_descs[req->num_pages - 1].length -=
+               ap->num_pages += npages;
+               ap->descs[ap->num_pages - 1].length -=
                        (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
        }
 
        if (write)
-               req->in.argpages = 1;
+               ap->args.in_pages = true;
        else
-               req->out.argpages = 1;
+               ap->args.out_pages = true;
 
        *nbytesp = nbytes;
 
@@ -1349,17 +1415,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
        pgoff_t idx_from = pos >> PAGE_SHIFT;
        pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
        ssize_t res = 0;
-       struct fuse_req *req;
        int err = 0;
+       struct fuse_io_args *ia;
+       unsigned int max_pages;
 
-       if (io->async)
-               req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
-                                                               fc->max_pages));
-       else
-               req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       max_pages = iov_iter_npages(iter, fc->max_pages);
+       ia = fuse_io_alloc(io, max_pages);
+       if (!ia)
+               return -ENOMEM;
 
        if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
                if (!write)
                        inode_lock(inode);
@@ -1370,54 +1435,49 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 
        io->should_dirty = !write && iter_is_iovec(iter);
        while (count) {
-               size_t nres;
+               ssize_t nres;
                fl_owner_t owner = current->files;
                size_t nbytes = min(count, nmax);
-               err = fuse_get_user_pages(req, iter, &nbytes, write);
+
+               err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
+                                         max_pages);
                if (err && !nbytes)
                        break;
 
                if (write) {
-                       if (!capable(CAP_FSETID)) {
-                               struct fuse_write_in *inarg;
+                       if (!capable(CAP_FSETID))
+                               ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV;
 
-                               inarg = &req->misc.write.in;
-                               inarg->write_flags |= FUSE_WRITE_KILL_PRIV;
-                       }
-                       nres = fuse_send_write(req, io, pos, nbytes, owner);
+                       nres = fuse_send_write(ia, pos, nbytes, owner);
                } else {
-                       nres = fuse_send_read(req, io, pos, nbytes, owner);
+                       nres = fuse_send_read(ia, pos, nbytes, owner);
                }
 
-               if (!io->async)
-                       fuse_release_user_pages(req, io->should_dirty);
-               if (req->out.h.error) {
-                       err = req->out.h.error;
-                       break;
-               } else if (nres > nbytes) {
-                       res = 0;
-                       err = -EIO;
+               if (!io->async || nres < 0) {
+                       fuse_release_user_pages(&ia->ap, io->should_dirty);
+                       fuse_io_free(ia);
+               }
+               ia = NULL;
+               if (nres < 0) {
+                       err = nres;
                        break;
                }
+               WARN_ON(nres > nbytes);
+
                count -= nres;
                res += nres;
                pos += nres;
                if (nres != nbytes)
                        break;
                if (count) {
-                       fuse_put_request(fc, req);
-                       if (io->async)
-                               req = fuse_get_req_for_background(fc,
-                                       iov_iter_npages(iter, fc->max_pages));
-                       else
-                               req = fuse_get_req(fc, iov_iter_npages(iter,
-                                                               fc->max_pages));
-                       if (IS_ERR(req))
+                       max_pages = iov_iter_npages(iter, fc->max_pages);
+                       ia = fuse_io_alloc(io, max_pages);
+                       if (!ia)
                                break;
                }
        }
-       if (!IS_ERR(req))
-               fuse_put_request(fc, req);
+       if (ia)
+               fuse_io_free(ia);
        if (res > 0)
                *ppos = pos;
 
@@ -1509,45 +1569,53 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return fuse_direct_write_iter(iocb, from);
 }
 
-static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_writepage_free(struct fuse_writepage_args *wpa)
 {
+       struct fuse_args_pages *ap = &wpa->ia.ap;
        int i;
 
-       for (i = 0; i < req->num_pages; i++)
-               __free_page(req->pages[i]);
+       for (i = 0; i < ap->num_pages; i++)
+               __free_page(ap->pages[i]);
+
+       if (wpa->ia.ff)
+               fuse_file_put(wpa->ia.ff, false, false);
 
-       if (req->ff)
-               fuse_file_put(req->ff, false, false);
+       kfree(ap->pages);
+       kfree(wpa);
 }
 
-static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_writepage_finish(struct fuse_conn *fc,
+                                 struct fuse_writepage_args *wpa)
 {
-       struct inode *inode = req->inode;
+       struct fuse_args_pages *ap = &wpa->ia.ap;
+       struct inode *inode = wpa->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
        struct backing_dev_info *bdi = inode_to_bdi(inode);
        int i;
 
-       list_del(&req->writepages_entry);
-       for (i = 0; i < req->num_pages; i++) {
+       list_del(&wpa->writepages_entry);
+       for (i = 0; i < ap->num_pages; i++) {
                dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-               dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+               dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
                wb_writeout_inc(&bdi->wb);
        }
        wake_up(&fi->page_waitq);
 }
 
 /* Called under fi->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
-                               loff_t size)
+static void fuse_send_writepage(struct fuse_conn *fc,
+                               struct fuse_writepage_args *wpa, loff_t size)
 __releases(fi->lock)
 __acquires(fi->lock)
 {
-       struct fuse_req *aux, *next;
-       struct fuse_inode *fi = get_fuse_inode(req->inode);
-       struct fuse_write_in *inarg = &req->misc.write.in;
-       __u64 data_size = req->num_pages * PAGE_SIZE;
-       bool queued;
+       struct fuse_writepage_args *aux, *next;
+       struct fuse_inode *fi = get_fuse_inode(wpa->inode);
+       struct fuse_write_in *inarg = &wpa->ia.write.in;
+       struct fuse_args *args = &wpa->ia.ap.args;
+       __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
+       int err;
 
+       fi->writectr++;
        if (inarg->offset + data_size <= size) {
                inarg->size = data_size;
        } else if (inarg->offset < size) {
@@ -1557,29 +1625,36 @@ __acquires(fi->lock)
                goto out_free;
        }
 
-       req->in.args[1].size = inarg->size;
-       queued = fuse_request_queue_background(fc, req);
+       args->in_args[1].size = inarg->size;
+       args->force = true;
+       args->nocreds = true;
+
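+       /*
+        * Try a non-blocking allocation under fi->lock first; on -ENOMEM
+        * drop the lock and retry with a NOFS allocation that cannot fail.
+        */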
+       err = fuse_simple_background(fc, args, GFP_ATOMIC);
+       if (err == -ENOMEM) {
+               spin_unlock(&fi->lock);
+               err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL);
+               spin_lock(&fi->lock);
+       }
+
        /* Fails on broken connection only */
-       if (unlikely(!queued))
+       if (unlikely(err))
                goto out_free;
 
-       fi->writectr++;
        return;
 
  out_free:
-       fuse_writepage_finish(fc, req);
+       fi->writectr--;
+       fuse_writepage_finish(fc, wpa);
        spin_unlock(&fi->lock);
 
        /* After fuse_writepage_finish() aux request list is private */
-       for (aux = req->misc.write.next; aux; aux = next) {
-               next = aux->misc.write.next;
-               aux->misc.write.next = NULL;
-               fuse_writepage_free(fc, aux);
-               fuse_put_request(fc, aux);
+       for (aux = wpa->next; aux; aux = next) {
+               next = aux->next;
+               aux->next = NULL;
+               fuse_writepage_free(aux);
        }
 
-       fuse_writepage_free(fc, req);
-       fuse_put_request(fc, req);
+       fuse_writepage_free(wpa);
        spin_lock(&fi->lock);
 }
 
@@ -1596,29 +1671,34 @@ __acquires(fi->lock)
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
        loff_t crop = i_size_read(inode);
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
 
        while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
-               req = list_entry(fi->queued_writes.next, struct fuse_req, list);
-               list_del_init(&req->list);
-               fuse_send_writepage(fc, req, crop);
+               wpa = list_entry(fi->queued_writes.next,
+                                struct fuse_writepage_args, queue_entry);
+               list_del_init(&wpa->queue_entry);
+               fuse_send_writepage(fc, wpa, crop);
        }
 }
 
-static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
+                              int error)
 {
-       struct inode *inode = req->inode;
+       struct fuse_writepage_args *wpa =
+               container_of(args, typeof(*wpa), ia.ap.args);
+       struct inode *inode = wpa->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
 
-       mapping_set_error(inode->i_mapping, req->out.h.error);
+       mapping_set_error(inode->i_mapping, error);
        spin_lock(&fi->lock);
-       while (req->misc.write.next) {
+       while (wpa->next) {
                struct fuse_conn *fc = get_fuse_conn(inode);
-               struct fuse_write_in *inarg = &req->misc.write.in;
-               struct fuse_req *next = req->misc.write.next;
-               req->misc.write.next = next->misc.write.next;
-               next->misc.write.next = NULL;
-               next->ff = fuse_file_get(req->ff);
+               struct fuse_write_in *inarg = &wpa->ia.write.in;
+               struct fuse_writepage_args *next = wpa->next;
+
+               wpa->next = next->next;
+               next->next = NULL;
+               next->ia.ff = fuse_file_get(wpa->ia.ff);
                list_add(&next->writepages_entry, &fi->writepages);
 
                /*
@@ -1647,9 +1727,9 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
                fuse_send_writepage(fc, next, inarg->offset + inarg->size);
        }
        fi->writectr--;
-       fuse_writepage_finish(fc, req);
+       fuse_writepage_finish(fc, wpa);
        spin_unlock(&fi->lock);
-       fuse_writepage_free(fc, req);
+       fuse_writepage_free(wpa);
 }
 
 static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
@@ -1691,52 +1771,71 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
        return err;
 }
 
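+/* Allocate writepage state with an initial single-page array */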
+static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
+{
+       struct fuse_writepage_args *wpa;
+       struct fuse_args_pages *ap;
+
+       wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
+       if (wpa) {
+               ap = &wpa->ia.ap;
+               ap->num_pages = 0;
+               ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
+               if (!ap->pages) {
+                       kfree(wpa);
+                       wpa = NULL;
+               }
+       }
+       return wpa;
+
+
 static int fuse_writepage_locked(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct inode *inode = mapping->host;
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
+       struct fuse_args_pages *ap;
        struct page *tmp_page;
        int error = -ENOMEM;
 
        set_page_writeback(page);
 
-       req = fuse_request_alloc_nofs(1);
-       if (!req)
+       wpa = fuse_writepage_args_alloc();
+       if (!wpa)
                goto err;
+       ap = &wpa->ia.ap;
 
-       /* writeback always goes to bg_queue */
-       __set_bit(FR_BACKGROUND, &req->flags);
        tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
        if (!tmp_page)
                goto err_free;
 
        error = -EIO;
-       req->ff = fuse_write_file_get(fc, fi);
-       if (!req->ff)
+       wpa->ia.ff = fuse_write_file_get(fc, fi);
+       if (!wpa->ia.ff)
                goto err_nofile;
 
-       fuse_write_fill(req, req->ff, page_offset(page), 0);
+       fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
 
        copy_highpage(tmp_page, page);
-       req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
-       req->misc.write.next = NULL;
-       req->in.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = tmp_page;
-       req->page_descs[0].offset = 0;
-       req->page_descs[0].length = PAGE_SIZE;
-       req->end = fuse_writepage_end;
-       req->inode = inode;
+       wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
+       wpa->next = NULL;
+       ap->args.in_pages = true;
+       ap->num_pages = 1;
+       ap->pages[0] = tmp_page;
+       ap->descs[0].offset = 0;
+       ap->descs[0].length = PAGE_SIZE;
+       ap->args.end = fuse_writepage_end;
+       wpa->inode = inode;
 
        inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
        inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
        spin_lock(&fi->lock);
-       list_add(&req->writepages_entry, &fi->writepages);
-       list_add_tail(&req->list, &fi->queued_writes);
+       list_add(&wpa->writepages_entry, &fi->writepages);
+       list_add_tail(&wpa->queue_entry, &fi->queued_writes);
        fuse_flush_writepages(inode);
        spin_unlock(&fi->lock);
 
@@ -1747,7 +1846,7 @@ static int fuse_writepage_locked(struct page *page)
 err_nofile:
        __free_page(tmp_page);
 err_free:
-       fuse_request_free(req);
+       kfree(wpa);
 err:
        mapping_set_error(page->mapping, error);
        end_page_writeback(page);
@@ -1767,6 +1866,7 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
                WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
 
                redirty_page_for_writepage(wbc, page);
+               unlock_page(page);
                return 0;
        }
 
@@ -1777,23 +1877,50 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
 }
 
 struct fuse_fill_wb_data {
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
        struct fuse_file *ff;
        struct inode *inode;
        struct page **orig_pages;
+       unsigned int max_pages;
 };
 
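+/*
+ * Grow the page array of an under-construction writepage request:
+ * double the capacity (at least to the default) up to fc->max_pages.
+ */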
+static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
+{
+       struct fuse_args_pages *ap = &data->wpa->ia.ap;
+       struct fuse_conn *fc = get_fuse_conn(data->inode);
+       struct page **pages;
+       struct fuse_page_desc *descs;
+       unsigned int npages = min_t(unsigned int,
+                                   max_t(unsigned int, data->max_pages * 2,
+                                         FUSE_DEFAULT_MAX_PAGES_PER_REQ),
+                                   fc->max_pages);
+       WARN_ON(npages <= data->max_pages);
+
+       pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
+       if (!pages)
+               return false;
+
+       memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
+       memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
+       kfree(ap->pages);
+       ap->pages = pages;
+       ap->descs = descs;
+       data->max_pages = npages;
+
+       return true;
+}
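
As a worked example of the growth policy above, here is a minimal user-space model. It assumes the kernel's FUSE_DEFAULT_MAX_PAGES_PER_REQ of 32 and an fc->max_pages cap of 256, under which a long sequential writeback grows the page vector 1 -> 32 -> 64 -> 128 -> 256:

	#include <stdio.h>

	/* Models only the sizing in fuse_pages_realloc(), not the allocation. */
	static unsigned int next_size(unsigned int cur, unsigned int def,
				      unsigned int cap)
	{
		unsigned int npages = cur * 2 > def ? cur * 2 : def;

		return npages < cap ? npages : cap;
	}

	int main(void)
	{
		unsigned int n = 1;	/* data->max_pages starts at 1 */

		while (n < 256) {
			n = next_size(n, 32, 256);
			printf("%u\n", n);	/* prints 32 64 128 256 */
		}
		return 0;
	}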
+
 static void fuse_writepages_send(struct fuse_fill_wb_data *data)
 {
-       struct fuse_req *req = data->req;
+       struct fuse_writepage_args *wpa = data->wpa;
        struct inode *inode = data->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
-       int num_pages = req->num_pages;
+       int num_pages = wpa->ia.ap.num_pages;
        int i;
 
-       req->ff = fuse_file_get(data->ff);
+       wpa->ia.ff = fuse_file_get(data->ff);
        spin_lock(&fi->lock);
-       list_add_tail(&req->list, &fi->queued_writes);
+       list_add_tail(&wpa->queue_entry, &fi->queued_writes);
        fuse_flush_writepages(inode);
        spin_unlock(&fi->lock);
 
@@ -1808,54 +1935,52 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
  * this new request onto the auxiliary list, otherwise reuse the existing one by
  * copying the new page contents over to the old temporary page.
  */
-static bool fuse_writepage_in_flight(struct fuse_req *new_req,
+static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
                                     struct page *page)
 {
-       struct fuse_conn *fc = get_fuse_conn(new_req->inode);
-       struct fuse_inode *fi = get_fuse_inode(new_req->inode);
-       struct fuse_req *tmp;
-       struct fuse_req *old_req;
+       struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
+       struct fuse_writepage_args *tmp;
+       struct fuse_writepage_args *old_wpa;
+       struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
 
-       WARN_ON(new_req->num_pages != 0);
+       WARN_ON(new_ap->num_pages != 0);
 
        spin_lock(&fi->lock);
-       list_del(&new_req->writepages_entry);
-       old_req = fuse_find_writeback(fi, page->index, page->index);
-       if (!old_req) {
-               list_add(&new_req->writepages_entry, &fi->writepages);
+       list_del(&new_wpa->writepages_entry);
+       old_wpa = fuse_find_writeback(fi, page->index, page->index);
+       if (!old_wpa) {
+               list_add(&new_wpa->writepages_entry, &fi->writepages);
                spin_unlock(&fi->lock);
                return false;
        }
 
-       new_req->num_pages = 1;
-       for (tmp = old_req->misc.write.next; tmp; tmp = tmp->misc.write.next) {
+       new_ap->num_pages = 1;
+       for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
                pgoff_t curr_index;
 
-               WARN_ON(tmp->inode != new_req->inode);
-               curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
+               WARN_ON(tmp->inode != new_wpa->inode);
+               curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
                if (curr_index == page->index) {
-                       WARN_ON(tmp->num_pages != 1);
-                       WARN_ON(!test_bit(FR_PENDING, &tmp->flags));
-                       swap(tmp->pages[0], new_req->pages[0]);
+                       WARN_ON(tmp->ia.ap.num_pages != 1);
+                       swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
                        break;
                }
        }
 
        if (!tmp) {
-               new_req->misc.write.next = old_req->misc.write.next;
-               old_req->misc.write.next = new_req;
+               new_wpa->next = old_wpa->next;
+               old_wpa->next = new_wpa;
        }
 
        spin_unlock(&fi->lock);
 
        if (tmp) {
-               struct backing_dev_info *bdi = inode_to_bdi(new_req->inode);
+               struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
 
                dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-               dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
+               dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
                wb_writeout_inc(&bdi->wb);
-               fuse_writepage_free(fc, new_req);
-               fuse_request_free(new_req);
+               fuse_writepage_free(new_wpa);
        }
 
        return true;
@@ -1865,7 +1990,8 @@ static int fuse_writepages_fill(struct page *page,
                struct writeback_control *wbc, void *_data)
 {
        struct fuse_fill_wb_data *data = _data;
-       struct fuse_req *req = data->req;
+       struct fuse_writepage_args *wpa = data->wpa;
+       struct fuse_args_pages *ap = &wpa->ia.ap;
        struct inode *inode = data->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1875,7 +2001,7 @@ static int fuse_writepages_fill(struct page *page,
 
        if (!data->ff) {
                err = -EIO;
-               data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
+               data->ff = fuse_write_file_get(fc, fi);
                if (!data->ff)
                        goto out_unlock;
        }
@@ -1888,16 +2014,16 @@ static int fuse_writepages_fill(struct page *page,
         */
        is_writeback = fuse_page_is_writeback(inode, page->index);
 
-       if (req && req->num_pages &&
-           (is_writeback || req->num_pages == fc->max_pages ||
-            (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
-            data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
+       if (wpa && ap->num_pages &&
+           (is_writeback || ap->num_pages == fc->max_pages ||
+            (ap->num_pages + 1) * PAGE_SIZE > fc->max_write ||
+            data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)) {
                fuse_writepages_send(data);
-               data->req = NULL;
-       } else if (req && req->num_pages == req->max_pages) {
-               if (!fuse_req_realloc_pages(fc, req, GFP_NOFS)) {
+               data->wpa = NULL;
+       } else if (wpa && ap->num_pages == data->max_pages) {
+               if (!fuse_pages_realloc(data)) {
                        fuse_writepages_send(data);
-                       req = data->req = NULL;
+                       data->wpa = NULL;
                }
        }
 
@@ -1915,59 +2041,58 @@ static int fuse_writepages_fill(struct page *page,
         * This is ensured by holding the page lock in page_mkwrite() while
         * checking fuse_page_is_writeback().  We already hold the page lock
         * since clear_page_dirty_for_io() and keep it held until we add the
-        * request to the fi->writepages list and increment req->num_pages.
+        * request to the fi->writepages list and increment ap->num_pages.
         * After this fuse_page_is_writeback() will indicate that the page is
         * under writeback, so we can release the page lock.
         */
-       if (data->req == NULL) {
-               struct fuse_inode *fi = get_fuse_inode(inode);
-
+       if (data->wpa == NULL) {
                err = -ENOMEM;
-               req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES);
-               if (!req) {
+               wpa = fuse_writepage_args_alloc();
+               if (!wpa) {
                        __free_page(tmp_page);
                        goto out_unlock;
                }
+               data->max_pages = 1;
 
-               fuse_write_fill(req, data->ff, page_offset(page), 0);
-               req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
-               req->misc.write.next = NULL;
-               req->in.argpages = 1;
-               __set_bit(FR_BACKGROUND, &req->flags);
-               req->num_pages = 0;
-               req->end = fuse_writepage_end;
-               req->inode = inode;
+               ap = &wpa->ia.ap;
+               fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0);
+               wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
+               wpa->next = NULL;
+               ap->args.in_pages = true;
+               ap->args.end = fuse_writepage_end;
+               ap->num_pages = 0;
+               wpa->inode = inode;
 
                spin_lock(&fi->lock);
-               list_add(&req->writepages_entry, &fi->writepages);
+               list_add(&wpa->writepages_entry, &fi->writepages);
                spin_unlock(&fi->lock);
 
-               data->req = req;
+               data->wpa = wpa;
        }
        set_page_writeback(page);
 
        copy_highpage(tmp_page, page);
-       req->pages[req->num_pages] = tmp_page;
-       req->page_descs[req->num_pages].offset = 0;
-       req->page_descs[req->num_pages].length = PAGE_SIZE;
+       ap->pages[ap->num_pages] = tmp_page;
+       ap->descs[ap->num_pages].offset = 0;
+       ap->descs[ap->num_pages].length = PAGE_SIZE;
 
        inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
        inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
        err = 0;
-       if (is_writeback && fuse_writepage_in_flight(req, page)) {
+       if (is_writeback && fuse_writepage_in_flight(wpa, page)) {
                end_page_writeback(page);
-               data->req = NULL;
+               data->wpa = NULL;
                goto out_unlock;
        }
-       data->orig_pages[req->num_pages] = page;
+       data->orig_pages[ap->num_pages] = page;
 
        /*
         * Protected by fi->lock against concurrent access by
         * fuse_page_is_writeback().
         */
        spin_lock(&fi->lock);
-       req->num_pages++;
+       ap->num_pages++;
        spin_unlock(&fi->lock);
 
 out_unlock:
@@ -1989,7 +2114,7 @@ static int fuse_writepages(struct address_space *mapping,
                goto out;
 
        data.inode = inode;
-       data.req = NULL;
+       data.wpa = NULL;
        data.ff = NULL;
 
        err = -ENOMEM;
@@ -2000,9 +2125,9 @@ static int fuse_writepages(struct address_space *mapping,
                goto out;
 
        err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
-       if (data.req) {
+       if (data.wpa) {
                /* Ignore errors if we can write at least one page */
-               BUG_ON(!data.req->num_pages);
+               WARN_ON(!data.wpa->ia.ap.num_pages);
                fuse_writepages_send(&data);
                err = 0;
        }
@@ -2222,11 +2347,11 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
        inarg->lk.pid = pid;
        if (flock)
                inarg->lk_flags |= FUSE_LK_FLOCK;
-       args->in.h.opcode = opcode;
-       args->in.h.nodeid = get_node_id(inode);
-       args->in.numargs = 1;
-       args->in.args[0].size = sizeof(*inarg);
-       args->in.args[0].value = inarg;
+       args->opcode = opcode;
+       args->nodeid = get_node_id(inode);
+       args->in_numargs = 1;
+       args->in_args[0].size = sizeof(*inarg);
+       args->in_args[0].value = inarg;
 }
 
 static int fuse_getlk(struct file *file, struct file_lock *fl)
@@ -2239,9 +2364,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
        int err;
 
        fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err)
                err = convert_fuse_file_lock(fc, &outarg.lk, fl);
@@ -2336,14 +2461,14 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
        memset(&inarg, 0, sizeof(inarg));
        inarg.block = block;
        inarg.blocksize = inode->i_sb->s_blocksize;
-       args.in.h.opcode = FUSE_BMAP;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_BMAP;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS)
                fc->no_bmap = 1;
@@ -2368,14 +2493,14 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
        if (fc->no_lseek)
                goto fallback;
 
-       args.in.h.opcode = FUSE_LSEEK;
-       args.in.h.nodeid = ff->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_LSEEK;
+       args.nodeid = ff->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (err) {
                if (err == -ENOSYS) {
@@ -2573,14 +2698,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
                .flags = flags
        };
        struct fuse_ioctl_out outarg;
-       struct fuse_req *req = NULL;
-       struct page **pages = NULL;
        struct iovec *iov_page = NULL;
        struct iovec *in_iov = NULL, *out_iov = NULL;
-       unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
-       size_t in_size, out_size, transferred, c;
+       unsigned int in_iovs = 0, out_iovs = 0, max_pages;
+       size_t in_size, out_size, c;
+       ssize_t transferred;
        int err, i;
        struct iov_iter ii;
+       struct fuse_args_pages ap = {};
 
 #if BITS_PER_LONG == 32
        inarg.flags |= FUSE_IOCTL_32BIT;
@@ -2598,11 +2723,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
        BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
        err = -ENOMEM;
-       pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
+       ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs);
        iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
-       if (!pages || !iov_page)
+       if (!ap.pages || !iov_page)
                goto out;
 
+       fuse_page_descs_length_init(ap.descs, 0, fc->max_pages);
+
        /*
         * If restricted, initialize IO parameters as encoded in @cmd.
         * RETRY from server is not allowed.
@@ -2639,56 +2766,44 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
        err = -ENOMEM;
        if (max_pages > fc->max_pages)
                goto out;
-       while (num_pages < max_pages) {
-               pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
-               if (!pages[num_pages])
+       while (ap.num_pages < max_pages) {
+               ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+               if (!ap.pages[ap.num_pages])
                        goto out;
-               num_pages++;
+               ap.num_pages++;
        }
 
-       req = fuse_get_req(fc, num_pages);
-       if (IS_ERR(req)) {
-               err = PTR_ERR(req);
-               req = NULL;
-               goto out;
-       }
-       memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
-       req->num_pages = num_pages;
-       fuse_page_descs_length_init(req, 0, req->num_pages);
 
        /* okay, let's send it to the client */
-       req->in.h.opcode = FUSE_IOCTL;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(inarg);
-       req->in.args[0].value = &inarg;
+       ap.args.opcode = FUSE_IOCTL;
+       ap.args.nodeid = ff->nodeid;
+       ap.args.in_numargs = 1;
+       ap.args.in_args[0].size = sizeof(inarg);
+       ap.args.in_args[0].value = &inarg;
        if (in_size) {
-               req->in.numargs++;
-               req->in.args[1].size = in_size;
-               req->in.argpages = 1;
+               ap.args.in_numargs++;
+               ap.args.in_args[1].size = in_size;
+               ap.args.in_pages = true;
 
                err = -EFAULT;
                iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
-               for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
-                       c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
+               for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
+                       c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
                        if (c != PAGE_SIZE && iov_iter_count(&ii))
                                goto out;
                }
        }
 
-       req->out.numargs = 2;
-       req->out.args[0].size = sizeof(outarg);
-       req->out.args[0].value = &outarg;
-       req->out.args[1].size = out_size;
-       req->out.argpages = 1;
-       req->out.argvar = 1;
+       ap.args.out_numargs = 2;
+       ap.args.out_args[0].size = sizeof(outarg);
+       ap.args.out_args[0].value = &outarg;
+       ap.args.out_args[1].size = out_size;
+       ap.args.out_pages = true;
+       ap.args.out_argvar = true;
 
-       fuse_request_send(fc, req);
-       err = req->out.h.error;
-       transferred = req->out.args[1].size;
-       fuse_put_request(fc, req);
-       req = NULL;
-       if (err)
+       transferred = fuse_simple_request(fc, &ap.args);
+       err = transferred;
+       if (transferred < 0)
                goto out;
 
        /* did it ask for retry? */
@@ -2713,7 +2828,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
                    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
                        goto out;
 
-               vaddr = kmap_atomic(pages[0]);
+               vaddr = kmap_atomic(ap.pages[0]);
                err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
                                            transferred, in_iovs + out_iovs,
                                            (flags & FUSE_IOCTL_COMPAT) != 0);
@@ -2741,19 +2856,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 
        err = -EFAULT;
        iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
-       for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
-               c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
+       for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
+               c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
                if (c != PAGE_SIZE && iov_iter_count(&ii))
                        goto out;
        }
        err = 0;
  out:
-       if (req)
-               fuse_put_request(fc, req);
        free_page((unsigned long) iov_page);
-       while (num_pages)
-               __free_page(pages[--num_pages]);
-       kfree(pages);
+       while (ap.num_pages)
+               __free_page(ap.pages[--ap.num_pages]);
+       kfree(ap.pages);
 
        return err ? err : outarg.result;
 }
@@ -2861,14 +2974,14 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
                fuse_register_polled_file(fc, ff);
        }
 
-       args.in.h.opcode = FUSE_POLL;
-       args.in.h.nodeid = ff->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_POLL;
+       args.nodeid = ff->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
 
        if (!err)
@@ -3076,11 +3189,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
        if (!(mode & FALLOC_FL_KEEP_SIZE))
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
-       args.in.h.opcode = FUSE_FALLOCATE;
-       args.in.h.nodeid = ff->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = FUSE_FALLOCATE;
+       args.nodeid = ff->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_fallocate = 1;
@@ -3168,14 +3281,14 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
        if (is_unstable)
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
 
-       args.in.h.opcode = FUSE_COPY_FILE_RANGE;
-       args.in.h.nodeid = ff_in->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_COPY_FILE_RANGE;
+       args.nodeid = ff_in->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_copy_file_range = 1;
index 24dbca7..d148188 100644 (file)
@@ -47,9 +47,6 @@
 /** Number of dentries for each connection in the control filesystem */
 #define FUSE_CTL_NUM_DENTRIES 5
 
-/** Number of page pointers embedded in fuse_req */
-#define FUSE_REQ_INLINE_PAGES 1
-
 /** List of active connections */
 extern struct list_head fuse_conn_list;
 
@@ -164,17 +161,15 @@ enum {
 };
 
 struct fuse_conn;
+struct fuse_release_args;
 
 /** FUSE specific file data */
 struct fuse_file {
        /** Fuse connection for this file */
        struct fuse_conn *fc;
 
-       /*
-        * Request reserved for flush and release.
-        * Modified under relative fuse_inode::lock.
-        */
-       struct fuse_req *reserved_req;
+       /* Argument space reserved for release */
+       struct fuse_release_args *release_args;
 
        /** Kernel file handle guaranteed to be unique */
        u64 kh;
@@ -229,57 +224,12 @@ struct fuse_in_arg {
        const void *value;
 };
 
-/** The request input */
-struct fuse_in {
-       /** The request header */
-       struct fuse_in_header h;
-
-       /** True if the data for the last argument is in req->pages */
-       unsigned argpages:1;
-
-       /** Number of arguments */
-       unsigned numargs;
-
-       /** Array of arguments */
-       struct fuse_in_arg args[3];
-};
-
 /** One output argument of a request */
 struct fuse_arg {
        unsigned size;
        void *value;
 };
 
-/** The request output */
-struct fuse_out {
-       /** Header returned from userspace */
-       struct fuse_out_header h;
-
-       /*
-        * The following bitfields are not changed during the request
-        * processing
-        */
-
-       /** Last argument is variable length (can be shorter than
-           arg->size) */
-       unsigned argvar:1;
-
-       /** Last argument is a list of pages to copy data to */
-       unsigned argpages:1;
-
-       /** Zero partially or not copied pages */
-       unsigned page_zeroing:1;
-
-       /** Pages may be replaced with new ones */
-       unsigned page_replace:1;
-
-       /** Number of arguments */
-       unsigned numargs;
-
-       /** Array of arguments */
-       struct fuse_arg args[2];
-};
-
 /** FUSE page descriptor */
 struct fuse_page_desc {
        unsigned int length;
@@ -287,20 +237,28 @@ struct fuse_page_desc {
 };
 
 struct fuse_args {
-       struct {
-               struct {
-                       uint32_t opcode;
-                       uint64_t nodeid;
-               } h;
-               unsigned numargs;
-               struct fuse_in_arg args[3];
+       uint64_t nodeid;
+       uint32_t opcode;
+       unsigned short in_numargs;
+       unsigned short out_numargs;
+       bool force:1;
+       bool noreply:1;
+       bool nocreds:1;
+       bool in_pages:1;
+       bool out_pages:1;
+       bool out_argvar:1;
+       bool page_zeroing:1;
+       bool page_replace:1;
+       struct fuse_in_arg in_args[3];
+       struct fuse_arg out_args[2];
+       void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
+};
 
-       } in;
-       struct {
-               unsigned argvar:1;
-               unsigned numargs;
-               struct fuse_arg args[2];
-       } out;
+struct fuse_args_pages {
+       struct fuse_args args;
+       struct page **pages;
+       struct fuse_page_desc *descs;
+       unsigned int num_pages;
 };
 
 #define FUSE_ARGS(args) struct fuse_args args = {}
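
With the request I/O descriptions flattened into this single struct, a typical one-in/one-out request is filled and sent as below. This is only a sketch (example_getattr is a hypothetical helper), mirroring the FUSE_STATFS and FUSE_BMAP conversions earlier in this diff:

	static int example_getattr(struct fuse_conn *fc, u64 nodeid,
				   struct fuse_getattr_in *inarg,
				   struct fuse_attr_out *outarg)
	{
		FUSE_ARGS(args);

		args.opcode = FUSE_GETATTR;
		args.nodeid = nodeid;
		args.in_numargs = 1;
		args.in_args[0].size = sizeof(*inarg);
		args.in_args[0].value = inarg;
		args.out_numargs = 1;
		args.out_args[0].size = sizeof(*outarg);
		args.out_args[0].value = outarg;

		/* 0 on success (out_argvar unset), negative errno on failure */
		return fuse_simple_request(fc, &args);
	}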
@@ -373,83 +331,79 @@ struct fuse_req {
        /** Entry on the interrupts list  */
        struct list_head intr_entry;
 
+       /* Input/output arguments */
+       struct fuse_args *args;
+
        /** refcount */
        refcount_t count;
 
        /* Request flags, updated with test/set/clear_bit() */
        unsigned long flags;
 
-       /** The request input */
-       struct fuse_in in;
+       /* The request input header */
+       struct {
+               struct fuse_in_header h;
+       } in;
 
-       /** The request output */
-       struct fuse_out out;
+       /* The request output header */
+       struct {
+               struct fuse_out_header h;
+       } out;
 
        /** Used to wake up the task waiting for completion of the request */
        wait_queue_head_t waitq;
 
-       /** Data for asynchronous requests */
-       union {
-               struct {
-                       struct fuse_release_in in;
-                       struct inode *inode;
-               } release;
-               struct fuse_init_in init_in;
-               struct fuse_init_out init_out;
-               struct cuse_init_in cuse_init_in;
-               struct {
-                       struct fuse_read_in in;
-                       u64 attr_ver;
-               } read;
-               struct {
-                       struct fuse_write_in in;
-                       struct fuse_write_out out;
-                       struct fuse_req *next;
-               } write;
-               struct fuse_notify_retrieve_in retrieve_in;
-       } misc;
-
-       /** page vector */
-       struct page **pages;
-
-       /** page-descriptor vector */
-       struct fuse_page_desc *page_descs;
-
-       /** size of the 'pages' array */
-       unsigned max_pages;
-
-       /** inline page vector */
-       struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
-
-       /** inline page-descriptor vector */
-       struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
-
-       /** number of pages in vector */
-       unsigned num_pages;
-
-       /** File used in the request (or NULL) */
-       struct fuse_file *ff;
+#if IS_ENABLED(CONFIG_VIRTIO_FS)
+       /** virtio-fs's physically contiguous buffer for in and out args */
+       void *argbuf;
+#endif
+};
 
-       /** Inode used in the request or NULL */
-       struct inode *inode;
+struct fuse_iqueue;
 
-       /** AIO control block */
-       struct fuse_io_priv *io;
+/**
+ * Input queue callbacks
+ *
+ * Input queue signalling is device-specific.  For example, the /dev/fuse file
+ * uses fiq->waitq and fasync to wake processes that are waiting on queue
+ * readiness.  These callbacks allow other device types to respond to input
+ * queue activity.
+ */
+struct fuse_iqueue_ops {
+       /**
+        * Signal that a forget has been queued
+        */
+       void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
+               __releases(fiq->lock);
 
-       /** Link on fi->writepages */
-       struct list_head writepages_entry;
+       /**
+        * Signal that an INTERRUPT request has been queued
+        */
+       void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
+               __releases(fiq->lock);
 
-       /** Request completion callback */
-       void (*end)(struct fuse_conn *, struct fuse_req *);
+       /**
+        * Signal that a request has been queued
+        */
+       void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
+               __releases(fiq->lock);
 
-       /** Request is stolen from fuse_file->reserved_req */
-       struct file *stolen_file;
+       /**
+        * Clean up when fuse_iqueue is destroyed
+        */
+       void (*release)(struct fuse_iqueue *fiq);
 };
 
+/** /dev/fuse input queue operations */
+extern const struct fuse_iqueue_ops fuse_dev_fiq_ops;
+
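
Each wake_* callback is entered with fiq->lock held and must drop it, per the __releases() annotations. A hedged reconstruction of what the /dev/fuse operations plausibly do, going by the comment above (the example_* names are invented; the real table is fuse_dev_fiq_ops, defined in dev.c):

	static void example_dev_wake_and_unlock(struct fuse_iqueue *fiq)
		__releases(fiq->lock)
	{
		wake_up(&fiq->waitq);
		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
		spin_unlock(&fiq->lock);
	}

	static const struct fuse_iqueue_ops example_dev_fiq_ops = {
		.wake_forget_and_unlock		= example_dev_wake_and_unlock,
		.wake_interrupt_and_unlock	= example_dev_wake_and_unlock,
		.wake_pending_and_unlock	= example_dev_wake_and_unlock,
	};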
 struct fuse_iqueue {
        /** Connection established */
        unsigned connected;
 
+       /** Lock protecting accesses to members of this structure */
+       spinlock_t lock;
+
        /** Readers of the connection are waiting on this */
        wait_queue_head_t waitq;
 
@@ -471,6 +425,12 @@ struct fuse_iqueue {
 
        /** O_ASYNC requests */
        struct fasync_struct *fasync;
+
+       /** Device-specific callbacks */
+       const struct fuse_iqueue_ops *ops;
+
+       /** Device-specific state */
+       void *priv;
 };
 
 #define FUSE_PQ_HASH_BITS 8
@@ -504,6 +464,30 @@ struct fuse_dev {
        struct list_head entry;
 };
 
+struct fuse_fs_context {
+       int fd;
+       unsigned int rootmode;
+       kuid_t user_id;
+       kgid_t group_id;
+       bool is_bdev:1;
+       bool fd_present:1;
+       bool rootmode_present:1;
+       bool user_id_present:1;
+       bool group_id_present:1;
+       bool default_permissions:1;
+       bool allow_other:1;
+       bool destroy:1;
+       bool no_control:1;
+       bool no_force_umount:1;
+       bool no_mount_options:1;
+       unsigned int max_read;
+       unsigned int blksize;
+       const char *subtype;
+
+       /* fuse_dev pointer to fill in, should contain NULL on entry */
+       void **fudptr;
+};
+
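
A sketch of how an alternate (non-/dev/fuse) transport might fill this context and call fuse_fill_super_common(), declared later in this header. The values are illustrative and every example_* name is invented:

	static void *example_fud;	/* *fudptr must be NULL on entry */

	static int example_fill_super(struct super_block *sb)
	{
		struct fuse_fs_context ctx = {
			.rootmode		= S_IFDIR,
			.default_permissions	= 1,
			.allow_other		= 1,
			.max_read		= UINT_MAX,
			.blksize		= 512,
			.fudptr			= &example_fud,
		};

		return fuse_fill_super_common(sb, &ctx);
	}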
 /**
  * A Fuse connection.
  *
@@ -584,9 +568,6 @@ struct fuse_conn {
        /** waitq for blocked connection */
        wait_queue_head_t blocked_waitq;
 
-       /** waitq for reserved requests */
-       wait_queue_head_t reserved_req_waitq;
-
        /** Connection established, cleared on umount, connection
            abort and device release */
        unsigned connected;
@@ -721,6 +702,21 @@ struct fuse_conn {
        /** Does the filesystem support copy_file_range? */
        unsigned no_copy_file_range:1;
 
+       /* Send DESTROY request */
+       unsigned int destroy:1;
+
+       /* Delete dentries that have gone stale */
+       unsigned int delete_stale:1;
+
+       /** Do not create entry in fusectl fs */
+       unsigned int no_control:1;
+
+       /** Do not allow MNT_FORCE umount */
+       unsigned int no_force_umount:1;
+
+       /* Do not show mount options */
+       unsigned int no_mount_options:1;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
@@ -742,9 +738,6 @@ struct fuse_conn {
        /** Key for lock owner ID scrambling */
        u32 scramble_key[4];
 
-       /** Reserved request for the DESTROY message */
-       struct fuse_req *destroy_req;
-
        /** Version counter for attribute changes */
        atomic64_t attr_version;
 
@@ -820,14 +813,32 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 
 struct fuse_forget_link *fuse_alloc_forget(void);
 
-/* Used by READDIRPLUS */
-void fuse_force_forget(struct file *file, u64 nodeid);
+struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
+                                            unsigned int max,
+                                            unsigned int *countp);
 
-/**
+/*
  * Initialize READ or READDIR request
  */
-void fuse_read_fill(struct fuse_req *req, struct file *file,
-                   loff_t pos, size_t count, int opcode);
+struct fuse_io_args {
+       union {
+               struct {
+                       struct fuse_read_in in;
+                       u64 attr_ver;
+               } read;
+               struct {
+                       struct fuse_write_in in;
+                       struct fuse_write_out out;
+               } write;
+       };
+       struct fuse_args_pages ap;
+       struct fuse_io_priv *io;
+       struct fuse_file *ff;
+};
+
+void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
+                        size_t count, int opcode);
+
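
A sketch of a synchronous READ through the new container, assuming fuse_read_args_fill() prepares the input argument and variable-length output while the caller attaches the page vector, as the converted read paths in file.c do; example_read_sync is hypothetical:

	static ssize_t example_read_sync(struct fuse_conn *fc, struct file *file,
					 loff_t pos, size_t count)
	{
		struct fuse_io_args ia = {};

		fuse_read_args_fill(&ia, file, pos, count, FUSE_READ);
		ia.ap.args.out_pages = true;
		/* attach ia.ap.pages / ia.ap.descs / ia.ap.num_pages here */

		/* returns bytes transferred (out_argvar) or negative errno */
		return fuse_simple_request(fc, &ia.ap.args);
	}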
 
 /**
  * Send OPEN or OPENDIR request
@@ -899,62 +910,17 @@ void fuse_dev_cleanup(void);
 int fuse_ctl_init(void);
 void __exit fuse_ctl_cleanup(void);
 
-/**
- * Allocate a request
- */
-struct fuse_req *fuse_request_alloc(unsigned npages);
-
-struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
-
-bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
-                           gfp_t flags);
-
-
-/**
- * Free a request
- */
-void fuse_request_free(struct fuse_req *req);
-
-/**
- * Get a request, may fail with -ENOMEM,
- * caller should specify # elements in req->pages[] explicitly
- */
-struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
-struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
-                                            unsigned npages);
-
-/*
- * Increment reference count on request
- */
-void __fuse_get_request(struct fuse_req *req);
-
-/**
- * Gets a requests for a file operation, always succeeds
- */
-struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
-                                            struct file *file);
-
-/**
- * Decrement reference count of a request.  If count goes to zero free
- * the request.
- */
-void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
-
-/**
- * Send a request (synchronous)
- */
-void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
-
 /**
  * Simple request sending that does request allocation and freeing
  */
 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
+int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
+                          gfp_t gfp_flags);
 
 /**
- * Send a request in the background
+ * End a finished request
  */
-void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
-bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req);
 
 /* Abort all requests */
 void fuse_abort_conn(struct fuse_conn *fc);
@@ -980,15 +946,33 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 /**
  * Initialize fuse_conn
  */
-void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
+                   const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
 
 /**
  * Release reference to fuse_conn
  */
 void fuse_conn_put(struct fuse_conn *fc);
 
-struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc);
+struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
+struct fuse_dev *fuse_dev_alloc(void);
+void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
 void fuse_dev_free(struct fuse_dev *fud);
+void fuse_send_init(struct fuse_conn *fc);
+
+/**
+ * Fill in superblock and initialize fuse connection
+ * @sb: partially-initialized superblock to fill in
+ * @ctx: mount context
+ */
+int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
+
+/**
+ * Disassociate fuse connection from superblock and kill the superblock
+ *
+ * Calls kill_anon_super(), do not use with bdev mounts.
+ */
+void fuse_kill_sb_anon(struct super_block *sb);
 
 /**
  * Add connection to control filesystem
@@ -1093,4 +1077,15 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 /* readdir.c */
 int fuse_readdir(struct file *file, struct dir_context *ctx);
 
+/**
+ * Return the number of bytes in an arguments list
+ */
+unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
+
+/**
+ * Get the next unique ID for a request
+ */
+u64 fuse_get_unique(struct fuse_iqueue *fiq);
+void fuse_free_conn(struct fuse_conn *fc);
+
 #endif /* _FS_FUSE_I_H */
index 4bb885b..16aec32 100644 (file)
@@ -15,7 +15,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/statfs.h>
 #include <linux/random.h>
 #include <linux/sched.h>
@@ -59,24 +60,13 @@ MODULE_PARM_DESC(max_user_congthresh,
 /** Congestion starts at 75% of maximum */
 #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
 
-struct fuse_mount_data {
-       int fd;
-       unsigned rootmode;
-       kuid_t user_id;
-       kgid_t group_id;
-       unsigned fd_present:1;
-       unsigned rootmode_present:1;
-       unsigned user_id_present:1;
-       unsigned group_id_present:1;
-       unsigned default_permissions:1;
-       unsigned allow_other:1;
-       unsigned max_read;
-       unsigned blksize;
-};
+#ifdef CONFIG_BLOCK
+static struct file_system_type fuseblk_fs_type;
+#endif
 
 struct fuse_forget_link *fuse_alloc_forget(void)
 {
-       return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
+       return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
 }
 
 static struct inode *fuse_alloc_inode(struct super_block *sb)
@@ -374,19 +364,21 @@ void fuse_unlock_inode(struct inode *inode, bool locked)
 
 static void fuse_umount_begin(struct super_block *sb)
 {
-       fuse_abort_conn(get_fuse_conn_super(sb));
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+       if (!fc->no_force_umount)
+               fuse_abort_conn(fc);
 }
 
 static void fuse_send_destroy(struct fuse_conn *fc)
 {
-       struct fuse_req *req = fc->destroy_req;
-       if (req && fc->conn_init) {
-               fc->destroy_req = NULL;
-               req->in.h.opcode = FUSE_DESTROY;
-               __set_bit(FR_FORCE, &req->flags);
-               __clear_bit(FR_BACKGROUND, &req->flags);
-               fuse_request_send(fc, req);
-               fuse_put_request(fc, req);
+       if (fc->conn_init) {
+               FUSE_ARGS(args);
+
+               args.opcode = FUSE_DESTROY;
+               args.force = true;
+               args.nocreds = true;
+               fuse_simple_request(fc, &args);
        }
 }
 
@@ -430,12 +422,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
        }
 
        memset(&outarg, 0, sizeof(outarg));
-       args.in.numargs = 0;
-       args.in.h.opcode = FUSE_STATFS;
-       args.in.h.nodeid = get_node_id(d_inode(dentry));
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.in_numargs = 0;
+       args.opcode = FUSE_STATFS;
+       args.nodeid = get_node_id(d_inode(dentry));
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err)
                convert_fuse_statfs(buf, &outarg.st);
@@ -443,6 +435,8 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 }
 
 enum {
+       OPT_SOURCE,
+       OPT_SUBTYPE,
        OPT_FD,
        OPT_ROOTMODE,
        OPT_USER_ID,
@@ -454,111 +448,109 @@ enum {
        OPT_ERR
 };
 
-static const match_table_t tokens = {
-       {OPT_FD,                        "fd=%u"},
-       {OPT_ROOTMODE,                  "rootmode=%o"},
-       {OPT_USER_ID,                   "user_id=%u"},
-       {OPT_GROUP_ID,                  "group_id=%u"},
-       {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
-       {OPT_ALLOW_OTHER,               "allow_other"},
-       {OPT_MAX_READ,                  "max_read=%u"},
-       {OPT_BLKSIZE,                   "blksize=%u"},
-       {OPT_ERR,                       NULL}
+static const struct fs_parameter_spec fuse_param_specs[] = {
+       fsparam_string  ("source",              OPT_SOURCE),
+       fsparam_u32     ("fd",                  OPT_FD),
+       fsparam_u32oct  ("rootmode",            OPT_ROOTMODE),
+       fsparam_u32     ("user_id",             OPT_USER_ID),
+       fsparam_u32     ("group_id",            OPT_GROUP_ID),
+       fsparam_flag    ("default_permissions", OPT_DEFAULT_PERMISSIONS),
+       fsparam_flag    ("allow_other",         OPT_ALLOW_OTHER),
+       fsparam_u32     ("max_read",            OPT_MAX_READ),
+       fsparam_u32     ("blksize",             OPT_BLKSIZE),
+       fsparam_string  ("subtype",             OPT_SUBTYPE),
+       {}
+};
+
+static const struct fs_parameter_description fuse_fs_parameters = {
+       .name           = "fuse",
+       .specs          = fuse_param_specs,
 };
 
-static int fuse_match_uint(substring_t *s, unsigned int *res)
+static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-       int err = -ENOMEM;
-       char *buf = match_strdup(s);
-       if (buf) {
-               err = kstrtouint(buf, 10, res);
-               kfree(buf);
+       struct fs_parse_result result;
+       struct fuse_fs_context *ctx = fc->fs_private;
+       int opt;
+
+       opt = fs_parse(fc, &fuse_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+
+       switch (opt) {
+       case OPT_SOURCE:
+               if (fc->source)
+                       return invalf(fc, "fuse: Multiple sources specified");
+               fc->source = param->string;
+               param->string = NULL;
+               break;
+
+       case OPT_SUBTYPE:
+               if (ctx->subtype)
+                       return invalf(fc, "fuse: Multiple subtypes specified");
+               ctx->subtype = param->string;
+               param->string = NULL;
+               return 0;
+
+       case OPT_FD:
+               ctx->fd = result.uint_32;
+               ctx->fd_present = 1;
+               break;
+
+       case OPT_ROOTMODE:
+               if (!fuse_valid_type(result.uint_32))
+                       return invalf(fc, "fuse: Invalid rootmode");
+               ctx->rootmode = result.uint_32;
+               ctx->rootmode_present = 1;
+               break;
+
+       case OPT_USER_ID:
+               ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
+               if (!uid_valid(ctx->user_id))
+                       return invalf(fc, "fuse: Invalid user_id");
+               ctx->user_id_present = 1;
+               break;
+
+       case OPT_GROUP_ID:
+               ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
+               if (!gid_valid(ctx->group_id))
+                       return invalf(fc, "fuse: Invalid group_id");
+               ctx->group_id_present = 1;
+               break;
+
+       case OPT_DEFAULT_PERMISSIONS:
+               ctx->default_permissions = 1;
+               break;
+
+       case OPT_ALLOW_OTHER:
+               ctx->allow_other = 1;
+               break;
+
+       case OPT_MAX_READ:
+               ctx->max_read = result.uint_32;
+               break;
+
+       case OPT_BLKSIZE:
+               if (!ctx->is_bdev)
+                       return invalf(fc, "fuse: blksize only supported for fuseblk");
+               ctx->blksize = result.uint_32;
+               break;
+
+       default:
+               return -EINVAL;
        }
-       return err;
+
+       return 0;
 }
 
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
-                         struct user_namespace *user_ns)
+static void fuse_free_fc(struct fs_context *fc)
 {
-       char *p;
-       memset(d, 0, sizeof(struct fuse_mount_data));
-       d->max_read = ~0;
-       d->blksize = FUSE_DEFAULT_BLKSIZE;
-
-       while ((p = strsep(&opt, ",")) != NULL) {
-               int token;
-               int value;
-               unsigned uv;
-               substring_t args[MAX_OPT_ARGS];
-               if (!*p)
-                       continue;
-
-               token = match_token(p, tokens, args);
-               switch (token) {
-               case OPT_FD:
-                       if (match_int(&args[0], &value))
-                               return 0;
-                       d->fd = value;
-                       d->fd_present = 1;
-                       break;
-
-               case OPT_ROOTMODE:
-                       if (match_octal(&args[0], &value))
-                               return 0;
-                       if (!fuse_valid_type(value))
-                               return 0;
-                       d->rootmode = value;
-                       d->rootmode_present = 1;
-                       break;
-
-               case OPT_USER_ID:
-                       if (fuse_match_uint(&args[0], &uv))
-                               return 0;
-                       d->user_id = make_kuid(user_ns, uv);
-                       if (!uid_valid(d->user_id))
-                               return 0;
-                       d->user_id_present = 1;
-                       break;
-
-               case OPT_GROUP_ID:
-                       if (fuse_match_uint(&args[0], &uv))
-                               return 0;
-                       d->group_id = make_kgid(user_ns, uv);
-                       if (!gid_valid(d->group_id))
-                               return 0;
-                       d->group_id_present = 1;
-                       break;
-
-               case OPT_DEFAULT_PERMISSIONS:
-                       d->default_permissions = 1;
-                       break;
-
-               case OPT_ALLOW_OTHER:
-                       d->allow_other = 1;
-                       break;
-
-               case OPT_MAX_READ:
-                       if (match_int(&args[0], &value))
-                               return 0;
-                       d->max_read = value;
-                       break;
-
-               case OPT_BLKSIZE:
-                       if (!is_bdev || match_int(&args[0], &value))
-                               return 0;
-                       d->blksize = value;
-                       break;
-
-               default:
-                       return 0;
-               }
-       }
+       struct fuse_fs_context *ctx = fc->fs_private;
 
-       if (!d->fd_present || !d->rootmode_present ||
-           !d->user_id_present || !d->group_id_present)
-               return 0;
-
-       return 1;
+       if (ctx) {
+               kfree(ctx->subtype);
+               kfree(ctx);
+       }
 }
 
 static int fuse_show_options(struct seq_file *m, struct dentry *root)
@@ -566,6 +558,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
        struct super_block *sb = root->d_sb;
        struct fuse_conn *fc = get_fuse_conn_super(sb);
 
+       if (fc->no_mount_options)
+               return 0;
+
        seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
        seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
        if (fc->default_permissions)
@@ -579,14 +574,19 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
        return 0;
 }
 
-static void fuse_iqueue_init(struct fuse_iqueue *fiq)
+static void fuse_iqueue_init(struct fuse_iqueue *fiq,
+                            const struct fuse_iqueue_ops *ops,
+                            void *priv)
 {
        memset(fiq, 0, sizeof(struct fuse_iqueue));
+       spin_lock_init(&fiq->lock);
        init_waitqueue_head(&fiq->waitq);
        INIT_LIST_HEAD(&fiq->pending);
        INIT_LIST_HEAD(&fiq->interrupts);
        fiq->forget_list_tail = &fiq->forget_list_head;
        fiq->connected = 1;
+       fiq->ops = ops;
+       fiq->priv = priv;
 }
 
 static void fuse_pqueue_init(struct fuse_pqueue *fpq)
@@ -600,7 +600,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
        fpq->connected = 1;
 }
 
-void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
+                   const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
 {
        memset(fc, 0, sizeof(*fc));
        spin_lock_init(&fc->lock);
@@ -609,8 +610,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
        refcount_set(&fc->count, 1);
        atomic_set(&fc->dev_count, 1);
        init_waitqueue_head(&fc->blocked_waitq);
-       init_waitqueue_head(&fc->reserved_req_waitq);
-       fuse_iqueue_init(&fc->iq);
+       fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
        INIT_LIST_HEAD(&fc->bg_queue);
        INIT_LIST_HEAD(&fc->entry);
        INIT_LIST_HEAD(&fc->devices);
@@ -633,8 +633,10 @@ EXPORT_SYMBOL_GPL(fuse_conn_init);
 void fuse_conn_put(struct fuse_conn *fc)
 {
        if (refcount_dec_and_test(&fc->count)) {
-               if (fc->destroy_req)
-                       fuse_request_free(fc->destroy_req);
+               struct fuse_iqueue *fiq = &fc->iq;
+
+               if (fiq->ops->release)
+                       fiq->ops->release(fiq);
                put_pid_ns(fc->pid_ns);
                put_user_ns(fc->user_ns);
                fc->release(fc);
@@ -822,9 +824,12 @@ static const struct super_operations fuse_super_operations = {
 
 static void sanitize_global_limit(unsigned *limit)
 {
+       /*
+        * The default maximum number of async requests is calculated to consume
+        * 1/2^13 of the total memory, assuming 392 bytes per request.
+        */
        if (*limit == 0)
-               *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) /
-                        sizeof(struct fuse_req);
+               *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
 
        if (*limit >= 1 << 16)
                *limit = (1 << 16) - 1;
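
The new comment makes the default easy to check by hand; for example, a machine with 8 GiB of RAM gets (8 GiB >> 13) / 392 = 2674 background requests. A user-space model of the calculation:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long totalram_bytes = 8ULL << 30;	/* 8 GiB */
		unsigned int limit = (totalram_bytes >> 13) / 392;

		if (limit >= 1 << 16)
			limit = (1 << 16) - 1;

		printf("default limit: %u\n", limit);	/* 2674 */
		return 0;
	}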
@@ -870,11 +875,19 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
        spin_unlock(&fc->bg_lock);
 }
 
-static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+struct fuse_init_args {
+       struct fuse_args args;
+       struct fuse_init_in in;
+       struct fuse_init_out out;
+};
+
+static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
+                              int error)
 {
-       struct fuse_init_out *arg = &req->misc.init_out;
+       struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
+       struct fuse_init_out *arg = &ia->out;
 
-       if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
+       if (error || arg->major != FUSE_KERNEL_VERSION)
                fc->conn_error = 1;
        else {
                unsigned long ra_pages;
@@ -951,18 +964,23 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                fc->max_write = max_t(unsigned, 4096, fc->max_write);
                fc->conn_init = 1;
        }
+       kfree(ia);
+
        fuse_set_initialized(fc);
        wake_up_all(&fc->blocked_waitq);
 }
 
-static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_send_init(struct fuse_conn *fc)
 {
-       struct fuse_init_in *arg = &req->misc.init_in;
+       struct fuse_init_args *ia;
+
+       ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);
 
-       arg->major = FUSE_KERNEL_VERSION;
-       arg->minor = FUSE_KERNEL_MINOR_VERSION;
-       arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
-       arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+       ia->in.major = FUSE_KERNEL_VERSION;
+       ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
+       ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
+       ia->in.flags |=
+               FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
                FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
                FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
                FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
@@ -971,26 +989,32 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
                FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
                FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
-       req->in.h.opcode = FUSE_INIT;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(*arg);
-       req->in.args[0].value = arg;
-       req->out.numargs = 1;
+       ia->args.opcode = FUSE_INIT;
+       ia->args.in_numargs = 1;
+       ia->args.in_args[0].size = sizeof(ia->in);
+       ia->args.in_args[0].value = &ia->in;
+       ia->args.out_numargs = 1;
        /* Variable length argument used for backward compatibility
           with interface version < 7.5.  Rest of init_out is zeroed
           by do_get_request(), so a short reply is not a problem */
-       req->out.argvar = 1;
-       req->out.args[0].size = sizeof(struct fuse_init_out);
-       req->out.args[0].value = &req->misc.init_out;
-       req->end = process_init_reply;
-       fuse_request_send_background(fc, req);
+       ia->args.out_argvar = 1;
+       ia->args.out_args[0].size = sizeof(ia->out);
+       ia->args.out_args[0].value = &ia->out;
+       ia->args.force = true;
+       ia->args.nocreds = true;
+       ia->args.end = process_init_reply;
+
+       if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0)
+               process_init_reply(fc, &ia->args, -ENOTCONN);
 }
+EXPORT_SYMBOL_GPL(fuse_send_init);
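
The shape above generalizes to any async request: embed fuse_args in a request-specific struct, set ->end, and recover the container with container_of() in the callback. A hedged sketch with invented example_* names; note that ->end is not run by the core when submission itself fails, so the submitter calls it, as fuse_send_init() does:

	struct example_async {
		struct fuse_args args;
		struct fuse_init_out out;
	};

	static void example_end(struct fuse_conn *fc, struct fuse_args *args,
				int error)
	{
		struct example_async *ea =
			container_of(args, struct example_async, args);

		/* on success, ea->out holds the reply payload */
		kfree(ea);
	}

	static int example_submit(struct fuse_conn *fc)
	{
		struct example_async *ea = kzalloc(sizeof(*ea), GFP_KERNEL);
		int err;

		if (!ea)
			return -ENOMEM;
		/* ... fill ea->args in/out descriptions ... */
		ea->args.end = example_end;
		err = fuse_simple_background(fc, &ea->args, GFP_KERNEL);
		if (err)
			example_end(fc, &ea->args, err);
		return err;
	}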
 
-static void fuse_free_conn(struct fuse_conn *fc)
+void fuse_free_conn(struct fuse_conn *fc)
 {
        WARN_ON(!list_empty(&fc->devices));
        kfree_rcu(fc, rcu);
 }
+EXPORT_SYMBOL_GPL(fuse_free_conn);
 
 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 {
@@ -1032,7 +1056,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
        return 0;
 }
 
-struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
+struct fuse_dev *fuse_dev_alloc(void)
 {
        struct fuse_dev *fud;
        struct list_head *pq;
@@ -1048,16 +1072,33 @@ struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
        }
 
        fud->pq.processing = pq;
-       fud->fc = fuse_conn_get(fc);
        fuse_pqueue_init(&fud->pq);
 
+       return fud;
+}
+EXPORT_SYMBOL_GPL(fuse_dev_alloc);
+
+void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
+{
+       fud->fc = fuse_conn_get(fc);
        spin_lock(&fc->lock);
        list_add_tail(&fud->entry, &fc->devices);
        spin_unlock(&fc->lock);
+}
+EXPORT_SYMBOL_GPL(fuse_dev_install);
+
+struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
+{
+       struct fuse_dev *fud;
 
+       fud = fuse_dev_alloc();
+       if (!fud)
+               return NULL;
+
+       fuse_dev_install(fud, fc);
        return fud;
 }
-EXPORT_SYMBOL_GPL(fuse_dev_alloc);
+EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
 
 void fuse_dev_free(struct fuse_dev *fud)
 {
@@ -1075,17 +1116,13 @@ void fuse_dev_free(struct fuse_dev *fud)
 }
 EXPORT_SYMBOL_GPL(fuse_dev_free);
 
-static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 {
        struct fuse_dev *fud;
-       struct fuse_conn *fc;
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct inode *root;
-       struct fuse_mount_data d;
-       struct file *file;
        struct dentry *root_dentry;
-       struct fuse_req *init_req;
        int err;
-       int is_bdev = sb->s_bdev != NULL;
 
        err = -EINVAL;
        if (sb->s_flags & SB_MANDLOCK)
@@ -1093,19 +1130,19 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
        sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
 
-       if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
-               goto err;
-
-       if (is_bdev) {
+       if (ctx->is_bdev) {
 #ifdef CONFIG_BLOCK
                err = -EINVAL;
-               if (!sb_set_blocksize(sb, d.blksize))
+               if (!sb_set_blocksize(sb, ctx->blksize))
                        goto err;
 #endif
        } else {
                sb->s_blocksize = PAGE_SIZE;
                sb->s_blocksize_bits = PAGE_SHIFT;
        }
+
+       sb->s_subtype = ctx->subtype;
+       ctx->subtype = NULL;
        sb->s_magic = FUSE_SUPER_MAGIC;
        sb->s_op = &fuse_super_operations;
        sb->s_xattr = fuse_xattr_handlers;
@@ -1116,19 +1153,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (sb->s_user_ns != &init_user_ns)
                sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
 
-       file = fget(d.fd);
-       err = -EINVAL;
-       if (!file)
-               goto err;
-
-       /*
-        * Require mount to happen from the same user namespace which
-        * opened /dev/fuse to prevent potential attacks.
-        */
-       if (file->f_op != &fuse_dev_operations ||
-           file->f_cred->user_ns != sb->s_user_ns)
-               goto err_fput;
-
        /*
         * If we are not in the initial user namespace posix
         * acls must be translated.
@@ -1136,17 +1160,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (sb->s_user_ns != &init_user_ns)
                sb->s_xattr = fuse_no_acl_xattr_handlers;
 
-       fc = kmalloc(sizeof(*fc), GFP_KERNEL);
-       err = -ENOMEM;
-       if (!fc)
-               goto err_fput;
-
-       fuse_conn_init(fc, sb->s_user_ns);
-       fc->release = fuse_free_conn;
-
-       fud = fuse_dev_alloc(fc);
+       fud = fuse_dev_alloc_install(fc);
        if (!fud)
-               goto err_put_conn;
+               goto err;
 
        fc->dev = sb->s_dev;
        fc->sb = sb;
@@ -1159,17 +1175,18 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
                fc->dont_mask = 1;
        sb->s_flags |= SB_POSIXACL;
 
-       fc->default_permissions = d.default_permissions;
-       fc->allow_other = d.allow_other;
-       fc->user_id = d.user_id;
-       fc->group_id = d.group_id;
-       fc->max_read = max_t(unsigned, 4096, d.max_read);
-
-       /* Used by get_root_inode() */
-       sb->s_fs_info = fc;
+       fc->default_permissions = ctx->default_permissions;
+       fc->allow_other = ctx->allow_other;
+       fc->user_id = ctx->user_id;
+       fc->group_id = ctx->group_id;
+       fc->max_read = max_t(unsigned, 4096, ctx->max_read);
+       fc->destroy = ctx->destroy;
+       fc->no_control = ctx->no_control;
+       fc->no_force_umount = ctx->no_force_umount;
+       fc->no_mount_options = ctx->no_mount_options;
 
        err = -ENOMEM;
-       root = fuse_get_root_inode(sb, d.rootmode);
+       root = fuse_get_root_inode(sb, ctx->rootmode);
        sb->s_d_op = &fuse_root_dentry_operations;
        root_dentry = d_make_root(root);
        if (!root_dentry)
@@ -1177,20 +1194,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        /* Root dentry doesn't have .d_revalidate */
        sb->s_d_op = &fuse_dentry_operations;
 
-       init_req = fuse_request_alloc(0);
-       if (!init_req)
-               goto err_put_root;
-       __set_bit(FR_BACKGROUND, &init_req->flags);
-
-       if (is_bdev) {
-               fc->destroy_req = fuse_request_alloc(0);
-               if (!fc->destroy_req)
-                       goto err_free_init_req;
-       }
-
        mutex_lock(&fuse_mutex);
        err = -EINVAL;
-       if (file->private_data)
+       if (*ctx->fudptr)
                goto err_unlock;
 
        err = fuse_ctl_add_conn(fc);
@@ -1199,27 +1205,62 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
        list_add_tail(&fc->entry, &fuse_conn_list);
        sb->s_root = root_dentry;
-       file->private_data = fud;
+       *ctx->fudptr = fud;
        mutex_unlock(&fuse_mutex);
+       return 0;
+
+ err_unlock:
+       mutex_unlock(&fuse_mutex);
+       dput(root_dentry);
+ err_dev_free:
+       fuse_dev_free(fud);
+ err:
+       return err;
+}
+EXPORT_SYMBOL_GPL(fuse_fill_super_common);
+
+static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
+{
+       struct fuse_fs_context *ctx = fsc->fs_private;
+       struct file *file;
+       int err;
+       struct fuse_conn *fc;
+
+       err = -EINVAL;
+       file = fget(ctx->fd);
+       if (!file)
+               goto err;
+
+       /*
+        * Require mount to happen from the same user namespace which
+        * opened /dev/fuse to prevent potential attacks.
+        */
+       if ((file->f_op != &fuse_dev_operations) ||
+           (file->f_cred->user_ns != sb->s_user_ns))
+               goto err_fput;
+       ctx->fudptr = &file->private_data;
+
+       fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+       err = -ENOMEM;
+       if (!fc)
+               goto err_fput;
+
+       fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
+       fc->release = fuse_free_conn;
+       sb->s_fs_info = fc;
+
+       err = fuse_fill_super_common(sb, ctx);
+       if (err)
+               goto err_put_conn;
        /*
         * atomic_dec_and_test() in fput() provides the necessary
         * memory barrier for file->private_data to be visible on all
         * CPUs after this
         */
        fput(file);
-
-       fuse_send_init(fc, init_req);
-
+       fuse_send_init(get_fuse_conn_super(sb));
        return 0;
 
- err_unlock:
-       mutex_unlock(&fuse_mutex);
- err_free_init_req:
-       fuse_request_free(init_req);
- err_put_root:
-       dput(root_dentry);
- err_dev_free:
-       fuse_dev_free(fud);
  err_put_conn:
        fuse_conn_put(fc);
        sb->s_fs_info = NULL;
@@ -1229,11 +1270,52 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        return err;
 }
 
-static struct dentry *fuse_mount(struct file_system_type *fs_type,
-                      int flags, const char *dev_name,
-                      void *raw_data)
+static int fuse_get_tree(struct fs_context *fc)
 {
-       return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
+       struct fuse_fs_context *ctx = fc->fs_private;
+
+       if (!ctx->fd_present || !ctx->rootmode_present ||
+           !ctx->user_id_present || !ctx->group_id_present)
+               return -EINVAL;
+
+#ifdef CONFIG_BLOCK
+       if (ctx->is_bdev)
+               return get_tree_bdev(fc, fuse_fill_super);
+#endif
+
+       return get_tree_nodev(fc, fuse_fill_super);
+}
+
+static const struct fs_context_operations fuse_context_ops = {
+       .free           = fuse_free_fc,
+       .parse_param    = fuse_parse_param,
+       .get_tree       = fuse_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int fuse_init_fs_context(struct fs_context *fc)
+{
+       struct fuse_fs_context *ctx;
+
+       ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->max_read = ~0;
+       ctx->blksize = FUSE_DEFAULT_BLKSIZE;
+
+#ifdef CONFIG_BLOCK
+       if (fc->fs_type == &fuseblk_fs_type) {
+               ctx->is_bdev = true;
+               ctx->destroy = true;
+       }
+#endif
+
+       fc->fs_private = ctx;
+       fc->ops = &fuse_context_ops;
+       return 0;
 }
 
 static void fuse_sb_destroy(struct super_block *sb)
@@ -1241,7 +1323,8 @@ static void fuse_sb_destroy(struct super_block *sb)
        struct fuse_conn *fc = get_fuse_conn_super(sb);
 
        if (fc) {
-               fuse_send_destroy(fc);
+               if (fc->destroy)
+                       fuse_send_destroy(fc);
 
                fuse_abort_conn(fc);
                fuse_wait_aborted(fc);
@@ -1252,29 +1335,24 @@ static void fuse_sb_destroy(struct super_block *sb)
        }
 }
 
-static void fuse_kill_sb_anon(struct super_block *sb)
+void fuse_kill_sb_anon(struct super_block *sb)
 {
        fuse_sb_destroy(sb);
        kill_anon_super(sb);
 }
+EXPORT_SYMBOL_GPL(fuse_kill_sb_anon);
 
 static struct file_system_type fuse_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuse",
        .fs_flags       = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
-       .mount          = fuse_mount,
+       .init_fs_context = fuse_init_fs_context,
+       .parameters     = &fuse_fs_parameters,
        .kill_sb        = fuse_kill_sb_anon,
 };
 MODULE_ALIAS_FS("fuse");
 
 #ifdef CONFIG_BLOCK
-static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
-                          int flags, const char *dev_name,
-                          void *raw_data)
-{
-       return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
-}
-
 static void fuse_kill_sb_blk(struct super_block *sb)
 {
        fuse_sb_destroy(sb);
@@ -1284,7 +1362,8 @@ static void fuse_kill_sb_blk(struct super_block *sb)
 static struct file_system_type fuseblk_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuseblk",
-       .mount          = fuse_mount_blk,
+       .init_fs_context = fuse_init_fs_context,
+       .parameters     = &fuse_fs_parameters,
        .kill_sb        = fuse_kill_sb_blk,
        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
index 574d03f..5c38b9d 100644 (file)
@@ -249,6 +249,27 @@ retry:
        return 0;
 }
 
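+/*
+ * Tell the server to drop one lookup reference on @nodeid.  args.force
+ * makes the request go out even on a dying connection and args.noreply
+ * means no answer is expected; errors are deliberately ignored.
+ */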
+static void fuse_force_forget(struct file *file, u64 nodeid)
+{
+       struct inode *inode = file_inode(file);
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       struct fuse_forget_in inarg;
+       FUSE_ARGS(args);
+
+       memset(&inarg, 0, sizeof(inarg));
+       inarg.nlookup = 1;
+       args.opcode = FUSE_FORGET;
+       args.nodeid = nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.force = true;
+       args.noreply = true;
+
+       fuse_simple_request(fc, &args);
+       /* ignore errors */
+}
+
 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
                             struct dir_context *ctx, u64 attr_version)
 {
@@ -295,62 +316,55 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
 
 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 {
-       int plus, err;
-       size_t nbytes;
+       int plus;
+       ssize_t res;
        struct page *page;
        struct inode *inode = file_inode(file);
        struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
+       struct fuse_io_args ia = {};
+       struct fuse_args_pages *ap = &ia.ap;
+       struct fuse_page_desc desc = { .length = PAGE_SIZE };
        u64 attr_version = 0;
        bool locked;
 
-       req = fuse_get_req(fc, 1);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
        page = alloc_page(GFP_KERNEL);
-       if (!page) {
-               fuse_put_request(fc, req);
+       if (!page)
                return -ENOMEM;
-       }
 
        plus = fuse_use_readdirplus(inode, ctx);
-       req->out.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = PAGE_SIZE;
+       ap->args.out_pages = 1;
+       ap->num_pages = 1;
+       ap->pages = &page;
+       ap->descs = &desc;
        if (plus) {
                attr_version = fuse_get_attr_version(fc);
-               fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
-                              FUSE_READDIRPLUS);
+               fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
+                                   FUSE_READDIRPLUS);
        } else {
-               fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
-                              FUSE_READDIR);
+               fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
+                                   FUSE_READDIR);
        }
        locked = fuse_lock_inode(inode);
-       fuse_request_send(fc, req);
+       res = fuse_simple_request(fc, &ap->args);
        fuse_unlock_inode(inode, locked);
-       nbytes = req->out.args[0].size;
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
-       if (!err) {
-               if (!nbytes) {
+       if (res >= 0) {
+               if (!res) {
                        struct fuse_file *ff = file->private_data;
 
                        if (ff->open_flags & FOPEN_CACHE_DIR)
                                fuse_readdir_cache_end(file, ctx->pos);
                } else if (plus) {
-                       err = parse_dirplusfile(page_address(page), nbytes,
+                       res = parse_dirplusfile(page_address(page), res,
                                                file, ctx, attr_version);
                } else {
-                       err = parse_dirfile(page_address(page), nbytes, file,
+                       res = parse_dirfile(page_address(page), res, file,
                                            ctx);
                }
        }
 
        __free_page(page);
        fuse_invalidate_atime(inode);
-       return err;
+       return res;
 }
 
 enum fuse_parse_result {
@@ -372,11 +386,13 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
        for (;;) {
                struct fuse_dirent *dirent = addr + offset;
                unsigned int nbytes = size - offset;
-               size_t reclen = FUSE_DIRENT_SIZE(dirent);
+               size_t reclen;
 
                if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
                        break;
 
+               reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
+
                if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
                        return FOUND_ERR;
                if (WARN_ON(reclen > nbytes))
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
new file mode 100644 (file)
index 0000000..a5c8604
--- /dev/null
@@ -0,0 +1,1252 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * virtio-fs: Virtio Filesystem
+ * Copyright (C) 2018 Red Hat, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_fs.h>
+#include <linux/delay.h>
+#include <linux/fs_context.h>
+#include <linux/highmem.h>
+#include "fuse_i.h"
+
+/* List of virtio-fs device instances and a lock for the list. Also provides
+ * mutual exclusion between device removal and the mount path.
+ */
+static DEFINE_MUTEX(virtio_fs_mutex);
+static LIST_HEAD(virtio_fs_instances);
+
+enum {
+       VQ_HIPRIO,
+       VQ_REQUEST
+};
+
+/* Per-virtqueue state */
+struct virtio_fs_vq {
+       spinlock_t lock;
+       struct virtqueue *vq;     /* protected by ->lock */
+       struct work_struct done_work;
+       struct list_head queued_reqs;
+       struct list_head end_reqs;      /* End these requests */
+       struct delayed_work dispatch_work;
+       struct fuse_dev *fud;
+       bool connected;
+       long in_flight;
+       char name[24];
+} ____cacheline_aligned_in_smp;
+
+/* A virtio-fs device instance */
+struct virtio_fs {
+       struct kref refcount;
+       struct list_head list;    /* on virtio_fs_instances */
+       char *tag;
+       struct virtio_fs_vq *vqs;
+       unsigned int nvqs;               /* number of virtqueues */
+       unsigned int num_request_queues; /* number of request queues */
+};
+
+struct virtio_fs_forget {
+       struct fuse_in_header ih;
+       struct fuse_forget_in arg;
+       /* This request can be temporarily queued on the virtqueue */
+       struct list_head list;
+};
+
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+                                struct fuse_req *req, bool in_flight);
+
+static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
+{
+       struct virtio_fs *fs = vq->vdev->priv;
+
+       return &fs->vqs[vq->index];
+}
+
+static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
+{
+       return &vq_to_fsvq(vq)->fud->pq;
+}
+
+/* Should be called with fsvq->lock held. */
+static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+       fsvq->in_flight++;
+}
+
+/* Should be called with fsvq->lock held. */
+static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+       WARN_ON(fsvq->in_flight <= 0);
+       fsvq->in_flight--;
+}
+
+static void release_virtio_fs_obj(struct kref *ref)
+{
+       struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
+
+       kfree(vfs->vqs);
+       kfree(vfs);
+}
+
+/* Caller must hold virtio_fs_mutex */
+static void virtio_fs_put(struct virtio_fs *fs)
+{
+       kref_put(&fs->refcount, release_virtio_fs_obj);
+}
+
+static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
+{
+       struct virtio_fs *vfs = fiq->priv;
+
+       mutex_lock(&virtio_fs_mutex);
+       virtio_fs_put(vfs);
+       mutex_unlock(&virtio_fs_mutex);
+}
+
+static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
+{
+       WARN_ON(fsvq->in_flight < 0);
+
+       /* Wait for in-flight requests to finish. */
+       while (1) {
+               spin_lock(&fsvq->lock);
+               if (!fsvq->in_flight) {
+                       spin_unlock(&fsvq->lock);
+                       break;
+               }
+               spin_unlock(&fsvq->lock);
+               /* TODO use completion instead of timeout */
+               usleep_range(1000, 2000);
+       }
+
+       flush_work(&fsvq->done_work);
+       flush_delayed_work(&fsvq->dispatch_work);
+}
+
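+/*
+ * Unmount and device-removal paths drain every queue so that no request
+ * still references a fuse_dev by the time the devices are freed.
+ */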
+static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
+{
+       struct virtio_fs_vq *fsvq;
+       int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               fsvq = &fs->vqs[i];
+               virtio_fs_drain_queue(fsvq);
+       }
+}
+
+static void virtio_fs_start_all_queues(struct virtio_fs *fs)
+{
+       struct virtio_fs_vq *fsvq;
+       int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               fsvq = &fs->vqs[i];
+               spin_lock(&fsvq->lock);
+               fsvq->connected = true;
+               spin_unlock(&fsvq->lock);
+       }
+}
+
+/* Add a new instance to the list or return -EEXIST if the tag already exists */
+static int virtio_fs_add_instance(struct virtio_fs *fs)
+{
+       struct virtio_fs *fs2;
+       bool duplicate = false;
+
+       mutex_lock(&virtio_fs_mutex);
+
+       list_for_each_entry(fs2, &virtio_fs_instances, list) {
+               if (strcmp(fs->tag, fs2->tag) == 0)
+                       duplicate = true;
+       }
+
+       if (!duplicate)
+               list_add_tail(&fs->list, &virtio_fs_instances);
+
+       mutex_unlock(&virtio_fs_mutex);
+
+       if (duplicate)
+               return -EEXIST;
+       return 0;
+}
+
+/* Return the virtio_fs with a given tag, taking a reference, or NULL */
+static struct virtio_fs *virtio_fs_find_instance(const char *tag)
+{
+       struct virtio_fs *fs;
+
+       mutex_lock(&virtio_fs_mutex);
+
+       list_for_each_entry(fs, &virtio_fs_instances, list) {
+               if (strcmp(fs->tag, tag) == 0) {
+                       kref_get(&fs->refcount);
+                       goto found;
+               }
+       }
+
+       fs = NULL; /* not found */
+
+found:
+       mutex_unlock(&virtio_fs_mutex);
+
+       return fs;
+}
+
+static void virtio_fs_free_devs(struct virtio_fs *fs)
+{
+       unsigned int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+               if (!fsvq->fud)
+                       continue;
+
+               fuse_dev_free(fsvq->fud);
+               fsvq->fud = NULL;
+       }
+}
+
+/* Read the filesystem tag from virtio config into fs->tag (devm-allocated,
+ * so it is freed together with the device) */
+static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
+{
+       char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
+       char *end;
+       size_t len;
+
+       virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
+                          &tag_buf, sizeof(tag_buf));
+       end = memchr(tag_buf, '\0', sizeof(tag_buf));
+       if (end == tag_buf)
+               return -EINVAL; /* empty tag */
+       if (!end)
+               end = &tag_buf[sizeof(tag_buf)];
+
+       len = end - tag_buf;
+       fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
+       if (!fs->tag)
+               return -ENOMEM;
+       memcpy(fs->tag, tag_buf, len);
+       fs->tag[len] = '\0';
+       return 0;
+}
+
+/* Work function for hiprio completion */
+static void virtio_fs_hiprio_done_work(struct work_struct *work)
+{
+       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+                                                done_work);
+       struct virtqueue *vq = fsvq->vq;
+
+       /* Free completed FUSE_FORGET requests */
+       spin_lock(&fsvq->lock);
+       do {
+               unsigned int len;
+               void *req;
+
+               virtqueue_disable_cb(vq);
+
+               while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+                       kfree(req);
+                       dec_in_flight_req(fsvq);
+               }
+       } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+       spin_unlock(&fsvq->lock);
+}
+
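+/*
+ * Worker: first end the requests parked on ->end_reqs (they failed
+ * submission), then retry the ones that were queued because the
+ * virtqueue was full.
+ */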
+static void virtio_fs_request_dispatch_work(struct work_struct *work)
+{
+       struct fuse_req *req;
+       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+                                                dispatch_work.work);
+       struct fuse_conn *fc = fsvq->fud->fc;
+       int ret;
+
+       pr_debug("virtio-fs: worker %s called.\n", __func__);
+       while (1) {
+               spin_lock(&fsvq->lock);
+               req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
+                                              list);
+               if (!req) {
+                       spin_unlock(&fsvq->lock);
+                       break;
+               }
+
+               list_del_init(&req->list);
+               spin_unlock(&fsvq->lock);
+               fuse_request_end(fc, req);
+       }
+
+       /* Dispatch pending requests */
+       while (1) {
+               spin_lock(&fsvq->lock);
+               req = list_first_entry_or_null(&fsvq->queued_reqs,
+                                              struct fuse_req, list);
+               if (!req) {
+                       spin_unlock(&fsvq->lock);
+                       return;
+               }
+               list_del_init(&req->list);
+               spin_unlock(&fsvq->lock);
+
+               ret = virtio_fs_enqueue_req(fsvq, req, true);
+               if (ret < 0) {
+                       if (ret == -ENOMEM || ret == -ENOSPC) {
+                               spin_lock(&fsvq->lock);
+                               list_add_tail(&req->list, &fsvq->queued_reqs);
+                               schedule_delayed_work(&fsvq->dispatch_work,
+                                                     msecs_to_jiffies(1));
+                               spin_unlock(&fsvq->lock);
+                               return;
+                       }
+                       req->out.h.error = ret;
+                       spin_lock(&fsvq->lock);
+                       dec_in_flight_req(fsvq);
+                       spin_unlock(&fsvq->lock);
+                       pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
+                              ret);
+                       fuse_request_end(fc, req);
+               }
+       }
+}
+
+static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
+{
+       struct virtio_fs_forget *forget;
+       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+                                                dispatch_work.work);
+       struct virtqueue *vq = fsvq->vq;
+       struct scatterlist sg;
+       struct scatterlist *sgs[] = {&sg};
+       bool notify;
+       int ret;
+
+       pr_debug("virtio-fs: worker %s called.\n", __func__);
+       while (1) {
+               spin_lock(&fsvq->lock);
+               forget = list_first_entry_or_null(&fsvq->queued_reqs,
+                                       struct virtio_fs_forget, list);
+               if (!forget) {
+                       spin_unlock(&fsvq->lock);
+                       return;
+               }
+
+               list_del(&forget->list);
+               if (!fsvq->connected) {
+                       dec_in_flight_req(fsvq);
+                       spin_unlock(&fsvq->lock);
+                       kfree(forget);
+                       continue;
+               }
+
+               sg_init_one(&sg, forget, sizeof(*forget));
+
+               /* Enqueue the request */
+               dev_dbg(&vq->vdev->dev, "%s\n", __func__);
+               ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
+               if (ret < 0) {
+                       if (ret == -ENOMEM || ret == -ENOSPC) {
+                               pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
+                                        ret);
+                               list_add_tail(&forget->list,
+                                               &fsvq->queued_reqs);
+                               schedule_delayed_work(&fsvq->dispatch_work,
+                                               msecs_to_jiffies(1));
+                       } else {
+                               pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
+                                        ret);
+                               dec_in_flight_req(fsvq);
+                               kfree(forget);
+                       }
+                       spin_unlock(&fsvq->lock);
+                       return;
+               }
+
+               notify = virtqueue_kick_prepare(vq);
+               spin_unlock(&fsvq->lock);
+
+               if (notify)
+                       virtqueue_notify(vq);
+               pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
+                        __func__);
+       }
+}
+
+/* Allocate and copy args into req->argbuf */
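+/*
+ * The buffer is sized for both directions: the in args are packed at
+ * offset 0 and room is left after them for the out args, which
+ * copy_args_from_argbuf() later reads back from that same offset.
+ */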
+static int copy_args_to_argbuf(struct fuse_req *req)
+{
+       struct fuse_args *args = req->args;
+       unsigned int offset = 0;
+       unsigned int num_in;
+       unsigned int num_out;
+       unsigned int len;
+       unsigned int i;
+
+       num_in = args->in_numargs - args->in_pages;
+       num_out = args->out_numargs - args->out_pages;
+       len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
+             fuse_len_args(num_out, args->out_args);
+
+       req->argbuf = kmalloc(len, GFP_ATOMIC);
+       if (!req->argbuf)
+               return -ENOMEM;
+
+       for (i = 0; i < num_in; i++) {
+               memcpy(req->argbuf + offset,
+                      args->in_args[i].value,
+                      args->in_args[i].size);
+               offset += args->in_args[i].size;
+       }
+
+       return 0;
+}
+
+/* Copy args back out of req->argbuf and free it */
+static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
+{
+       unsigned int remaining;
+       unsigned int offset;
+       unsigned int num_in;
+       unsigned int num_out;
+       unsigned int i;
+
+       remaining = req->out.h.len - sizeof(req->out.h);
+       num_in = args->in_numargs - args->in_pages;
+       num_out = args->out_numargs - args->out_pages;
+       offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
+
+       for (i = 0; i < num_out; i++) {
+               unsigned int argsize = args->out_args[i].size;
+
+               if (args->out_argvar &&
+                   i == args->out_numargs - 1 &&
+                   argsize > remaining) {
+                       argsize = remaining;
+               }
+
+               memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
+               offset += argsize;
+
+               if (i != args->out_numargs - 1)
+                       remaining -= argsize;
+       }
+
+       /* Store the actual size of the variable-length arg */
+       if (args->out_argvar)
+               args->out_args[args->out_numargs - 1].size = remaining;
+
+       kfree(req->argbuf);
+       req->argbuf = NULL;
+}
+
+/* Work function for request completion */
+static void virtio_fs_requests_done_work(struct work_struct *work)
+{
+       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+                                                done_work);
+       struct fuse_pqueue *fpq = &fsvq->fud->pq;
+       struct fuse_conn *fc = fsvq->fud->fc;
+       struct virtqueue *vq = fsvq->vq;
+       struct fuse_req *req;
+       struct fuse_args_pages *ap;
+       struct fuse_req *next;
+       struct fuse_args *args;
+       unsigned int len, i, thislen;
+       struct page *page;
+       LIST_HEAD(reqs);
+
+       /* Collect completed requests off the virtqueue */
+       spin_lock(&fsvq->lock);
+       do {
+               virtqueue_disable_cb(vq);
+
+               while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+                       spin_lock(&fpq->lock);
+                       list_move_tail(&req->list, &reqs);
+                       spin_unlock(&fpq->lock);
+               }
+       } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+       spin_unlock(&fsvq->lock);
+
+       /* End requests */
+       list_for_each_entry_safe(req, next, &reqs, list) {
+               /*
+                * TODO verify that server properly follows FUSE protocol
+                * (oh.uniq, oh.len)
+                */
+               args = req->args;
+               copy_args_from_argbuf(args, req);
+
+               if (args->out_pages && args->page_zeroing) {
+                       len = args->out_args[args->out_numargs - 1].size;
+                       ap = container_of(args, typeof(*ap), args);
+                       for (i = 0; i < ap->num_pages; i++) {
+                               thislen = ap->descs[i].length;
+                               if (len < thislen) {
+                                       WARN_ON(ap->descs[i].offset);
+                                       page = ap->pages[i];
+                                       zero_user_segment(page, len, thislen);
+                                       len = 0;
+                               } else {
+                                       len -= thislen;
+                               }
+                       }
+               }
+
+               spin_lock(&fpq->lock);
+               clear_bit(FR_SENT, &req->flags);
+               list_del_init(&req->list);
+               spin_unlock(&fpq->lock);
+
+               fuse_request_end(fc, req);
+               spin_lock(&fsvq->lock);
+               dec_in_flight_req(fsvq);
+               spin_unlock(&fsvq->lock);
+       }
+}
+
+/* Virtqueue interrupt handler */
+static void virtio_fs_vq_done(struct virtqueue *vq)
+{
+       struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
+
+       dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
+
+       schedule_work(&fsvq->done_work);
+}
+
+/* Initialize virtqueues */
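+/*
+ * Queue layout: one hiprio queue for FORGET requests followed by
+ * num_request_queues request queues, the count being advertised by the
+ * device in its config space.
+ */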
+static int virtio_fs_setup_vqs(struct virtio_device *vdev,
+                              struct virtio_fs *fs)
+{
+       struct virtqueue **vqs;
+       vq_callback_t **callbacks;
+       const char **names;
+       unsigned int i;
+       int ret = 0;
+
+       virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
+                    &fs->num_request_queues);
+       if (fs->num_request_queues == 0)
+               return -EINVAL;
+
+       fs->nvqs = 1 + fs->num_request_queues;
+       fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
+       if (!fs->vqs)
+               return -ENOMEM;
+
+       vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
+       callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
+                                       GFP_KERNEL);
+       names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
+       if (!vqs || !callbacks || !names) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
+       snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
+                       "hiprio");
+       names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
+       INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
+       INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
+       INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
+       INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
+                       virtio_fs_hiprio_dispatch_work);
+       spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
+
+       /* Initialize the request virtqueues */
+       for (i = VQ_REQUEST; i < fs->nvqs; i++) {
+               spin_lock_init(&fs->vqs[i].lock);
+               INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
+               INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
+                                 virtio_fs_request_dispatch_work);
+               INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
+               INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
+               snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
+                        "requests.%u", i - VQ_REQUEST);
+               callbacks[i] = virtio_fs_vq_done;
+               names[i] = fs->vqs[i].name;
+       }
+
+       ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
+       if (ret < 0)
+               goto out;
+
+       for (i = 0; i < fs->nvqs; i++)
+               fs->vqs[i].vq = vqs[i];
+
+       virtio_fs_start_all_queues(fs);
+out:
+       kfree(names);
+       kfree(callbacks);
+       kfree(vqs);
+       if (ret)
+               kfree(fs->vqs);
+       return ret;
+}
+
+/* Free virtqueues (device must already be reset) */
+static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
+                                 struct virtio_fs *fs)
+{
+       vdev->config->del_vqs(vdev);
+}
+
+static int virtio_fs_probe(struct virtio_device *vdev)
+{
+       struct virtio_fs *fs;
+       int ret;
+
+       fs = kzalloc(sizeof(*fs), GFP_KERNEL);
+       if (!fs)
+               return -ENOMEM;
+       kref_init(&fs->refcount);
+       vdev->priv = fs;
+
+       ret = virtio_fs_read_tag(vdev, fs);
+       if (ret < 0)
+               goto out;
+
+       ret = virtio_fs_setup_vqs(vdev, fs);
+       if (ret < 0)
+               goto out;
+
+       /* TODO vq affinity */
+
+       /* Bring the device online in case the filesystem is mounted and
+        * requests need to be sent before we return.
+        */
+       virtio_device_ready(vdev);
+
+       ret = virtio_fs_add_instance(fs);
+       if (ret < 0)
+               goto out_vqs;
+
+       return 0;
+
+out_vqs:
+       vdev->config->reset(vdev);
+       virtio_fs_cleanup_vqs(vdev, fs);
+
+out:
+       vdev->priv = NULL;
+       kfree(fs);
+       return ret;
+}
+
+static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
+{
+       struct virtio_fs_vq *fsvq;
+       int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               fsvq = &fs->vqs[i];
+               spin_lock(&fsvq->lock);
+               fsvq->connected = false;
+               spin_unlock(&fsvq->lock);
+       }
+}
+
+static void virtio_fs_remove(struct virtio_device *vdev)
+{
+       struct virtio_fs *fs = vdev->priv;
+
+       mutex_lock(&virtio_fs_mutex);
+       /* This device is going away. No one should get a new reference. */
+       list_del_init(&fs->list);
+       virtio_fs_stop_all_queues(fs);
+       virtio_fs_drain_all_queues(fs);
+       vdev->config->reset(vdev);
+       virtio_fs_cleanup_vqs(vdev, fs);
+
+       vdev->priv = NULL;
+       /* Put device reference on virtio_fs object */
+       virtio_fs_put(fs);
+       mutex_unlock(&virtio_fs_mutex);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtio_fs_freeze(struct virtio_device *vdev)
+{
+       /* TODO need to save state here */
+       pr_warn("virtio-fs: suspend/resume not yet supported\n");
+       return -EOPNOTSUPP;
+}
+
+static int virtio_fs_restore(struct virtio_device *vdev)
+{
+       /* TODO need to restore state here */
+       return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct virtio_device_id id_table[] = {
+       { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
+       {},
+};
+
+static const unsigned int feature_table[] = {};
+
+static struct virtio_driver virtio_fs_driver = {
+       .driver.name            = KBUILD_MODNAME,
+       .driver.owner           = THIS_MODULE,
+       .id_table               = id_table,
+       .feature_table          = feature_table,
+       .feature_table_size     = ARRAY_SIZE(feature_table),
+       .probe                  = virtio_fs_probe,
+       .remove                 = virtio_fs_remove,
+#ifdef CONFIG_PM_SLEEP
+       .freeze                 = virtio_fs_freeze,
+       .restore                = virtio_fs_restore,
+#endif
+};
+
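+/*
+ * FORGET requests are fire-and-forget: dequeue one from the input queue,
+ * wrap it in a virtio_fs_forget buffer and push it onto the hiprio
+ * virtqueue.  If the queue is full, park the buffer on ->queued_reqs and
+ * let the dispatch worker retry.
+ */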
+static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       struct fuse_forget_link *link;
+       struct virtio_fs_forget *forget;
+       struct scatterlist sg;
+       struct scatterlist *sgs[] = {&sg};
+       struct virtio_fs *fs;
+       struct virtqueue *vq;
+       struct virtio_fs_vq *fsvq;
+       bool notify;
+       u64 unique;
+       int ret;
+
+       link = fuse_dequeue_forget(fiq, 1, NULL);
+       unique = fuse_get_unique(fiq);
+
+       fs = fiq->priv;
+       fsvq = &fs->vqs[VQ_HIPRIO];
+       spin_unlock(&fiq->lock);
+
+       /* Allocate a buffer for the request */
+       forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
+
+       forget->ih = (struct fuse_in_header){
+               .opcode = FUSE_FORGET,
+               .nodeid = link->forget_one.nodeid,
+               .unique = unique,
+               .len = sizeof(*forget),
+       };
+       forget->arg = (struct fuse_forget_in){
+               .nlookup = link->forget_one.nlookup,
+       };
+
+       sg_init_one(&sg, forget, sizeof(*forget));
+
+       /* Enqueue the request */
+       spin_lock(&fsvq->lock);
+
+       if (!fsvq->connected) {
+               kfree(forget);
+               spin_unlock(&fsvq->lock);
+               goto out;
+       }
+
+       vq = fsvq->vq;
+       dev_dbg(&vq->vdev->dev, "%s\n", __func__);
+
+       ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
+       if (ret < 0) {
+               if (ret == -ENOMEM || ret == -ENOSPC) {
+                       pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
+                                ret);
+                       list_add_tail(&forget->list, &fsvq->queued_reqs);
+                       schedule_delayed_work(&fsvq->dispatch_work,
+                                       msecs_to_jiffies(1));
+                       inc_in_flight_req(fsvq);
+               } else {
+                       pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
+                                ret);
+                       kfree(forget);
+               }
+               spin_unlock(&fsvq->lock);
+               goto out;
+       }
+
+       inc_in_flight_req(fsvq);
+       notify = virtqueue_kick_prepare(vq);
+
+       spin_unlock(&fsvq->lock);
+
+       if (notify)
+               virtqueue_notify(vq);
+out:
+       kfree(link);
+}
+
+static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       /*
+        * TODO interrupts.
+        *
+        * Normal fs operations on a local filesystems aren't interruptible.
+        * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
+        * with shared lock between host and guest.
+        */
+       spin_unlock(&fiq->lock);
+}
+
+/* Return the number of scatter-gather list elements required */
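+/*
+ * Worst case: one element for fuse_in_header, one for the packed in
+ * args, one per in page, plus (for requests expecting a reply) one for
+ * fuse_out_header, one for the packed out args and one per out page.
+ */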
+static unsigned int sg_count_fuse_req(struct fuse_req *req)
+{
+       struct fuse_args *args = req->args;
+       struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
+       unsigned int total_sgs = 1 /* fuse_in_header */;
+
+       if (args->in_numargs - args->in_pages)
+               total_sgs += 1;
+
+       if (args->in_pages)
+               total_sgs += ap->num_pages;
+
+       if (!test_bit(FR_ISREPLY, &req->flags))
+               return total_sgs;
+
+       total_sgs += 1 /* fuse_out_header */;
+
+       if (args->out_numargs - args->out_pages)
+               total_sgs += 1;
+
+       if (args->out_pages)
+               total_sgs += ap->num_pages;
+
+       return total_sgs;
+}
+
+/* Add pages to scatter-gather list and return number of elements used */
+static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
+                                      struct page **pages,
+                                      struct fuse_page_desc *page_descs,
+                                      unsigned int num_pages,
+                                      unsigned int total_len)
+{
+       unsigned int i;
+       unsigned int this_len;
+
+       for (i = 0; i < num_pages && total_len; i++) {
+               sg_init_table(&sg[i], 1);
+               this_len =  min(page_descs[i].length, total_len);
+               sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
+               total_len -= this_len;
+       }
+
+       return i;
+}
+
+/* Add args to scatter-gather list and return number of elements used */
+static unsigned int sg_init_fuse_args(struct scatterlist *sg,
+                                     struct fuse_req *req,
+                                     struct fuse_arg *args,
+                                     unsigned int numargs,
+                                     bool argpages,
+                                     void *argbuf,
+                                     unsigned int *len_used)
+{
+       struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
+       unsigned int total_sgs = 0;
+       unsigned int len;
+
+       len = fuse_len_args(numargs - argpages, args);
+       if (len)
+               sg_init_one(&sg[total_sgs++], argbuf, len);
+
+       if (argpages)
+               total_sgs += sg_init_fuse_pages(&sg[total_sgs],
+                                               ap->pages, ap->descs,
+                                               ap->num_pages,
+                                               args[numargs - 1].size);
+
+       if (len_used)
+               *len_used = len;
+
+       return total_sgs;
+}
+
+/* Add a request to a virtqueue and kick the device */
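+/*
+ * @in_flight tells whether the request has already been counted in
+ * fsvq->in_flight (true for retries from the dispatch worker); only new
+ * requests bump the counter here.
+ */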
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+                                struct fuse_req *req, bool in_flight)
+{
+       /* A typical request needs 4 elements: the in/out headers plus the
+        * packed in/out args; the arrays are sized slightly larger so
+        * common requests fit on the stack.
+        */
+       struct scatterlist *stack_sgs[6];
+       struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
+       struct scatterlist **sgs = stack_sgs;
+       struct scatterlist *sg = stack_sg;
+       struct virtqueue *vq;
+       struct fuse_args *args = req->args;
+       unsigned int argbuf_used = 0;
+       unsigned int out_sgs = 0;
+       unsigned int in_sgs = 0;
+       unsigned int total_sgs;
+       unsigned int i;
+       int ret;
+       bool notify;
+       struct fuse_pqueue *fpq;
+
+       /* Does the sglist fit on the stack? */
+       total_sgs = sg_count_fuse_req(req);
+       if (total_sgs > ARRAY_SIZE(stack_sgs)) {
+               sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
+               sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
+               if (!sgs || !sg) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       }
+
+       /* Use a bounce buffer since stack args cannot be mapped */
+       ret = copy_args_to_argbuf(req);
+       if (ret < 0)
+               goto out;
+
+       /* Request elements */
+       sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
+       out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
+                                    (struct fuse_arg *)args->in_args,
+                                    args->in_numargs, args->in_pages,
+                                    req->argbuf, &argbuf_used);
+
+       /* Reply elements */
+       if (test_bit(FR_ISREPLY, &req->flags)) {
+               sg_init_one(&sg[out_sgs + in_sgs++],
+                           &req->out.h, sizeof(req->out.h));
+               in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
+                                           args->out_args, args->out_numargs,
+                                           args->out_pages,
+                                           req->argbuf + argbuf_used, NULL);
+       }
+
+       WARN_ON(out_sgs + in_sgs != total_sgs);
+
+       for (i = 0; i < total_sgs; i++)
+               sgs[i] = &sg[i];
+
+       spin_lock(&fsvq->lock);
+
+       if (!fsvq->connected) {
+               spin_unlock(&fsvq->lock);
+               ret = -ENOTCONN;
+               goto out;
+       }
+
+       vq = fsvq->vq;
+       ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
+       if (ret < 0) {
+               spin_unlock(&fsvq->lock);
+               goto out;
+       }
+
+       /* Request successfully sent. */
+       fpq = &fsvq->fud->pq;
+       spin_lock(&fpq->lock);
+       list_add_tail(&req->list, fpq->processing);
+       spin_unlock(&fpq->lock);
+       set_bit(FR_SENT, &req->flags);
+       /* matches barrier in request_wait_answer() */
+       smp_mb__after_atomic();
+
+       if (!in_flight)
+               inc_in_flight_req(fsvq);
+       notify = virtqueue_kick_prepare(vq);
+
+       spin_unlock(&fsvq->lock);
+
+       if (notify)
+               virtqueue_notify(vq);
+
+out:
+       if (ret < 0 && req->argbuf) {
+               kfree(req->argbuf);
+               req->argbuf = NULL;
+       }
+       if (sgs != stack_sgs) {
+               kfree(sgs);
+               kfree(sg);
+       }
+
+       return ret;
+}
+
+static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
+       struct virtio_fs *fs;
+       struct fuse_req *req;
+       struct virtio_fs_vq *fsvq;
+       int ret;
+
+       WARN_ON(list_empty(&fiq->pending));
+       req = list_last_entry(&fiq->pending, struct fuse_req, list);
+       clear_bit(FR_PENDING, &req->flags);
+       list_del_init(&req->list);
+       WARN_ON(!list_empty(&fiq->pending));
+       spin_unlock(&fiq->lock);
+
+       fs = fiq->priv;
+
+       pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
+                 __func__, req->in.h.opcode, req->in.h.unique,
+                req->in.h.nodeid, req->in.h.len,
+                fuse_len_args(req->args->out_numargs, req->args->out_args));
+
+       fsvq = &fs->vqs[queue_id];
+       ret = virtio_fs_enqueue_req(fsvq, req, false);
+       if (ret < 0) {
+               if (ret == -ENOMEM || ret == -ENOSPC) {
+                       /*
+                        * Virtqueue full. Retry submission from worker
+                        * context as we might be holding fc->bg_lock.
+                        */
+                       spin_lock(&fsvq->lock);
+                       list_add_tail(&req->list, &fsvq->queued_reqs);
+                       inc_in_flight_req(fsvq);
+                       schedule_delayed_work(&fsvq->dispatch_work,
+                                               msecs_to_jiffies(1));
+                       spin_unlock(&fsvq->lock);
+                       return;
+               }
+               req->out.h.error = ret;
+               pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
+
+               /* Can't end request in submission context. Use a worker */
+               spin_lock(&fsvq->lock);
+               list_add_tail(&req->list, &fsvq->end_reqs);
+               schedule_delayed_work(&fsvq->dispatch_work, 0);
+               spin_unlock(&fsvq->lock);
+               return;
+       }
+}
+
+static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
+       .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
+       .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
+       .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
+       .release                        = virtio_fs_fiq_release,
+};
+
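+/*
+ * virtiofs mounts take no options from userspace: the hard-coded context
+ * below plus fuse_fill_super_common() do all the work, with the
+ * VQ_REQUEST queue's fuse_dev standing in for a /dev/fuse file.
+ */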
+static int virtio_fs_fill_super(struct super_block *sb)
+{
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
+       struct virtio_fs *fs = fc->iq.priv;
+       unsigned int i;
+       int err;
+       struct fuse_fs_context ctx = {
+               .rootmode = S_IFDIR,
+               .default_permissions = 1,
+               .allow_other = 1,
+               .max_read = UINT_MAX,
+               .blksize = 512,
+               .destroy = true,
+               .no_control = true,
+               .no_force_umount = true,
+               .no_mount_options = true,
+       };
+
+       mutex_lock(&virtio_fs_mutex);
+
+       /* With the mutex held, make sure the virtiofs device is still there.
+        * Even though we hold a reference to it, the driver's ->remove might
+        * already have cleaned up the virtqueues. In that case bail out.
+        */
+       err = -EINVAL;
+       if (list_empty(&fs->list)) {
+               pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
+               goto err;
+       }
+
+       err = -ENOMEM;
+       /* Allocate a fuse_dev for each queue before VQ_REQUEST (i.e. hiprio) */
+       for (i = 0; i < VQ_REQUEST; i++) {
+               struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+               fsvq->fud = fuse_dev_alloc();
+               if (!fsvq->fud)
+                       goto err_free_fuse_devs;
+       }
+
+       ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
+       err = fuse_fill_super_common(sb, &ctx);
+       if (err < 0)
+               goto err_free_fuse_devs;
+
+       fc = fs->vqs[VQ_REQUEST].fud->fc;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+               if (i == VQ_REQUEST)
+                       continue; /* already initialized */
+               fuse_dev_install(fsvq->fud, fc);
+       }
+
+       /* A previous unmount will have stopped all queues. Start them again */
+       virtio_fs_start_all_queues(fs);
+       fuse_send_init(fc);
+       mutex_unlock(&virtio_fs_mutex);
+       return 0;
+
+err_free_fuse_devs:
+       virtio_fs_free_devs(fs);
+err:
+       mutex_unlock(&virtio_fs_mutex);
+       return err;
+}
+
+static void virtio_kill_sb(struct super_block *sb)
+{
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
+       struct virtio_fs *vfs;
+       struct virtio_fs_vq *fsvq;
+
+       /* If mount failed, we can still be called without any fc */
+       if (!fc)
+               return fuse_kill_sb_anon(sb);
+
+       vfs = fc->iq.priv;
+       fsvq = &vfs->vqs[VQ_HIPRIO];
+
+       /* Stop the forget queue; the DESTROY request will be sent shortly */
+       spin_lock(&fsvq->lock);
+       fsvq->connected = false;
+       spin_unlock(&fsvq->lock);
+       virtio_fs_drain_all_queues(vfs);
+
+       fuse_kill_sb_anon(sb);
+
+       /* fuse_kill_sb_anon() must have sent destroy. Stop all queues,
+        * drain one more time, and free the fuse devices. Freeing the
+        * fuse devices drops their references on the fuse_conn, which in
+        * turn drops its reference on the virtio_fs object.
+        */
+       virtio_fs_stop_all_queues(vfs);
+       virtio_fs_drain_all_queues(vfs);
+       virtio_fs_free_devs(vfs);
+}
+
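+/*
+ * sget_fc() helpers: two mounts share a superblock iff they point at the
+ * same virtio_fs instance, compared via the fuse_iqueue ->priv pointer.
+ */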
+static int virtio_fs_test_super(struct super_block *sb,
+                               struct fs_context *fsc)
+{
+       struct fuse_conn *fc = fsc->s_fs_info;
+
+       return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
+}
+
+static int virtio_fs_set_super(struct super_block *sb,
+                              struct fs_context *fsc)
+{
+       int err;
+
+       err = get_anon_bdev(&sb->s_dev);
+       if (!err)
+               fuse_conn_get(fsc->s_fs_info);
+
+       return err;
+}
+
+static int virtio_fs_get_tree(struct fs_context *fsc)
+{
+       struct virtio_fs *fs;
+       struct super_block *sb;
+       struct fuse_conn *fc;
+       int err;
+
+       /* This gets a reference on the virtio_fs object. The pointer is
+        * installed in fc->iq.priv; when the fuse_conn goes away, the
+        * queue's ->release() callback drops the reference.
+        */
+       fs = virtio_fs_find_instance(fsc->source);
+       if (!fs) {
+               pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
+               return -EINVAL;
+       }
+
+       fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
+       if (!fc) {
+               mutex_lock(&virtio_fs_mutex);
+               virtio_fs_put(fs);
+               mutex_unlock(&virtio_fs_mutex);
+               return -ENOMEM;
+       }
+
+       fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
+                      fs);
+       fc->release = fuse_free_conn;
+       fc->delete_stale = true;
+
+       fsc->s_fs_info = fc;
+       sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
+       fuse_conn_put(fc);
+       if (IS_ERR(sb))
+               return PTR_ERR(sb);
+
+       if (!sb->s_root) {
+               err = virtio_fs_fill_super(sb);
+               if (err) {
+                       deactivate_locked_super(sb);
+                       return err;
+               }
+
+               sb->s_flags |= SB_ACTIVE;
+       }
+
+       WARN_ON(fsc->root);
+       fsc->root = dget(sb->s_root);
+       return 0;
+}
+
+static const struct fs_context_operations virtio_fs_context_ops = {
+       .get_tree       = virtio_fs_get_tree,
+};
+
+static int virtio_fs_init_fs_context(struct fs_context *fsc)
+{
+       fsc->ops = &virtio_fs_context_ops;
+       return 0;
+}
+
+static struct file_system_type virtio_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "virtiofs",
+       .init_fs_context = virtio_fs_init_fs_context,
+       .kill_sb        = virtio_kill_sb,
+};
+
+static int __init virtio_fs_init(void)
+{
+       int ret;
+
+       ret = register_virtio_driver(&virtio_fs_driver);
+       if (ret < 0)
+               return ret;
+
+       ret = register_filesystem(&virtio_fs_type);
+       if (ret < 0) {
+               unregister_virtio_driver(&virtio_fs_driver);
+               return ret;
+       }
+
+       return 0;
+}
+module_init(virtio_fs_init);
+
+static void __exit virtio_fs_exit(void)
+{
+       unregister_filesystem(&virtio_fs_type);
+       unregister_virtio_driver(&virtio_fs_driver);
+}
+module_exit(virtio_fs_exit);
+
+MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
+MODULE_DESCRIPTION("Virtio Filesystem");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_FS(KBUILD_MODNAME);
+MODULE_DEVICE_TABLE(virtio, id_table);
index 4337176..20d052e 100644 (file)
@@ -25,15 +25,15 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
        memset(&inarg, 0, sizeof(inarg));
        inarg.size = size;
        inarg.flags = flags;
-       args.in.h.opcode = FUSE_SETXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 3;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = strlen(name) + 1;
-       args.in.args[1].value = name;
-       args.in.args[2].size = size;
-       args.in.args[2].value = value;
+       args.opcode = FUSE_SETXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 3;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = strlen(name) + 1;
+       args.in_args[1].value = name;
+       args.in_args[2].size = size;
+       args.in_args[2].value = value;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_setxattr = 1;
@@ -60,22 +60,22 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.size = size;
-       args.in.h.opcode = FUSE_GETXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = strlen(name) + 1;
-       args.in.args[1].value = name;
+       args.opcode = FUSE_GETXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = strlen(name) + 1;
+       args.in_args[1].value = name;
        /* This is really two different operations rolled into one */
-       args.out.numargs = 1;
+       args.out_numargs = 1;
        if (size) {
-               args.out.argvar = 1;
-               args.out.args[0].size = size;
-               args.out.args[0].value = value;
+               args.out_argvar = true;
+               args.out_args[0].size = size;
+               args.out_args[0].value = value;
        } else {
-               args.out.args[0].size = sizeof(outarg);
-               args.out.args[0].value = &outarg;
+               args.out_args[0].size = sizeof(outarg);
+               args.out_args[0].value = &outarg;
        }
        ret = fuse_simple_request(fc, &args);
        if (!ret && !size)
@@ -121,20 +121,20 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.size = size;
-       args.in.h.opcode = FUSE_LISTXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = FUSE_LISTXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        /* This is really two different operations rolled into one */
-       args.out.numargs = 1;
+       args.out_numargs = 1;
        if (size) {
-               args.out.argvar = 1;
-               args.out.args[0].size = size;
-               args.out.args[0].value = list;
+               args.out_argvar = true;
+               args.out_args[0].size = size;
+               args.out_args[0].value = list;
        } else {
-               args.out.args[0].size = sizeof(outarg);
-               args.out.args[0].value = &outarg;
+               args.out_args[0].size = sizeof(outarg);
+               args.out_args[0].value = &outarg;
        }
        ret = fuse_simple_request(fc, &args);
        if (!ret && !size)
@@ -157,11 +157,11 @@ int fuse_removexattr(struct inode *inode, const char *name)
        if (fc->no_removexattr)
                return -EOPNOTSUPP;
 
-       args.in.h.opcode = FUSE_REMOVEXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = strlen(name) + 1;
-       args.in.args[0].value = name;
+       args.opcode = FUSE_REMOVEXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = strlen(name) + 1;
+       args.in_args[0].value = name;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_removexattr = 1;
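
The hunks above mechanically convert fuse call sites from the nested args.in.h.* / args.out.* layout to flat members of struct fuse_args. An abridged sketch of the flattened structure these call sites assume (the real definition in fs/fuse/fuse_i.h carries additional flag bits):

/* Abridged sketch of the flattened argument block. */
struct fuse_args {
	uint64_t nodeid;		/* was args.in.h.nodeid */
	uint32_t opcode;		/* was args.in.h.opcode */
	unsigned short in_numargs;	/* was args.in.numargs */
	unsigned short out_numargs;	/* was args.out.numargs */
	bool out_argvar;		/* was args.out.argvar (an int) */
	struct fuse_in_arg in_args[3];	/* was args.in.args[] */
	struct fuse_arg out_args[2];	/* was args.out.args[] */
};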
index 6b45006..5f89c51 100644 (file)
@@ -584,10 +584,10 @@ struct gfs2_args {
        unsigned int ar_rgrplvb:1;              /* use lvbs for rgrp info */
        unsigned int ar_loccookie:1;            /* use location based readdir
                                                   cookies */
-       int ar_commit;                          /* Commit interval */
-       int ar_statfs_quantum;                  /* The fast statfs interval */
-       int ar_quota_quantum;                   /* The quota interval */
-       int ar_statfs_percent;                  /* The % change to force sync */
+       s32 ar_commit;                          /* Commit interval */
+       s32 ar_statfs_quantum;                  /* The fast statfs interval */
+       s32 ar_quota_quantum;                   /* The quota interval */
+       s32 ar_statfs_percent;                  /* The % change to force sync */
 };
 
 struct gfs2_tune {
index f3fd5cd..18daf49 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/lockdep.h>
 #include <linux/module.h>
 #include <linux/backing-dev.h>
+#include <linux/fs_parser.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -1031,16 +1032,17 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
 }
 
 /**
- * fill_super - Read in superblock
+ * gfs2_fill_super - Read in superblock
  * @sb: The VFS superblock
- * @data: Mount options
+ * @fc: The filesystem context, carrying the mount options
  * @silent: Don't complain if it's not a GFS2 filesystem
  *
- * Returns: errno
+ * Returns: -errno
  */
-
-static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent)
+static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
 {
+       struct gfs2_args *args = fc->fs_private;
+       int silent = fc->sb_flags & SB_SILENT;
        struct gfs2_sbd *sdp;
        struct gfs2_holder mount_gh;
        int error;
@@ -1205,161 +1207,418 @@ fail_debug:
        return error;
 }
 
-static int set_gfs2_super(struct super_block *s, void *data)
+/**
+ * gfs2_get_tree - Get the GFS2 superblock and root directory
+ * @fc: The filesystem context
+ *
+ * Returns: 0 or -errno on error
+ */
+static int gfs2_get_tree(struct fs_context *fc)
 {
-       s->s_bdev = data;
-       s->s_dev = s->s_bdev->bd_dev;
-       s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+       struct gfs2_args *args = fc->fs_private;
+       struct gfs2_sbd *sdp;
+       int error;
+
+       error = get_tree_bdev(fc, gfs2_fill_super);
+       if (error)
+               return error;
+
+       sdp = fc->root->d_sb->s_fs_info;
+       dput(fc->root);
+       if (args->ar_meta)
+               fc->root = dget(sdp->sd_master_dir);
+       else
+               fc->root = dget(sdp->sd_root_dir);
        return 0;
 }
 
-static int test_gfs2_super(struct super_block *s, void *ptr)
+static void gfs2_fc_free(struct fs_context *fc)
 {
-       struct block_device *bdev = ptr;
-       return (bdev == s->s_bdev);
+       struct gfs2_args *args = fc->fs_private;
+
+       kfree(args);
 }
 
-/**
- * gfs2_mount - Get the GFS2 superblock
- * @fs_type: The GFS2 filesystem type
- * @flags: Mount flags
- * @dev_name: The name of the device
- * @data: The mount arguments
- *
- * Q. Why not use get_sb_bdev() ?
- * A. We need to select one of two root directories to mount, independent
- *    of whether this is the initial, or subsequent, mount of this sb
- *
- * Returns: 0 or -ve on error
- */
+enum gfs2_param {
+       Opt_lockproto,
+       Opt_locktable,
+       Opt_hostdata,
+       Opt_spectator,
+       Opt_ignore_local_fs,
+       Opt_localflocks,
+       Opt_localcaching,
+       Opt_debug,
+       Opt_upgrade,
+       Opt_acl,
+       Opt_quota,
+       Opt_suiddir,
+       Opt_data,
+       Opt_meta,
+       Opt_discard,
+       Opt_commit,
+       Opt_errors,
+       Opt_statfs_quantum,
+       Opt_statfs_percent,
+       Opt_quota_quantum,
+       Opt_barrier,
+       Opt_rgrplvb,
+       Opt_loccookie,
+};
+
+enum opt_quota {
+       Opt_quota_unset = 0,
+       Opt_quota_off,
+       Opt_quota_account,
+       Opt_quota_on,
+};
+
+static const unsigned int opt_quota_values[] = {
+       [Opt_quota_off]     = GFS2_QUOTA_OFF,
+       [Opt_quota_account] = GFS2_QUOTA_ACCOUNT,
+       [Opt_quota_on]      = GFS2_QUOTA_ON,
+};
+
+enum opt_data {
+       Opt_data_writeback = GFS2_DATA_WRITEBACK,
+       Opt_data_ordered   = GFS2_DATA_ORDERED,
+};
+
+enum opt_errors {
+       Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
+       Opt_errors_panic    = GFS2_ERRORS_PANIC,
+};
+
+static const struct fs_parameter_spec gfs2_param_specs[] = {
+       fsparam_string ("lockproto",          Opt_lockproto),
+       fsparam_string ("locktable",          Opt_locktable),
+       fsparam_string ("hostdata",           Opt_hostdata),
+       fsparam_flag   ("spectator",          Opt_spectator),
+       fsparam_flag   ("norecovery",         Opt_spectator),
+       fsparam_flag   ("ignore_local_fs",    Opt_ignore_local_fs),
+       fsparam_flag   ("localflocks",        Opt_localflocks),
+       fsparam_flag   ("localcaching",       Opt_localcaching),
+       fsparam_flag_no("debug",              Opt_debug),
+       fsparam_flag   ("upgrade",            Opt_upgrade),
+       fsparam_flag_no("acl",                Opt_acl),
+       fsparam_flag_no("suiddir",            Opt_suiddir),
+       fsparam_enum   ("data",               Opt_data),
+       fsparam_flag   ("meta",               Opt_meta),
+       fsparam_flag_no("discard",            Opt_discard),
+       fsparam_s32    ("commit",             Opt_commit),
+       fsparam_enum   ("errors",             Opt_errors),
+       fsparam_s32    ("statfs_quantum",     Opt_statfs_quantum),
+       fsparam_s32    ("statfs_percent",     Opt_statfs_percent),
+       fsparam_s32    ("quota_quantum",      Opt_quota_quantum),
+       fsparam_flag_no("barrier",            Opt_barrier),
+       fsparam_flag_no("rgrplvb",            Opt_rgrplvb),
+       fsparam_flag_no("loccookie",          Opt_loccookie),
+       /* quota can be a flag or an enum so it gets special treatment */
+       __fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional),
+       {}
+};
 
-static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
-                      const char *dev_name, void *data)
+static const struct fs_parameter_enum gfs2_param_enums[] = {
+       { Opt_quota,    "off",        Opt_quota_off },
+       { Opt_quota,    "account",    Opt_quota_account },
+       { Opt_quota,    "on",         Opt_quota_on },
+       { Opt_data,     "writeback",  Opt_data_writeback },
+       { Opt_data,     "ordered",    Opt_data_ordered },
+       { Opt_errors,   "withdraw",   Opt_errors_withdraw },
+       { Opt_errors,   "panic",      Opt_errors_panic },
+       {}
+};
+
+const struct fs_parameter_description gfs2_fs_parameters = {
+       .name = "gfs2",
+       .specs = gfs2_param_specs,
+       .enums = gfs2_param_enums,
+};
+
+/* Parse a single mount parameter */
+static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-       struct block_device *bdev;
-       struct super_block *s;
-       fmode_t mode = FMODE_READ | FMODE_EXCL;
-       int error;
-       struct gfs2_args args;
-       struct gfs2_sbd *sdp;
+       struct gfs2_args *args = fc->fs_private;
+       struct fs_parse_result result;
+       int o;
+
+       o = fs_parse(fc, &gfs2_fs_parameters, param, &result);
+       if (o < 0)
+               return o;
+
+       switch (o) {
+       case Opt_lockproto:
+               strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
+               break;
+       case Opt_locktable:
+               strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
+               break;
+       case Opt_hostdata:
+               strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
+               break;
+       case Opt_spectator:
+               args->ar_spectator = 1;
+               break;
+       case Opt_ignore_local_fs:
+               /* Retained for backwards compat only */
+               break;
+       case Opt_localflocks:
+               args->ar_localflocks = 1;
+               break;
+       case Opt_localcaching:
+               /* Retained for backwards compat only */
+               break;
+       case Opt_debug:
+               if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC)
+                       return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+               args->ar_debug = result.boolean;
+               break;
+       case Opt_upgrade:
+               /* Retained for backwards compat only */
+               break;
+       case Opt_acl:
+               args->ar_posix_acl = result.boolean;
+               break;
+       case Opt_quota:
+               /* The quota option can be a flag or an enum. A non-zero int_32
+                  result means that we have an enum index. Otherwise we have
+                  to rely on the 'negated' flag to tell us whether 'quota' or
+                  'noquota' was specified. */
+               if (result.negated)
+                       args->ar_quota = GFS2_QUOTA_OFF;
+               else if (result.int_32 > 0)
+                       args->ar_quota = opt_quota_values[result.int_32];
+               else
+                       args->ar_quota = GFS2_QUOTA_ON;
+               break;
+       case Opt_suiddir:
+               args->ar_suiddir = result.boolean;
+               break;
+       case Opt_data:
+               /* The uint_32 result maps directly to GFS2_DATA_* */
+               args->ar_data = result.uint_32;
+               break;
+       case Opt_meta:
+               args->ar_meta = 1;
+               break;
+       case Opt_discard:
+               args->ar_discard = result.boolean;
+               break;
+       case Opt_commit:
+               if (result.int_32 <= 0)
+                       return invalf(fc, "gfs2: commit mount option requires a positive numeric argument");
+               args->ar_commit = result.int_32;
+               break;
+       case Opt_statfs_quantum:
+               if (result.int_32 < 0)
+                       return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument");
+               args->ar_statfs_quantum = result.int_32;
+               break;
+       case Opt_quota_quantum:
+               if (result.int_32 <= 0)
+                       return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument");
+               args->ar_quota_quantum = result.int_32;
+               break;
+       case Opt_statfs_percent:
+               if (result.int_32 < 0 || result.int_32 > 100)
+                       return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100");
+               args->ar_statfs_percent = result.int_32;
+               break;
+       case Opt_errors:
+               if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC)
+                       return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+               args->ar_errors = result.uint_32;
+               break;
+       case Opt_barrier:
+               args->ar_nobarrier = result.boolean;
+               break;
+       case Opt_rgrplvb:
+               args->ar_rgrplvb = result.boolean;
+               break;
+       case Opt_loccookie:
+               args->ar_loccookie = result.boolean;
+               break;
+       default:
+               return invalf(fc, "gfs2: invalid mount option: %s", param->key);
+       }
+       return 0;
+}
 
-       if (!(flags & SB_RDONLY))
-               mode |= FMODE_WRITE;
+static int gfs2_reconfigure(struct fs_context *fc)
+{
+       struct super_block *sb = fc->root->d_sb;
+       struct gfs2_sbd *sdp = sb->s_fs_info;
+       struct gfs2_args *oldargs = &sdp->sd_args;
+       struct gfs2_args *newargs = fc->fs_private;
+       struct gfs2_tune *gt = &sdp->sd_tune;
+       int error = 0;
 
-       bdev = blkdev_get_by_path(dev_name, mode, fs_type);
-       if (IS_ERR(bdev))
-               return ERR_CAST(bdev);
+       sync_filesystem(sb);
 
-       /*
-        * once the super is inserted into the list by sget, s_umount
-        * will protect the lockfs code from trying to start a snapshot
-        * while we are mounting
-        */
-       mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               error = -EBUSY;
-               goto error_bdev;
+       spin_lock(&gt->gt_spin);
+       oldargs->ar_commit = gt->gt_logd_secs;
+       oldargs->ar_quota_quantum = gt->gt_quota_quantum;
+       if (gt->gt_statfs_slow)
+               oldargs->ar_statfs_quantum = 0;
+       else
+               oldargs->ar_statfs_quantum = gt->gt_statfs_quantum;
+       spin_unlock(&gt->gt_spin);
+
+       if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) {
+               errorf(fc, "gfs2: reconfiguration of locking protocol not allowed");
+               return -EINVAL;
        }
-       s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
-       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-       error = PTR_ERR(s);
-       if (IS_ERR(s))
-               goto error_bdev;
-
-       if (s->s_root) {
-               /*
-                * s_umount nests inside bd_mutex during
-                * __invalidate_device().  blkdev_put() acquires
-                * bd_mutex and can't be called under s_umount.  Drop
-                * s_umount temporarily.  This is safe as we're
-                * holding an active reference.
-                */
-               up_write(&s->s_umount);
-               blkdev_put(bdev, mode);
-               down_write(&s->s_umount);
-       } else {
-               /* s_mode must be set before deactivate_locked_super calls */
-               s->s_mode = mode;
+       if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) {
+               errorf(fc, "gfs2: reconfiguration of lock table not allowed");
+               return -EINVAL;
        }
+       if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) {
+               errorf(fc, "gfs2: reconfiguration of host data not allowed");
+               return -EINVAL;
+       }
+       if (newargs->ar_spectator != oldargs->ar_spectator) {
+               errorf(fc, "gfs2: reconfiguration of spectator mode not allowed");
+               return -EINVAL;
+       }
+       if (newargs->ar_localflocks != oldargs->ar_localflocks) {
+               errorf(fc, "gfs2: reconfiguration of localflocks not allowed");
+               return -EINVAL;
+       }
+       if (newargs->ar_meta != oldargs->ar_meta) {
+               errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed");
+               return -EINVAL;
+       }
+       if (oldargs->ar_spectator)
+               fc->sb_flags |= SB_RDONLY;
+
+       if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
+               if (fc->sb_flags & SB_RDONLY) {
+                       error = gfs2_make_fs_ro(sdp);
+                       if (error)
+                               errorf(fc, "gfs2: unable to remount read-only");
+               } else {
+                       error = gfs2_make_fs_rw(sdp);
+                       if (error)
+                               errorf(fc, "gfs2: unable to remount read-write");
+               }
+       }
+       sdp->sd_args = *newargs;
 
-       memset(&args, 0, sizeof(args));
-       args.ar_quota = GFS2_QUOTA_DEFAULT;
-       args.ar_data = GFS2_DATA_DEFAULT;
-       args.ar_commit = 30;
-       args.ar_statfs_quantum = 30;
-       args.ar_quota_quantum = 60;
-       args.ar_errors = GFS2_ERRORS_DEFAULT;
-
-       error = gfs2_mount_args(&args, data);
-       if (error) {
-               pr_warn("can't parse mount arguments\n");
-               goto error_super;
+       if (sdp->sd_args.ar_posix_acl)
+               sb->s_flags |= SB_POSIXACL;
+       else
+               sb->s_flags &= ~SB_POSIXACL;
+       if (sdp->sd_args.ar_nobarrier)
+               set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+       else
+               clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+       spin_lock(&gt->gt_spin);
+       gt->gt_logd_secs = newargs->ar_commit;
+       gt->gt_quota_quantum = newargs->ar_quota_quantum;
+       if (newargs->ar_statfs_quantum) {
+               gt->gt_statfs_slow = 0;
+               gt->gt_statfs_quantum = newargs->ar_statfs_quantum;
+       } else {
+               gt->gt_statfs_slow = 1;
+               gt->gt_statfs_quantum = 30;
        }
+       spin_unlock(&gt->gt_spin);
+
+       gfs2_online_uevent(sdp);
+       return error;
+}
+
+static const struct fs_context_operations gfs2_context_ops = {
+       .free        = gfs2_fc_free,
+       .parse_param = gfs2_parse_param,
+       .get_tree    = gfs2_get_tree,
+       .reconfigure = gfs2_reconfigure,
+};
+
+/* Set up the filesystem mount context */
+static int gfs2_init_fs_context(struct fs_context *fc)
+{
+       struct gfs2_args *args;
+
+       args = kmalloc(sizeof(*args), GFP_KERNEL);
+       if (args == NULL)
+               return -ENOMEM;
+
+       if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+               struct gfs2_sbd *sdp = fc->root->d_sb->s_fs_info;
 
-       if (s->s_root) {
-               error = -EBUSY;
-               if ((flags ^ s->s_flags) & SB_RDONLY)
-                       goto error_super;
+               *args = sdp->sd_args;
        } else {
-               snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-               sb_set_blocksize(s, block_size(bdev));
-               error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0);
-               if (error)
-                       goto error_super;
-               s->s_flags |= SB_ACTIVE;
-               bdev->bd_super = s;
+               memset(args, 0, sizeof(*args));
+               args->ar_quota = GFS2_QUOTA_DEFAULT;
+               args->ar_data = GFS2_DATA_DEFAULT;
+               args->ar_commit = 30;
+               args->ar_statfs_quantum = 30;
+               args->ar_quota_quantum = 60;
+               args->ar_errors = GFS2_ERRORS_DEFAULT;
        }
-
-       sdp = s->s_fs_info;
-       if (args.ar_meta)
-               return dget(sdp->sd_master_dir);
-       else
-               return dget(sdp->sd_root_dir);
-
-error_super:
-       deactivate_locked_super(s);
-       return ERR_PTR(error);
-error_bdev:
-       blkdev_put(bdev, mode);
-       return ERR_PTR(error);
+       fc->fs_private = args;
+       fc->ops = &gfs2_context_ops;
+       return 0;
 }
 
-static int set_meta_super(struct super_block *s, void *ptr)
+static int set_meta_super(struct super_block *s, struct fs_context *fc)
 {
        return -EINVAL;
 }
 
-static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
-                       int flags, const char *dev_name, void *data)
+static int test_meta_super(struct super_block *s, struct fs_context *fc)
+{
+       return (fc->sget_key == s->s_bdev);
+}
+
+static int gfs2_meta_get_tree(struct fs_context *fc)
 {
        struct super_block *s;
        struct gfs2_sbd *sdp;
        struct path path;
        int error;
 
-       if (!dev_name || !*dev_name)
-               return ERR_PTR(-EINVAL);
+       if (!fc->source || !*fc->source)
+               return -EINVAL;
 
-       error = kern_path(dev_name, LOOKUP_FOLLOW, &path);
+       error = kern_path(fc->source, LOOKUP_FOLLOW, &path);
        if (error) {
                pr_warn("path_lookup on %s returned error %d\n",
-                       dev_name, error);
-               return ERR_PTR(error);
+                       fc->source, error);
+               return error;
        }
-       s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
-                path.dentry->d_sb->s_bdev);
+       fc->fs_type = &gfs2_fs_type;
+       fc->sget_key = path.dentry->d_sb->s_bdev;
+       s = sget_fc(fc, test_meta_super, set_meta_super);
        path_put(&path);
        if (IS_ERR(s)) {
                pr_warn("gfs2 mount does not exist\n");
-               return ERR_CAST(s);
+               return PTR_ERR(s);
        }
-       if ((flags ^ s->s_flags) & SB_RDONLY) {
+       if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
                deactivate_locked_super(s);
-               return ERR_PTR(-EBUSY);
+               return -EBUSY;
        }
        sdp = s->s_fs_info;
-       return dget(sdp->sd_master_dir);
+       fc->root = dget(sdp->sd_master_dir);
+       return 0;
+}
+
+static const struct fs_context_operations gfs2_meta_context_ops = {
+       .free        = gfs2_fc_free,
+       .get_tree    = gfs2_meta_get_tree,
+};
+
+static int gfs2_meta_init_fs_context(struct fs_context *fc)
+{
+       int ret = gfs2_init_fs_context(fc);
+
+       if (ret)
+               return ret;
+
+       fc->ops = &gfs2_meta_context_ops;
+       return 0;
 }
 
 static void gfs2_kill_sb(struct super_block *sb)
@@ -1383,7 +1642,8 @@ static void gfs2_kill_sb(struct super_block *sb)
 struct file_system_type gfs2_fs_type = {
        .name = "gfs2",
        .fs_flags = FS_REQUIRES_DEV,
-       .mount = gfs2_mount,
+       .init_fs_context = gfs2_init_fs_context,
+       .parameters = &gfs2_fs_parameters,
        .kill_sb = gfs2_kill_sb,
        .owner = THIS_MODULE,
 };
@@ -1392,7 +1652,7 @@ MODULE_ALIAS_FS("gfs2");
 struct file_system_type gfs2meta_fs_type = {
        .name = "gfs2meta",
        .fs_flags = FS_REQUIRES_DEV,
-       .mount = gfs2_mount_meta,
+       .init_fs_context = gfs2_meta_init_fs_context,
        .owner = THIS_MODULE,
 };
 MODULE_ALIAS_FS("gfs2meta");
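
With .init_fs_context and .parameters wired up, gfs2 becomes mountable through the new mount API in addition to classic mount(2). A hedged userspace sketch; the raw x86-64 syscall numbers are assumptions (glibc wrappers for fsopen()/fsconfig()/fsmount() were not yet available), and the option values are illustrative:

#include <linux/mount.h>	/* FSCONFIG_* commands */
#include <sys/syscall.h>
#include <unistd.h>

/* Raw syscall numbers for x86-64; check your arch's unistd.h. */
#define sys_fsopen(name, flags)          syscall(430, (name), (flags))
#define sys_fsconfig(fd, cmd, k, v, aux) syscall(431, (fd), (cmd), (k), (v), (aux))
#define sys_fsmount(fd, flags, attrs)    syscall(432, (fd), (flags), (attrs))

int mount_gfs2(const char *bdev)
{
	int fsfd, mfd;

	fsfd = sys_fsopen("gfs2", 0);
	if (fsfd < 0)
		return -1;
	/* Each call below lands in gfs2_parse_param() above. */
	sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", bdev, 0);
	sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "quota", "account", 0);
	sys_fsconfig(fsfd, FSCONFIG_SET_FLAG, "rgrplvb", NULL, 0);
	/* Triggers gfs2_get_tree() via get_tree_bdev(). */
	sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
	mfd = sys_fsmount(fsfd, 0, 0);
	close(fsfd);
	return mfd;	/* attach somewhere with move_mount(2) */
}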
index 644c70a..5fa1eec 100644 (file)
 #include "xattr.h"
 #include "lops.h"
 
-#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
-
-enum {
-       Opt_lockproto,
-       Opt_locktable,
-       Opt_hostdata,
-       Opt_spectator,
-       Opt_ignore_local_fs,
-       Opt_localflocks,
-       Opt_localcaching,
-       Opt_debug,
-       Opt_nodebug,
-       Opt_upgrade,
-       Opt_acl,
-       Opt_noacl,
-       Opt_quota_off,
-       Opt_quota_account,
-       Opt_quota_on,
-       Opt_quota,
-       Opt_noquota,
-       Opt_suiddir,
-       Opt_nosuiddir,
-       Opt_data_writeback,
-       Opt_data_ordered,
-       Opt_meta,
-       Opt_discard,
-       Opt_nodiscard,
-       Opt_commit,
-       Opt_err_withdraw,
-       Opt_err_panic,
-       Opt_statfs_quantum,
-       Opt_statfs_percent,
-       Opt_quota_quantum,
-       Opt_barrier,
-       Opt_nobarrier,
-       Opt_rgrplvb,
-       Opt_norgrplvb,
-       Opt_loccookie,
-       Opt_noloccookie,
-       Opt_error,
-};
-
-static const match_table_t tokens = {
-       {Opt_lockproto, "lockproto=%s"},
-       {Opt_locktable, "locktable=%s"},
-       {Opt_hostdata, "hostdata=%s"},
-       {Opt_spectator, "spectator"},
-       {Opt_spectator, "norecovery"},
-       {Opt_ignore_local_fs, "ignore_local_fs"},
-       {Opt_localflocks, "localflocks"},
-       {Opt_localcaching, "localcaching"},
-       {Opt_debug, "debug"},
-       {Opt_nodebug, "nodebug"},
-       {Opt_upgrade, "upgrade"},
-       {Opt_acl, "acl"},
-       {Opt_noacl, "noacl"},
-       {Opt_quota_off, "quota=off"},
-       {Opt_quota_account, "quota=account"},
-       {Opt_quota_on, "quota=on"},
-       {Opt_quota, "quota"},
-       {Opt_noquota, "noquota"},
-       {Opt_suiddir, "suiddir"},
-       {Opt_nosuiddir, "nosuiddir"},
-       {Opt_data_writeback, "data=writeback"},
-       {Opt_data_ordered, "data=ordered"},
-       {Opt_meta, "meta"},
-       {Opt_discard, "discard"},
-       {Opt_nodiscard, "nodiscard"},
-       {Opt_commit, "commit=%d"},
-       {Opt_err_withdraw, "errors=withdraw"},
-       {Opt_err_panic, "errors=panic"},
-       {Opt_statfs_quantum, "statfs_quantum=%d"},
-       {Opt_statfs_percent, "statfs_percent=%d"},
-       {Opt_quota_quantum, "quota_quantum=%d"},
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_rgrplvb, "rgrplvb"},
-       {Opt_norgrplvb, "norgrplvb"},
-       {Opt_loccookie, "loccookie"},
-       {Opt_noloccookie, "noloccookie"},
-       {Opt_error, NULL}
-};
-
-/**
- * gfs2_mount_args - Parse mount options
- * @args: The structure into which the parsed options will be written
- * @options: The options to parse
- *
- * Return: errno
- */
-
-int gfs2_mount_args(struct gfs2_args *args, char *options)
-{
-       char *o;
-       int token;
-       substring_t tmp[MAX_OPT_ARGS];
-       int rv;
-
-       /* Split the options into tokens with the "," character and
-          process them */
-
-       while (1) {
-               o = strsep(&options, ",");
-               if (o == NULL)
-                       break;
-               if (*o == '\0')
-                       continue;
-
-               token = match_token(o, tokens, tmp);
-               switch (token) {
-               case Opt_lockproto:
-                       match_strlcpy(args->ar_lockproto, &tmp[0],
-                                     GFS2_LOCKNAME_LEN);
-                       break;
-               case Opt_locktable:
-                       match_strlcpy(args->ar_locktable, &tmp[0],
-                                     GFS2_LOCKNAME_LEN);
-                       break;
-               case Opt_hostdata:
-                       match_strlcpy(args->ar_hostdata, &tmp[0],
-                                     GFS2_LOCKNAME_LEN);
-                       break;
-               case Opt_spectator:
-                       args->ar_spectator = 1;
-                       break;
-               case Opt_ignore_local_fs:
-                       /* Retained for backwards compat only */
-                       break;
-               case Opt_localflocks:
-                       args->ar_localflocks = 1;
-                       break;
-               case Opt_localcaching:
-                       /* Retained for backwards compat only */
-                       break;
-               case Opt_debug:
-                       if (args->ar_errors == GFS2_ERRORS_PANIC) {
-                               pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
-                               return -EINVAL;
-                       }
-                       args->ar_debug = 1;
-                       break;
-               case Opt_nodebug:
-                       args->ar_debug = 0;
-                       break;
-               case Opt_upgrade:
-                       /* Retained for backwards compat only */
-                       break;
-               case Opt_acl:
-                       args->ar_posix_acl = 1;
-                       break;
-               case Opt_noacl:
-                       args->ar_posix_acl = 0;
-                       break;
-               case Opt_quota_off:
-               case Opt_noquota:
-                       args->ar_quota = GFS2_QUOTA_OFF;
-                       break;
-               case Opt_quota_account:
-                       args->ar_quota = GFS2_QUOTA_ACCOUNT;
-                       break;
-               case Opt_quota_on:
-               case Opt_quota:
-                       args->ar_quota = GFS2_QUOTA_ON;
-                       break;
-               case Opt_suiddir:
-                       args->ar_suiddir = 1;
-                       break;
-               case Opt_nosuiddir:
-                       args->ar_suiddir = 0;
-                       break;
-               case Opt_data_writeback:
-                       args->ar_data = GFS2_DATA_WRITEBACK;
-                       break;
-               case Opt_data_ordered:
-                       args->ar_data = GFS2_DATA_ORDERED;
-                       break;
-               case Opt_meta:
-                       args->ar_meta = 1;
-                       break;
-               case Opt_discard:
-                       args->ar_discard = 1;
-                       break;
-               case Opt_nodiscard:
-                       args->ar_discard = 0;
-                       break;
-               case Opt_commit:
-                       rv = match_int(&tmp[0], &args->ar_commit);
-                       if (rv || args->ar_commit <= 0) {
-                               pr_warn("commit mount option requires a positive numeric argument\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_statfs_quantum:
-                       rv = match_int(&tmp[0], &args->ar_statfs_quantum);
-                       if (rv || args->ar_statfs_quantum < 0) {
-                               pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_quota_quantum:
-                       rv = match_int(&tmp[0], &args->ar_quota_quantum);
-                       if (rv || args->ar_quota_quantum <= 0) {
-                               pr_warn("quota_quantum mount option requires a positive numeric argument\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_statfs_percent:
-                       rv = match_int(&tmp[0], &args->ar_statfs_percent);
-                       if (rv || args->ar_statfs_percent < 0 ||
-                           args->ar_statfs_percent > 100) {
-                               pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_err_withdraw:
-                       args->ar_errors = GFS2_ERRORS_WITHDRAW;
-                       break;
-               case Opt_err_panic:
-                       if (args->ar_debug) {
-                               pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
-                               return -EINVAL;
-                       }
-                       args->ar_errors = GFS2_ERRORS_PANIC;
-                       break;
-               case Opt_barrier:
-                       args->ar_nobarrier = 0;
-                       break;
-               case Opt_nobarrier:
-                       args->ar_nobarrier = 1;
-                       break;
-               case Opt_rgrplvb:
-                       args->ar_rgrplvb = 1;
-                       break;
-               case Opt_norgrplvb:
-                       args->ar_rgrplvb = 0;
-                       break;
-               case Opt_loccookie:
-                       args->ar_loccookie = 1;
-                       break;
-               case Opt_noloccookie:
-                       args->ar_loccookie = 0;
-                       break;
-               case Opt_error:
-               default:
-                       pr_warn("invalid mount option: %s\n", o);
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
 /**
  * gfs2_jindex_free - Clear all the journal index information
  * @sdp: The GFS2 superblock
@@ -847,7 +595,7 @@ out:
  * Returns: errno
  */
 
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
        struct gfs2_holder freeze_gh;
        int error;
@@ -1226,84 +974,6 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
        return 0;
 }
 
-/**
- * gfs2_remount_fs - called when the FS is remounted
- * @sb:  the filesystem
- * @flags:  the remount flags
- * @data:  extra data passed in (not used right now)
- *
- * Returns: errno
- */
-
-static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
-{
-       struct gfs2_sbd *sdp = sb->s_fs_info;
-       struct gfs2_args args = sdp->sd_args; /* Default to current settings */
-       struct gfs2_tune *gt = &sdp->sd_tune;
-       int error;
-
-       sync_filesystem(sb);
-
-       spin_lock(&gt->gt_spin);
-       args.ar_commit = gt->gt_logd_secs;
-       args.ar_quota_quantum = gt->gt_quota_quantum;
-       if (gt->gt_statfs_slow)
-               args.ar_statfs_quantum = 0;
-       else
-               args.ar_statfs_quantum = gt->gt_statfs_quantum;
-       spin_unlock(&gt->gt_spin);
-       error = gfs2_mount_args(&args, data);
-       if (error)
-               return error;
-
-       /* Not allowed to change locking details */
-       if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
-           strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
-           strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
-               return -EINVAL;
-
-       /* Some flags must not be changed */
-       if (args_neq(&args, &sdp->sd_args, spectator) ||
-           args_neq(&args, &sdp->sd_args, localflocks) ||
-           args_neq(&args, &sdp->sd_args, meta))
-               return -EINVAL;
-
-       if (sdp->sd_args.ar_spectator)
-               *flags |= SB_RDONLY;
-
-       if ((sb->s_flags ^ *flags) & SB_RDONLY) {
-               if (*flags & SB_RDONLY)
-                       error = gfs2_make_fs_ro(sdp);
-               else
-                       error = gfs2_make_fs_rw(sdp);
-       }
-
-       sdp->sd_args = args;
-       if (sdp->sd_args.ar_posix_acl)
-               sb->s_flags |= SB_POSIXACL;
-       else
-               sb->s_flags &= ~SB_POSIXACL;
-       if (sdp->sd_args.ar_nobarrier)
-               set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
-       else
-               clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
-       spin_lock(&gt->gt_spin);
-       gt->gt_logd_secs = args.ar_commit;
-       gt->gt_quota_quantum = args.ar_quota_quantum;
-       if (args.ar_statfs_quantum) {
-               gt->gt_statfs_slow = 0;
-               gt->gt_statfs_quantum = args.ar_statfs_quantum;
-       }
-       else {
-               gt->gt_statfs_slow = 1;
-               gt->gt_statfs_quantum = 30;
-       }
-       spin_unlock(&gt->gt_spin);
-
-       gfs2_online_uevent(sdp);
-       return error;
-}
-
 /**
  * gfs2_drop_inode - Drop an inode (test for remote unlink)
  * @inode: The inode to drop
@@ -1748,7 +1418,6 @@ const struct super_operations gfs2_super_ops = {
        .freeze_super           = gfs2_freeze,
        .thaw_super             = gfs2_unfreeze,
        .statfs                 = gfs2_statfs,
-       .remount_fs             = gfs2_remount_fs,
        .drop_inode             = gfs2_drop_inode,
        .show_options           = gfs2_show_options,
 };
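
Although .remount_fs is removed from gfs2_super_ops above, plain remounts keep working: mount(2) with MS_REMOUNT is routed through the fs_context layer, which seeds a context from sd_args in gfs2_init_fs_context(), feeds each option to gfs2_parse_param(), and then calls gfs2_reconfigure(). A minimal sketch (mount point and option value illustrative):

#include <sys/mount.h>

/* Ends up in gfs2_reconfigure() rather than the deleted
 * gfs2_remount_fs(); no .remount_fs hook is needed. */
int remount_example(void)
{
	return mount(NULL, "/mnt/gfs2", NULL,
		     MS_REMOUNT | MS_RDONLY, "commit=60");
}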
index 9d49eaa..b8bf811 100644 (file)
@@ -24,8 +24,6 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 
 extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
 
-extern int gfs2_mount_args(struct gfs2_args *args, char *data);
-
 extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
 extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
 
@@ -33,6 +31,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
                                     struct gfs2_inode **ipp);
 
 extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+extern int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
 extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
 extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
index 64bf28c..fef457a 100644 (file)
@@ -181,6 +181,9 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        mapping->flags = 0;
        mapping->wb_err = 0;
        atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_set(&mapping->nr_thps, 0);
+#endif
        mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
        mapping->private_data = NULL;
        mapping->writeback_index = 0;
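
The new nr_thps field counts transparent huge pages in a mapping's page cache so that writers can detect and split them. A sketch of the accessor pattern built on it, assuming helpers along the lines of the filemap_nr_thps*() family expected in include/linux/pagemap.h:

#ifdef CONFIG_READ_ONLY_THP_FOR_FS
/* Sketch only; see include/linux/pagemap.h for the real helpers. */
static inline unsigned int filemap_nr_thps(struct address_space *mapping)
{
	return atomic_read(&mapping->nr_thps);
}

static inline void filemap_nr_thps_inc(struct address_space *mapping)
{
	atomic_inc(&mapping->nr_thps);
}
#else
static inline unsigned int filemap_nr_thps(struct address_space *mapping)
{
	return 0;	/* no THPs in the page cache without the option */
}
#endif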
index 0dadbdb..f9a3899 100644 (file)
@@ -197,9 +197,11 @@ struct io_ring_ctx {
                unsigned                sq_entries;
                unsigned                sq_mask;
                unsigned                sq_thread_idle;
+               unsigned                cached_sq_dropped;
                struct io_uring_sqe     *sq_sqes;
 
                struct list_head        defer_list;
+               struct list_head        timeout_list;
        } ____cacheline_aligned_in_smp;
 
        /* IO offload */
@@ -211,11 +213,13 @@ struct io_ring_ctx {
 
        struct {
                unsigned                cached_cq_tail;
+               atomic_t                cached_cq_overflow;
                unsigned                cq_entries;
                unsigned                cq_mask;
                struct wait_queue_head  cq_wait;
                struct fasync_struct    *cq_fasync;
                struct eventfd_ctx      *cq_ev_fd;
+               atomic_t                cq_timeouts;
        } ____cacheline_aligned_in_smp;
 
        struct io_rings *rings;
@@ -283,6 +287,11 @@ struct io_poll_iocb {
        struct wait_queue_entry         wait;
 };
 
+struct io_timeout {
+       struct file                     *file;
+       struct hrtimer                  timer;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -294,6 +303,7 @@ struct io_kiocb {
                struct file             *file;
                struct kiocb            rw;
                struct io_poll_iocb     poll;
+               struct io_timeout       timeout;
        };
 
        struct sqe_submit       submit;
@@ -313,6 +323,9 @@ struct io_kiocb {
 #define REQ_F_LINK_DONE                128     /* linked sqes done */
 #define REQ_F_FAIL_LINK                256     /* fail rest of links */
 #define REQ_F_SHADOW_DRAIN     512     /* link-drain shadow req */
+#define REQ_F_TIMEOUT          1024    /* timeout request */
+#define REQ_F_ISREG            2048    /* regular file */
+#define REQ_F_MUST_PUNT                4096    /* must be punted even for NONBLOCK */
        u64                     user_data;
        u32                     result;
        u32                     sequence;
@@ -344,6 +357,8 @@ struct io_submit_state {
 };
 
 static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+                                long res);
 static void __io_free_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
@@ -400,27 +415,45 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        INIT_LIST_HEAD(&ctx->poll_list);
        INIT_LIST_HEAD(&ctx->cancel_list);
        INIT_LIST_HEAD(&ctx->defer_list);
+       INIT_LIST_HEAD(&ctx->timeout_list);
        return ctx;
 }
 
+static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
+                                      struct io_kiocb *req)
+{
+       return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
+                                       + atomic_read(&ctx->cached_cq_overflow);
+}
+
 static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
                                     struct io_kiocb *req)
 {
        if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
                return false;
 
-       return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
+       return __io_sequence_defer(ctx, req);
 }
 
 static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
 {
        struct io_kiocb *req;
 
-       if (list_empty(&ctx->defer_list))
-               return NULL;
+       req = list_first_entry_or_null(&ctx->defer_list, struct io_kiocb, list);
+       if (req && !io_sequence_defer(ctx, req)) {
+               list_del_init(&req->list);
+               return req;
+       }
+
+       return NULL;
+}
 
-       req = list_first_entry(&ctx->defer_list, struct io_kiocb, list);
-       if (!io_sequence_defer(ctx, req)) {
+static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
+{
+       struct io_kiocb *req;
+
+       req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list);
+       if (req && !__io_sequence_defer(ctx, req)) {
                list_del_init(&req->list);
                return req;
        }
@@ -446,25 +479,50 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 static inline void io_queue_async_work(struct io_ring_ctx *ctx,
                                       struct io_kiocb *req)
 {
-       int rw;
+       int rw = 0;
 
-       switch (req->submit.sqe->opcode) {
-       case IORING_OP_WRITEV:
-       case IORING_OP_WRITE_FIXED:
-               rw = !(req->rw.ki_flags & IOCB_DIRECT);
-               break;
-       default:
-               rw = 0;
-               break;
+       if (req->submit.sqe) {
+               switch (req->submit.sqe->opcode) {
+               case IORING_OP_WRITEV:
+               case IORING_OP_WRITE_FIXED:
+                       rw = !(req->rw.ki_flags & IOCB_DIRECT);
+                       break;
+               }
        }
 
        queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
+static void io_kill_timeout(struct io_kiocb *req)
+{
+       int ret;
+
+       ret = hrtimer_try_to_cancel(&req->timeout.timer);
+       if (ret != -1) {
+               atomic_inc(&req->ctx->cq_timeouts);
+               list_del(&req->list);
+               io_cqring_fill_event(req->ctx, req->user_data, 0);
+               __io_free_req(req);
+       }
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+       struct io_kiocb *req, *tmp;
+
+       spin_lock_irq(&ctx->completion_lock);
+       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+               io_kill_timeout(req);
+       spin_unlock_irq(&ctx->completion_lock);
+}
+
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
        struct io_kiocb *req;
 
+       while ((req = io_get_timeout_req(ctx)) != NULL)
+               io_kill_timeout(req);
+
        __io_commit_cqring(ctx);
 
        while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -512,9 +570,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
                WRITE_ONCE(cqe->res, res);
                WRITE_ONCE(cqe->flags, 0);
        } else {
-               unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
-
-               WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
+               WRITE_ONCE(ctx->rings->cq_overflow,
+                               atomic_inc_return(&ctx->cached_cq_overflow));
        }
 }
 
@@ -541,14 +598,6 @@ static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
        io_cqring_ev_posted(ctx);
 }
 
-static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
-{
-       percpu_ref_put_many(&ctx->refs, refs);
-
-       if (waitqueue_active(&ctx->wait))
-               wake_up(&ctx->wait);
-}
-
 static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
                                   struct io_submit_state *state)
 {
@@ -596,7 +645,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
        req->result = 0;
        return req;
 out:
-       io_ring_drop_ctx_refs(ctx, 1);
+       percpu_ref_put(&ctx->refs);
        return NULL;
 }
 
@@ -604,7 +653,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
 {
        if (*nr) {
                kmem_cache_free_bulk(req_cachep, *nr, reqs);
-               io_ring_drop_ctx_refs(ctx, *nr);
+               percpu_ref_put_many(&ctx->refs, *nr);
                *nr = 0;
        }
 }
@@ -613,7 +662,7 @@ static void __io_free_req(struct io_kiocb *req)
 {
        if (req->file && !(req->flags & REQ_F_FIXED_FILE))
                fput(req->file);
-       io_ring_drop_ctx_refs(req->ctx, 1);
+       percpu_ref_put(&req->ctx->refs);
        kmem_cache_free(req_cachep, req);
 }
 
@@ -688,6 +737,14 @@ static unsigned io_cqring_events(struct io_rings *rings)
        return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
 }
 
+static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
+{
+       struct io_rings *rings = ctx->rings;
+
+       /* make sure SQ entry isn't read before tail */
+       return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
+}
+
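
The smp_load_acquire() on sq.tail pairs with a store-release by whoever publishes SQEs (normally userspace), guaranteeing the entry bodies are visible before the new tail value is observed. A reduced sketch of the pairing, with invented names:

/* Illustrative pairing only; not kernel code. */
#define SQ_ENTRIES 128

struct demo_sqe { unsigned long long user_data; };

struct demo_ring {
	unsigned head, tail;
	struct demo_sqe slots[SQ_ENTRIES];
};

/* Producer: fill the slot, then publish the tail with release
 * semantics so the slot write cannot be reordered after it. */
static void demo_produce(struct demo_ring *r, struct demo_sqe e)
{
	r->slots[r->tail & (SQ_ENTRIES - 1)] = e;
	smp_store_release(&r->tail, r->tail + 1);
}

/* Consumer, as in io_sqring_entries(): acquire the tail so every
 * slot published before it is visible when we index into slots[]. */
static unsigned demo_available(struct demo_ring *r, unsigned cached_head)
{
	return smp_load_acquire(&r->tail) - cached_head;
}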
 /*
  * Find and free completed poll iocbs
  */
@@ -817,19 +874,11 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
        mutex_unlock(&ctx->uring_lock);
 }
 
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
-                          long min)
+static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+                           long min)
 {
-       int iters, ret = 0;
+       int iters = 0, ret = 0;
 
-       /*
-        * We disallow the app entering submit/complete with polling, but we
-        * still need to lock the ring to prevent racing with polled issue
-        * that got punted to a workqueue.
-        */
-       mutex_lock(&ctx->uring_lock);
-
-       iters = 0;
        do {
                int tmin = 0;
 
@@ -865,30 +914,45 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
                ret = 0;
        } while (min && !*nr_events && !need_resched());
 
+       return ret;
+}
+
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+                          long min)
+{
+       int ret;
+
+       /*
+        * We disallow the app entering submit/complete with polling, but we
+        * still need to lock the ring to prevent racing with polled issue
+        * that got punted to a workqueue.
+        */
+       mutex_lock(&ctx->uring_lock);
+       ret = __io_iopoll_check(ctx, nr_events, min);
        mutex_unlock(&ctx->uring_lock);
        return ret;
 }
 
-static void kiocb_end_write(struct kiocb *kiocb)
+static void kiocb_end_write(struct io_kiocb *req)
 {
-       if (kiocb->ki_flags & IOCB_WRITE) {
-               struct inode *inode = file_inode(kiocb->ki_filp);
+       /*
+        * Tell lockdep we inherited freeze protection from the
+        * submission thread.
+        */
+       if (req->flags & REQ_F_ISREG) {
+               struct inode *inode = file_inode(req->file);
 
-               /*
-                * Tell lockdep we inherited freeze protection from submission
-                * thread.
-                */
-               if (S_ISREG(inode->i_mode))
-                       __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
-               file_end_write(kiocb->ki_filp);
+               __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
        }
+       file_end_write(req->file);
 }
 
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
        struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
-       kiocb_end_write(kiocb);
+       if (kiocb->ki_flags & IOCB_WRITE)
+               kiocb_end_write(req);
 
        if ((req->flags & REQ_F_LINK) && res != req->result)
                req->flags |= REQ_F_FAIL_LINK;
@@ -900,7 +964,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 {
        struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
-       kiocb_end_write(kiocb);
+       if (kiocb->ki_flags & IOCB_WRITE)
+               kiocb_end_write(req);
 
        if ((req->flags & REQ_F_LINK) && res != req->result)
                req->flags |= REQ_F_FAIL_LINK;
@@ -1014,8 +1079,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
        if (!req->file)
                return -EBADF;
 
-       if (force_nonblock && !io_file_supports_async(req->file))
-               force_nonblock = false;
+       if (S_ISREG(file_inode(req->file)->i_mode))
+               req->flags |= REQ_F_ISREG;
+
+       /*
+        * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
+        * we know to async punt it even if it was opened O_NONBLOCK.
+        */
+       if (force_nonblock && !io_file_supports_async(req->file)) {
+               req->flags |= REQ_F_MUST_PUNT;
+               return -EAGAIN;
+       }
 
        kiocb->ki_pos = READ_ONCE(sqe->off);
        kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -1036,7 +1110,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
                return ret;
 
        /* don't allow async punt if RWF_NOWAIT was requested */
-       if (kiocb->ki_flags & IOCB_NOWAIT)
+       if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+           (req->file->f_flags & O_NONBLOCK))
                req->flags |= REQ_F_NOWAIT;
 
        if (force_nonblock)
@@ -1049,6 +1124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 
                kiocb->ki_flags |= IOCB_HIPRI;
                kiocb->ki_complete = io_complete_rw_iopoll;
+               req->result = 0;
        } else {
                if (kiocb->ki_flags & IOCB_HIPRI)
                        return -EINVAL;
@@ -1248,6 +1324,51 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
        }
 }
 
+/*
+ * For files that don't have ->read_iter() and ->write_iter(), handle them
+ * by looping over ->read() or ->write() manually.
+ */
+static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
+                          struct iov_iter *iter)
+{
+       ssize_t ret = 0;
+
+       /*
+        * Don't support polled IO through this interface, and we can't
+        * support non-blocking either. For the latter, this just causes
+        * the kiocb to be handled from an async context.
+        */
+       if (kiocb->ki_flags & IOCB_HIPRI)
+               return -EOPNOTSUPP;
+       if (kiocb->ki_flags & IOCB_NOWAIT)
+               return -EAGAIN;
+
+       while (iov_iter_count(iter)) {
+               struct iovec iovec = iov_iter_iovec(iter);
+               ssize_t nr;
+
+               if (rw == READ) {
+                       nr = file->f_op->read(file, iovec.iov_base,
+                                             iovec.iov_len, &kiocb->ki_pos);
+               } else {
+                       nr = file->f_op->write(file, iovec.iov_base,
+                                              iovec.iov_len, &kiocb->ki_pos);
+               }
+
+               if (nr < 0) {
+                       if (!ret)
+                               ret = nr;
+                       break;
+               }
+               ret += nr;
+               if (nr != iovec.iov_len)
+                       break;
+               iov_iter_advance(iter, nr);
+       }
+
+       return ret;
+}
+
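
loop_rw_iter() is what lets read/write SQEs target files that implement only ->read()/->write(), such as many character devices, which previously failed with -EINVAL. A liburing-flavored sketch (device path and error handling illustrative):

#include <fcntl.h>
#include <liburing.h>
#include <unistd.h>

/* Reads from a char device that typically lacks ->read_iter; with
 * loop_rw_iter() in place the readv is emulated through ->read(). */
int read_chardev(struct io_uring *ring, void *buf, unsigned len)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct io_uring_sqe *sqe;
	int fd = open("/dev/hwrng", O_RDONLY);	/* illustrative device */

	if (fd < 0)
		return -1;
	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		close(fd);
		return -1;
	}
	io_uring_prep_readv(sqe, fd, &iov, 1, 0);
	return io_uring_submit(ring);
}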
 static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
                   bool force_nonblock)
 {
@@ -1265,8 +1386,6 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 
        if (unlikely(!(file->f_mode & FMODE_READ)))
                return -EBADF;
-       if (unlikely(!file->f_op->read_iter))
-               return -EINVAL;
 
        ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
        if (ret < 0)
@@ -1281,7 +1400,11 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
        if (!ret) {
                ssize_t ret2;
 
-               ret2 = call_read_iter(file, kiocb, &iter);
+               if (file->f_op->read_iter)
+                       ret2 = call_read_iter(file, kiocb, &iter);
+               else
+                       ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+
                /*
                 * In case of a short read, punt to async. This can happen
                 * if we have data partially cached. Alternatively we may
@@ -1290,7 +1413,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
                 * need the async punt anyway, so it's more efficient to do it
                 * here.
                 */
-               if (force_nonblock && ret2 > 0 && ret2 < read_size)
+               if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
+                   (req->flags & REQ_F_ISREG) &&
+                   ret2 > 0 && ret2 < read_size)
                        ret2 = -EAGAIN;
                /* Catch -EAGAIN return for forced non-blocking submission */
                if (!force_nonblock || ret2 != -EAGAIN) {
@@ -1326,8 +1451,6 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
        file = kiocb->ki_filp;
        if (unlikely(!(file->f_mode & FMODE_WRITE)))
                return -EBADF;
-       if (unlikely(!file->f_op->write_iter))
-               return -EINVAL;
 
        ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
        if (ret < 0)
@@ -1357,7 +1480,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
                 * released so that it doesn't complain about the held lock when
                 * we return to userspace.
                 */
-               if (S_ISREG(file_inode(file)->i_mode)) {
+               if (req->flags & REQ_F_ISREG) {
                        __sb_start_write(file_inode(file)->i_sb,
                                                SB_FREEZE_WRITE, true);
                        __sb_writers_release(file_inode(file)->i_sb,
@@ -1365,7 +1488,10 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
                }
                kiocb->ki_flags |= IOCB_WRITE;
 
-               ret2 = call_write_iter(file, kiocb, &iter);
+               if (file->f_op->write_iter)
+                       ret2 = call_write_iter(file, kiocb, &iter);
+               else
+                       ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
                if (!force_nonblock || ret2 != -EAGAIN) {
                        io_rw_done(kiocb, ret2);
                } else {
@@ -1714,6 +1840,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (!poll->file)
                return -EBADF;
 
+       req->submit.sqe = NULL;
        INIT_WORK(&req->work, io_poll_complete_work);
        events = READ_ONCE(sqe->poll_events);
        poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
@@ -1765,6 +1892,114 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return ipt.error;
 }
 
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+       struct io_ring_ctx *ctx;
+       struct io_kiocb *req, *prev;
+       unsigned long flags;
+
+       req = container_of(timer, struct io_kiocb, timeout.timer);
+       ctx = req->ctx;
+       atomic_inc(&ctx->cq_timeouts);
+
+       spin_lock_irqsave(&ctx->completion_lock, flags);
+       /*
+        * Adjust the sequence of reqs queued before the current one,
+        * because this req will consume a slot in the cq_ring and the
+        * cq_tail pointer will be increased; otherwise other timeout
+        * reqs may complete early without waiting for enough wait_nr.
+        */
+       prev = req;
+       list_for_each_entry_continue_reverse(prev, &ctx->timeout_list, list)
+               prev->sequence++;
+       list_del(&req->list);
+
+       io_cqring_fill_event(ctx, req->user_data, -ETIME);
+       io_commit_cqring(ctx);
+       spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+       io_cqring_ev_posted(ctx);
+
+       io_put_req(req);
+       return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       unsigned count;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct list_head *entry;
+       struct timespec64 ts;
+       unsigned span = 0;
+
+       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+       if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
+           sqe->len != 1)
+               return -EINVAL;
+
+       if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr)))
+               return -EFAULT;
+
+       /*
+        * sqe->off holds how many events need to occur for this
+        * timeout event to be satisfied.
+        */
+       count = READ_ONCE(sqe->off);
+       if (!count)
+               count = 1;
+
+       req->sequence = ctx->cached_sq_head + count - 1;
+       /* reuse it to store the count */
+       req->submit.sequence = count;
+       req->flags |= REQ_F_TIMEOUT;
+
+       /*
+        * Insertion sort, ensuring the first entry in the list is always
+        * the one we need first.
+        */
+       spin_lock_irq(&ctx->completion_lock);
+       list_for_each_prev(entry, &ctx->timeout_list) {
+               struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+               unsigned nxt_sq_head;
+               long long tmp, tmp_nxt;
+
+               /*
+                * Since cached_sq_head + count - 1 can overflow, use type long
+                * long to store it.
+                */
+               tmp = (long long)ctx->cached_sq_head + count - 1;
+               nxt_sq_head = nxt->sequence - nxt->submit.sequence + 1;
+               tmp_nxt = (long long)nxt_sq_head + nxt->submit.sequence - 1;
+
+               /*
+                * cached_sq_head may overflow, but it can never overflow
+                * twice while any timeout req is still pending.
+                */
+               if (ctx->cached_sq_head < nxt_sq_head)
+                       tmp += UINT_MAX;
+
+               if (tmp > tmp_nxt)
+                       break;
+
+               /*
+                * The sequence of reqs after the inserted one, and of the
+                * inserted req itself, must be adjusted because each timeout
+                * req consumes a cq_ring slot.
+                */
+               span++;
+               nxt->sequence++;
+       }
+       req->sequence -= span;
+       list_add(&req->list, entry);
+       spin_unlock_irq(&ctx->completion_lock);
+
+       hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       req->timeout.timer.function = io_timeout_fn;
+       hrtimer_start(&req->timeout.timer, timespec64_to_ktime(ts),
+                       HRTIMER_MODE_REL);
+       return 0;
+}
+
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
                        const struct io_uring_sqe *sqe)
 {
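
For illustration, a minimal standalone sketch of the wraparound handling used in io_timeout() above (plain userspace C with hypothetical values; it uses the full 2^32 modulus for clarity, where the hunk above adds UINT_MAX):

	#include <stdio.h>

	int main(void)
	{
		/* seq_a wrapped past UINT_MAX; seq_b did not */
		unsigned int seq_a = 0x00000010u;	/* logically 0x100000010 */
		unsigned int seq_b = 0xfffffff8u;
		long long a = seq_a, b = seq_b;

		if (seq_a < seq_b)		/* the wrap makes the raw compare lie */
			a += 0x100000000ll;	/* restore the true magnitude */

		printf("a %s b\n", a > b ? "after" : "before");	/* "a after b" */
		return 0;
	}
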
@@ -1842,6 +2077,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        case IORING_OP_RECVMSG:
                ret = io_recvmsg(req, s->sqe, force_nonblock);
                break;
+       case IORING_OP_TIMEOUT:
+               ret = io_timeout(req, s->sqe);
+               break;
        default:
                ret = -EINVAL;
                break;
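
From userspace, the new opcode is driven by an SQE whose addr points at a struct __kernel_timespec, whose len is 1, and whose off carries the completion count, exactly as io_timeout() above validates. A hedged sketch using liburing, assuming a liburing build recent enough to know IORING_OP_TIMEOUT:

	#include <liburing.h>

	static int wait_with_timeout(struct io_uring *ring)
	{
		struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		struct io_uring_cqe *cqe;
		int ret;

		if (!sqe)
			return -EBUSY;

		/* complete after 1s, or once 8 other CQEs have posted */
		io_uring_prep_timeout(sqe, &ts, 8, 0);
		io_uring_submit(ring);

		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret < 0)
			return ret;
		ret = cqe->res;	/* -ETIME means the timer fired first */
		io_uring_cqe_seen(ring, cqe);
		return ret;
	}
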
@@ -2090,21 +2328,25 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 }
 
 static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-                       struct sqe_submit *s, bool force_nonblock)
+                       struct sqe_submit *s)
 {
        int ret;
 
-       ret = __io_submit_sqe(ctx, req, s, force_nonblock);
-       if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+       ret = __io_submit_sqe(ctx, req, s, true);
+
+       /*
+        * We async punt it if the file wasn't marked NOWAIT, or if the file
+        * doesn't support non-blocking read/write attempts
+        */
+       if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
+           (req->flags & REQ_F_MUST_PUNT))) {
                struct io_uring_sqe *sqe_copy;
 
-               sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+               sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
                if (sqe_copy) {
                        struct async_list *list;
 
-                       memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
                        s->sqe = sqe_copy;
-
                        memcpy(&req->submit, s, sizeof(*s));
                        list = io_async_list_from_sqe(ctx, s->sqe);
                        if (!io_add_to_prev_work(list, req)) {
@@ -2137,7 +2379,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 }
 
 static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-                       struct sqe_submit *s, bool force_nonblock)
+                       struct sqe_submit *s)
 {
        int ret;
 
@@ -2150,18 +2392,17 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
                return 0;
        }
 
-       return __io_queue_sqe(ctx, req, s, force_nonblock);
+       return __io_queue_sqe(ctx, req, s);
 }
 
 static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
-                             struct sqe_submit *s, struct io_kiocb *shadow,
-                             bool force_nonblock)
+                             struct sqe_submit *s, struct io_kiocb *shadow)
 {
        int ret;
        int need_submit = false;
 
        if (!shadow)
-               return io_queue_sqe(ctx, req, s, force_nonblock);
+               return io_queue_sqe(ctx, req, s);
 
        /*
         * Mark the first IO in link list as DRAIN, let all the following
@@ -2173,6 +2414,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
        if (ret) {
                if (ret != -EIOCBQUEUED) {
                        io_free_req(req);
+                       __io_free_req(shadow);
                        io_cqring_add_event(ctx, s->sqe->user_data, ret);
                        return 0;
                }
@@ -2190,7 +2432,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
        spin_unlock_irq(&ctx->completion_lock);
 
        if (need_submit)
-               return __io_queue_sqe(ctx, req, s, force_nonblock);
+               return __io_queue_sqe(ctx, req, s);
 
        return 0;
 }
@@ -2198,8 +2440,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
 #define SQE_VALID_FLAGS        (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
 
 static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
-                         struct io_submit_state *state, struct io_kiocb **link,
-                         bool force_nonblock)
+                         struct io_submit_state *state, struct io_kiocb **link)
 {
        struct io_uring_sqe *sqe_copy;
        struct io_kiocb *req;
@@ -2226,6 +2467,8 @@ err:
                return;
        }
 
+       req->user_data = s->sqe->user_data;
+
        /*
         * If we already have a head request, queue this one for async
         * submittal once the head completes. If we don't have a head but
@@ -2252,7 +2495,7 @@ err:
                INIT_LIST_HEAD(&req->link_list);
                *link = req;
        } else {
-               io_queue_sqe(ctx, req, s, force_nonblock);
+               io_queue_sqe(ctx, req, s);
        }
 }
 
@@ -2332,12 +2575,13 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 
        /* drop invalid entries */
        ctx->cached_sq_head++;
-       rings->sq_dropped++;
+       ctx->cached_sq_dropped++;
+       WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
        return false;
 }
 
-static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
-                         unsigned int nr, bool has_user, bool mm_fault)
+static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
+                         bool has_user, bool mm_fault)
 {
        struct io_submit_state state, *statep = NULL;
        struct io_kiocb *link = NULL;
@@ -2351,40 +2595,48 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
        }
 
        for (i = 0; i < nr; i++) {
+               struct sqe_submit s;
+
+               if (!io_get_sqring(ctx, &s))
+                       break;
+
                /*
                 * If previous wasn't linked and we have a linked command,
                 * that's the end of the chain. Submit the previous link.
                 */
                if (!prev_was_link && link) {
-                       io_queue_link_head(ctx, link, &link->submit, shadow_req,
-                                               true);
+                       io_queue_link_head(ctx, link, &link->submit, shadow_req);
                        link = NULL;
+                       shadow_req = NULL;
                }
-               prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
+               prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
 
-               if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
+               if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
                        if (!shadow_req) {
                                shadow_req = io_get_req(ctx, NULL);
+                               if (unlikely(!shadow_req))
+                                       goto out;
                                shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
                                refcount_dec(&shadow_req->refs);
                        }
-                       shadow_req->sequence = sqes[i].sequence;
+                       shadow_req->sequence = s.sequence;
                }
 
+out:
                if (unlikely(mm_fault)) {
-                       io_cqring_add_event(ctx, sqes[i].sqe->user_data,
+                       io_cqring_add_event(ctx, s.sqe->user_data,
                                                -EFAULT);
                } else {
-                       sqes[i].has_user = has_user;
-                       sqes[i].needs_lock = true;
-                       sqes[i].needs_fixed_file = true;
-                       io_submit_sqe(ctx, &sqes[i], statep, &link, true);
+                       s.has_user = has_user;
+                       s.needs_lock = true;
+                       s.needs_fixed_file = true;
+                       io_submit_sqe(ctx, &s, statep, &link);
                        submitted++;
                }
        }
 
        if (link)
-               io_queue_link_head(ctx, link, &link->submit, shadow_req, true);
+               io_queue_link_head(ctx, link, &link->submit, shadow_req);
        if (statep)
                io_submit_state_end(&state);
 
@@ -2393,7 +2645,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 
 static int io_sq_thread(void *data)
 {
-       struct sqe_submit sqes[IO_IOPOLL_BATCH];
        struct io_ring_ctx *ctx = data;
        struct mm_struct *cur_mm = NULL;
        mm_segment_t old_fs;
@@ -2408,14 +2659,27 @@ static int io_sq_thread(void *data)
 
        timeout = inflight = 0;
        while (!kthread_should_park()) {
-               bool all_fixed, mm_fault = false;
-               int i;
+               bool mm_fault = false;
+               unsigned int to_submit;
 
                if (inflight) {
                        unsigned nr_events = 0;
 
                        if (ctx->flags & IORING_SETUP_IOPOLL) {
-                               io_iopoll_check(ctx, &nr_events, 0);
+                               /*
+                                * inflight is the count of the maximum possible
+                                * entries we submitted, but it can be smaller
+                                * if we dropped some of them. If we don't have
+                                * poll entries available, then we know that we
+                                * have nothing left to poll for. Reset the
+                                * inflight count to zero in that case.
+                                */
+                               mutex_lock(&ctx->uring_lock);
+                               if (!list_empty(&ctx->poll_list))
+                                       __io_iopoll_check(ctx, &nr_events, 0);
+                               else
+                                       inflight = 0;
+                               mutex_unlock(&ctx->uring_lock);
                        } else {
                                /*
                                 * Normal IO, just pretend everything completed.
@@ -2429,14 +2693,15 @@ static int io_sq_thread(void *data)
                                timeout = jiffies + ctx->sq_thread_idle;
                }
 
-               if (!io_get_sqring(ctx, &sqes[0])) {
+               to_submit = io_sqring_entries(ctx);
+               if (!to_submit) {
                        /*
                         * We're polling. If we're within the defined idle
                         * period, then let us spin without work before going
                         * to sleep.
                         */
                        if (inflight || !time_after(jiffies, timeout)) {
-                               cpu_relax();
+                               cond_resched();
                                continue;
                        }
 
@@ -2460,7 +2725,8 @@ static int io_sq_thread(void *data)
                        /* make sure to read SQ tail after writing flags */
                        smp_mb();
 
-                       if (!io_get_sqring(ctx, &sqes[0])) {
+                       to_submit = io_sqring_entries(ctx);
+                       if (!to_submit) {
                                if (kthread_should_park()) {
                                        finish_wait(&ctx->sqo_wait, &wait);
                                        break;
@@ -2478,19 +2744,8 @@ static int io_sq_thread(void *data)
                        ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
                }
 
-               i = 0;
-               all_fixed = true;
-               do {
-                       if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
-                               all_fixed = false;
-
-                       i++;
-                       if (i == ARRAY_SIZE(sqes))
-                               break;
-               } while (io_get_sqring(ctx, &sqes[i]));
-
                /* Unless all new commands are FIXED regions, grab mm */
-               if (!all_fixed && !cur_mm) {
+               if (!cur_mm) {
                        mm_fault = !mmget_not_zero(ctx->sqo_mm);
                        if (!mm_fault) {
                                use_mm(ctx->sqo_mm);
@@ -2498,8 +2753,9 @@ static int io_sq_thread(void *data)
                        }
                }
 
-               inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
-                                               mm_fault);
+               to_submit = min(to_submit, ctx->sq_entries);
+               inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
+                                          mm_fault);
 
                /* Commit SQ ring head once we've consumed all SQEs */
                io_commit_sqring(ctx);
@@ -2516,8 +2772,7 @@ static int io_sq_thread(void *data)
        return 0;
 }
 
-static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
-                         bool block_for_last)
+static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 {
        struct io_submit_state state, *statep = NULL;
        struct io_kiocb *link = NULL;
@@ -2531,7 +2786,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
        }
 
        for (i = 0; i < to_submit; i++) {
-               bool force_nonblock = true;
                struct sqe_submit s;
 
                if (!io_get_sqring(ctx, &s))
@@ -2542,49 +2796,73 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
                 * that's the end of the chain. Submit the previous link.
                 */
                if (!prev_was_link && link) {
-                       io_queue_link_head(ctx, link, &link->submit, shadow_req,
-                                               force_nonblock);
+                       io_queue_link_head(ctx, link, &link->submit, shadow_req);
                        link = NULL;
+                       shadow_req = NULL;
                }
                prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
 
                if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
                        if (!shadow_req) {
                                shadow_req = io_get_req(ctx, NULL);
+                               if (unlikely(!shadow_req))
+                                       goto out;
                                shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
                                refcount_dec(&shadow_req->refs);
                        }
                        shadow_req->sequence = s.sequence;
                }
 
+out:
                s.has_user = true;
                s.needs_lock = false;
                s.needs_fixed_file = false;
                submit++;
-
-               /*
-                * The caller will block for events after submit, submit the
-                * last IO non-blocking. This is either the only IO it's
-                * submitting, or it already submitted the previous ones. This
-                * improves performance by avoiding an async punt that we don't
-                * need to do.
-                */
-               if (block_for_last && submit == to_submit)
-                       force_nonblock = false;
-
-               io_submit_sqe(ctx, &s, statep, &link, force_nonblock);
+               io_submit_sqe(ctx, &s, statep, &link);
        }
-       io_commit_sqring(ctx);
 
        if (link)
-               io_queue_link_head(ctx, link, &link->submit, shadow_req,
-                                       block_for_last);
+               io_queue_link_head(ctx, link, &link->submit, shadow_req);
        if (statep)
                io_submit_state_end(statep);
 
+       io_commit_sqring(ctx);
+
        return submit;
 }
 
+struct io_wait_queue {
+       struct wait_queue_entry wq;
+       struct io_ring_ctx *ctx;
+       unsigned to_wait;
+       unsigned nr_timeouts;
+};
+
+static inline bool io_should_wake(struct io_wait_queue *iowq)
+{
+       struct io_ring_ctx *ctx = iowq->ctx;
+
+       /*
+        * Wake up if we have enough events, or if a timeout occurred since we
+        * started waiting. For timeouts, we always want to return to userspace,
+        * regardless of event count.
+        */
+       return io_cqring_events(ctx->rings) >= iowq->to_wait ||
+                       atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+}
+
+static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+                           int wake_flags, void *key)
+{
+       struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+                                                       wq);
+
+       if (!io_should_wake(iowq))
+               return -1;
+
+       return autoremove_wake_function(curr, mode, wake_flags, key);
+}
+
 /*
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
@@ -2592,6 +2870,15 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                          const sigset_t __user *sig, size_t sigsz)
 {
+       struct io_wait_queue iowq = {
+               .wq = {
+                       .private        = current,
+                       .func           = io_wake_function,
+                       .entry          = LIST_HEAD_INIT(iowq.wq.entry),
+               },
+               .ctx            = ctx,
+               .to_wait        = min_events,
+       };
        struct io_rings *rings = ctx->rings;
        int ret;
 
@@ -2611,7 +2898,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                        return ret;
        }
 
-       ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+       ret = 0;
+       iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+       do {
+               prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+                                               TASK_INTERRUPTIBLE);
+               if (io_should_wake(&iowq))
+                       break;
+               schedule();
+               if (signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+       } while (1);
+       finish_wait(&ctx->wait, &iowq.wq);
+
        restore_saved_sigmask_unless(ret == -ERESTARTSYS);
        if (ret == -ERESTARTSYS)
                ret = -EINTR;
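
The loop above is the stock prepare_to_wait()/schedule()/finish_wait() idiom; the custom wake function is what makes it cheap, since io_wake_function() runs in the waker's context and returns -1 when io_should_wake() is false, so a waiter that cannot make progress is not woken at all. A generic hedged sketch of the same pattern (the foo_* names are hypothetical):

	struct foo_waiter {
		struct wait_queue_entry wq;
		struct foo_ctx *ctx;
	};

	static int foo_wake(struct wait_queue_entry *curr, unsigned mode,
			    int sync, void *key)
	{
		struct foo_waiter *w = container_of(curr, struct foo_waiter, wq);

		if (!foo_done(w->ctx))		/* checked in the waker's context */
			return -1;		/* stay queued, skip this wakeup */
		return autoremove_wake_function(curr, mode, sync, key);
	}

	static int foo_wait(struct foo_ctx *ctx, wait_queue_head_t *head)
	{
		struct foo_waiter w = {
			.wq = {
				.private	= current,
				.func		= foo_wake,
				.entry		= LIST_HEAD_INIT(w.wq.entry),
			},
			.ctx = ctx,
		};
		int ret = 0;

		do {
			prepare_to_wait_exclusive(head, &w.wq, TASK_INTERRUPTIBLE);
			if (foo_done(ctx))
				break;
			schedule();
			if (signal_pending(current)) {
				ret = -ERESTARTSYS;
				break;
			}
		} while (1);
		finish_wait(head, &w.wq);
		return ret;
	}
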
@@ -2682,8 +2983,12 @@ static void io_finish_async(struct io_ring_ctx *ctx)
 static void io_destruct_skb(struct sk_buff *skb)
 {
        struct io_ring_ctx *ctx = skb->sk->sk_user_data;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++)
+               if (ctx->sqo_wq[i])
+                       flush_workqueue(ctx->sqo_wq[i]);
 
-       io_finish_async(ctx);
        unix_destruct_scm(skb);
 }
 
@@ -3263,7 +3568,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
        if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
            ctx->rings->sq_ring_entries)
                mask |= EPOLLOUT | EPOLLWRNORM;
-       if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail)
+       if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail)
                mask |= EPOLLIN | EPOLLRDNORM;
 
        return mask;
@@ -3282,6 +3587,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
        percpu_ref_kill(&ctx->refs);
        mutex_unlock(&ctx->uring_lock);
 
+       io_kill_timeouts(ctx);
        io_poll_remove_all(ctx);
        io_iopoll_reap_events(ctx);
        wait_for_completion(&ctx->ctx_done);
@@ -3319,7 +3625,7 @@ static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
        }
 
        page = virt_to_head_page(ptr);
-       if (sz > (PAGE_SIZE << compound_order(page)))
+       if (sz > page_size(page))
                return -EINVAL;
 
        pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
@@ -3362,21 +3668,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                        wake_up(&ctx->sqo_wait);
                submitted = to_submit;
        } else if (to_submit) {
-               bool block_for_last = false;
-
                to_submit = min(to_submit, ctx->sq_entries);
 
-               /*
-                * Allow last submission to block in a series, IFF the caller
-                * asked to wait for events and we don't currently have
-                * enough. This potentially avoids an async punt.
-                */
-               if (to_submit == min_complete &&
-                   io_cqring_events(ctx->rings) < min_complete)
-                       block_for_last = true;
-
                mutex_lock(&ctx->uring_lock);
-               submitted = io_ring_submit(ctx, to_submit, block_for_last);
+               submitted = io_ring_submit(ctx, to_submit);
                mutex_unlock(&ctx->uring_lock);
        }
        if (flags & IORING_ENTER_GETEVENTS) {
@@ -3391,7 +3686,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                }
        }
 
-       io_ring_drop_ctx_refs(ctx, 1);
+       percpu_ref_put(&ctx->refs);
 out_fput:
        fdput(f);
        return submitted ? submitted : ret;
@@ -3535,10 +3830,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
        if (ret)
                goto err;
 
-       ret = io_uring_get_fd(ctx);
-       if (ret < 0)
-               goto err;
-
        memset(&p->sq_off, 0, sizeof(p->sq_off));
        p->sq_off.head = offsetof(struct io_rings, sq.head);
        p->sq_off.tail = offsetof(struct io_rings, sq.tail);
@@ -3556,6 +3847,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
        p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
        p->cq_off.cqes = offsetof(struct io_rings, cqes);
 
+       /*
+        * Install ring fd as the very last thing, so we don't risk someone
+        * having closed it before we finish setup
+        */
+       ret = io_uring_get_fd(ctx);
+       if (ret < 0)
+               goto err;
+
        p->features = IORING_FEAT_SINGLE_MMAP;
        return ret;
 err:
index 10517ce..1fc28c2 100644 (file)
@@ -24,7 +24,7 @@
 
 struct iomap_dio {
        struct kiocb            *iocb;
-       iomap_dio_end_io_t      *end_io;
+       const struct iomap_dio_ops *dops;
        loff_t                  i_size;
        loff_t                  size;
        atomic_t                ref;
@@ -72,18 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
+       const struct iomap_dio_ops *dops = dio->dops;
        struct kiocb *iocb = dio->iocb;
        struct inode *inode = file_inode(iocb->ki_filp);
        loff_t offset = iocb->ki_pos;
-       ssize_t ret;
+       ssize_t ret = dio->error;
 
-       if (dio->end_io) {
-               ret = dio->end_io(iocb,
-                               dio->error ? dio->error : dio->size,
-                               dio->flags);
-       } else {
-               ret = dio->error;
-       }
+       if (dops && dops->end_io)
+               ret = dops->end_io(iocb, dio->size, ret, dio->flags);
 
        if (likely(!ret)) {
                ret = dio->size;
@@ -101,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
         * one is a pretty crazy thing to do, so we don't support it 100%.  If
         * this invalidation fails, tough, the write still worked...
         *
-        * And this page cache invalidation has to be after dio->end_io(), as
-        * some filesystems convert unwritten extents to real allocations in
-        * end_io() when necessary, otherwise a racing buffer read would cache
+        * And this page cache invalidation has to be after ->end_io(), as some
+        * filesystems convert unwritten extents to real allocations in
+        * ->end_io() when necessary, otherwise a racing buffer read would cache
         * zeros from unwritten extents.
         */
        if (!dio->error &&
@@ -396,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  */
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-               const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
+               const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
 {
        struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = file_inode(iocb->ki_filp);
@@ -421,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        atomic_set(&dio->ref, 1);
        dio->size = 0;
        dio->i_size = i_size_read(inode);
-       dio->end_io = end_io;
+       dio->dops = dops;
        dio->error = 0;
        dio->flags = 0;
 
index 953990e..1c58859 100644 (file)
@@ -89,8 +89,6 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_inode_add_write);
-EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
 EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
 EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
index afc06da..bee8498 100644 (file)
@@ -2622,18 +2622,6 @@ done:
        return 0;
 }
 
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
-{
-       return jbd2_journal_file_inode(handle, jinode,
-                       JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
-}
-
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
-{
-       return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
-                       LLONG_MAX);
-}
-
 int jbd2_journal_inode_ranged_write(handle_t *handle,
                struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
 {
index cbe7063..0e6406c 100644 (file)
@@ -163,13 +163,11 @@ static const struct export_operations jffs2_export_ops = {
  * Opt_rp_size: size of reserved pool in KiB
  */
 enum {
-       Opt_source,
        Opt_override_compr,
        Opt_rp_size,
 };
 
 static const struct fs_parameter_spec jffs2_param_specs[] = {
-       fsparam_string  ("source",      Opt_source),
        fsparam_enum    ("compr",       Opt_override_compr),
        fsparam_u32     ("rp_size",     Opt_rp_size),
        {}
index c9b2850..1463b03 100644 (file)
@@ -89,58 +89,45 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 EXPORT_SYMBOL(dcache_dir_close);
 
 /* parent is locked at least shared */
-static struct dentry *next_positive(struct dentry *parent,
-                                   struct list_head *from,
-                                   int count)
+/*
+ * Returns an element of the siblings' list.
+ * We are looking for the <count>th positive dentry after <p>; if
+ * found, the dentry is grabbed and returned to the caller.
+ * If no such element exists, NULL is returned.
+ */
+static struct dentry *scan_positives(struct dentry *cursor,
+                                       struct list_head *p,
+                                       loff_t count,
+                                       struct dentry *last)
 {
-       unsigned *seq = &parent->d_inode->i_dir_seq, n;
-       struct dentry *res;
-       struct list_head *p;
-       bool skipped;
-       int i;
+       struct dentry *dentry = cursor->d_parent, *found = NULL;
 
-retry:
-       i = count;
-       skipped = false;
-       n = smp_load_acquire(seq) & ~1;
-       res = NULL;
-       rcu_read_lock();
-       for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+       spin_lock(&dentry->d_lock);
+       while ((p = p->next) != &dentry->d_subdirs) {
                struct dentry *d = list_entry(p, struct dentry, d_child);
-               if (!simple_positive(d)) {
-                       skipped = true;
-               } else if (!--i) {
-                       res = d;
-                       break;
+               // we must at least skip cursors, to avoid livelocks
+               if (d->d_flags & DCACHE_DENTRY_CURSOR)
+                       continue;
+               if (simple_positive(d) && !--count) {
+                       spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+                       if (simple_positive(d))
+                               found = dget_dlock(d);
+                       spin_unlock(&d->d_lock);
+                       if (likely(found))
+                               break;
+                       count = 1;
+               }
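+               /* periodically drop d_lock: park the cursor at p, resched */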
+               if (need_resched()) {
+                       list_move(&cursor->d_child, p);
+                       p = &cursor->d_child;
+                       spin_unlock(&dentry->d_lock);
+                       cond_resched();
+                       spin_lock(&dentry->d_lock);
                }
        }
-       rcu_read_unlock();
-       if (skipped) {
-               smp_rmb();
-               if (unlikely(*seq != n))
-                       goto retry;
-       }
-       return res;
-}
-
-static void move_cursor(struct dentry *cursor, struct list_head *after)
-{
-       struct dentry *parent = cursor->d_parent;
-       unsigned n, *seq = &parent->d_inode->i_dir_seq;
-       spin_lock(&parent->d_lock);
-       for (;;) {
-               n = *seq;
-               if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
-                       break;
-               cpu_relax();
-       }
-       __list_del(cursor->d_child.prev, cursor->d_child.next);
-       if (after)
-               list_add(&cursor->d_child, after);
-       else
-               list_add_tail(&cursor->d_child, &parent->d_subdirs);
-       smp_store_release(seq, n + 2);
-       spin_unlock(&parent->d_lock);
+       spin_unlock(&dentry->d_lock);
+       dput(last);
+       return found;
 }
 
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
@@ -158,17 +145,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
+               struct dentry *cursor = file->private_data;
+               struct dentry *to = NULL;
+
+               inode_lock_shared(dentry->d_inode);
+
+               if (offset > 2)
+                       to = scan_positives(cursor, &dentry->d_subdirs,
+                                           offset - 2, NULL);
+               spin_lock(&dentry->d_lock);
+               if (to)
+                       list_move(&cursor->d_child, &to->d_child);
+               else
+                       list_del_init(&cursor->d_child);
+               spin_unlock(&dentry->d_lock);
+               dput(to);
+
                file->f_pos = offset;
-               if (file->f_pos >= 2) {
-                       struct dentry *cursor = file->private_data;
-                       struct dentry *to;
-                       loff_t n = file->f_pos - 2;
-
-                       inode_lock_shared(dentry->d_inode);
-                       to = next_positive(dentry, &dentry->d_subdirs, n);
-                       move_cursor(cursor, to ? &to->d_child : NULL);
-                       inode_unlock_shared(dentry->d_inode);
-               }
+
+               inode_unlock_shared(dentry->d_inode);
        }
        return offset;
 }
@@ -190,25 +185,35 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
 {
        struct dentry *dentry = file->f_path.dentry;
        struct dentry *cursor = file->private_data;
-       struct list_head *p = &cursor->d_child;
-       struct dentry *next;
-       bool moved = false;
+       struct list_head *anchor = &dentry->d_subdirs;
+       struct dentry *next = NULL;
+       struct list_head *p;
 
        if (!dir_emit_dots(file, ctx))
                return 0;
 
        if (ctx->pos == 2)
-               p = &dentry->d_subdirs;
-       while ((next = next_positive(dentry, p, 1)) != NULL) {
+               p = anchor;
+       else if (!list_empty(&cursor->d_child))
+               p = &cursor->d_child;
+       else
+               return 0;
+
+       while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
                if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
                              d_inode(next)->i_ino, dt_type(d_inode(next))))
                        break;
-               moved = true;
-               p = &next->d_child;
                ctx->pos++;
+               p = &next->d_child;
        }
-       if (moved)
-               move_cursor(cursor, p);
+       spin_lock(&dentry->d_lock);
+       if (next)
+               list_move_tail(&cursor->d_child, &next->d_child);
+       else
+               list_del_init(&cursor->d_child);
+       spin_unlock(&dentry->d_lock);
+       dput(next);
+
        return 0;
 }
 EXPORT_SYMBOL(dcache_readdir);
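
In-memory filesystems usually reach dcache_readdir() through simple_dir_operations, whose ->iterate_shared is this function. A minimal hedged sketch of a directory inode wired up that way (foo_make_dir_inode is hypothetical):

	static struct inode *foo_make_dir_inode(struct super_block *sb)
	{
		struct inode *inode = new_inode(sb);

		if (!inode)
			return NULL;
		inode->i_mode = S_IFDIR | 0755;
		inode->i_op = &simple_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		inc_nlink(inode);	/* directories start at nlink == 2 */
		return inode;
	}
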
@@ -468,8 +473,7 @@ EXPORT_SYMBOL(simple_write_begin);
 
 /**
  * simple_write_end - .write_end helper for non-block-device FSes
- * @available: See .write_end of address_space_operations
- * @file:              "
+ * @file: See .write_end of address_space_operations
  * @mapping:           "
  * @pos:               "
  * @len:               "
index a364ebc..6970f55 100644 (file)
@@ -212,6 +212,7 @@ struct file_lock_list_struct {
 static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
 DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
 
+
 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
  * It is protected by blocked_lock_lock.
@@ -1991,6 +1992,64 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 }
 EXPORT_SYMBOL(generic_setlease);
 
+#if IS_ENABLED(CONFIG_SRCU)
+/*
+ * Kernel subsystems can register to be notified on any attempt to set
+ * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
+ * to close files that it may have cached when there is an attempt to set a
+ * conflicting lease.
+ */
+static struct srcu_notifier_head lease_notifier_chain;
+
+static inline void
+lease_notifier_chain_init(void)
+{
+       srcu_init_notifier_head(&lease_notifier_chain);
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+       if (arg != F_UNLCK)
+               srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+       return srcu_notifier_chain_register(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+       srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#else /* !IS_ENABLED(CONFIG_SRCU) */
+static inline void
+lease_notifier_chain_init(void)
+{
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+       return 0;
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#endif /* IS_ENABLED(CONFIG_SRCU) */
+
 /**
  * vfs_setlease        -       sets a lease on an open file
  * @filp:      file pointer
@@ -2011,6 +2070,8 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
+       if (lease)
+               setlease_notifier(arg, *lease);
        if (filp->f_op->setlease)
                return filp->f_op->setlease(filp, arg, lease, priv);
        else
@@ -2924,6 +2985,7 @@ static int __init filelock_init(void)
                INIT_HLIST_HEAD(&fll->hlist);
        }
 
+       lease_notifier_chain_init();
        return 0;
 }
 core_initcall(filelock_init);
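
A consumer of the new chain registers a notifier_block whose callback receives the setlease type in arg and the proposed lease in data; F_UNLCK is filtered out by setlease_notifier() above. A hedged sketch (foo_* names hypothetical, modeled on the nfsd use case described in the comment):

	static int foo_lease_event(struct notifier_block *nb, unsigned long arg,
				   void *data)
	{
		struct file_lock *fl = data;

		if (arg == F_WRLCK)	/* someone wants a write lease */
			foo_close_cached_files(file_inode(fl->fl_file));
		return NOTIFY_DONE;
	}

	static struct notifier_block foo_lease_nb = {
		.notifier_call	= foo_lease_event,
	};

	/* at module init / exit: */
	lease_register_notifier(&foo_lease_nb);
	lease_unregister_notifier(&foo_lease_nb);
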
index 93c0432..fe0e9e1 100644 (file)
@@ -2802,8 +2802,6 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
                                put_filesystem(type);
                                return -EINVAL;
                        }
-               } else {
-                       subtype = "";
                }
        }
 
@@ -3028,7 +3026,7 @@ void *copy_mount_options(const void __user * data)
         * the remainder of the page.
         */
        /* copy_from_user cannot cross TASK_SIZE ! */
-       size = TASK_SIZE - (unsigned long)data;
+       size = TASK_SIZE - (unsigned long)untagged_addr(data);
        if (size > PAGE_SIZE)
                size = PAGE_SIZE;
 
index 071b90a..af549d7 100644 (file)
@@ -53,6 +53,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
        return false;
 }
 
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
+{
+       struct nfs_delegation *delegation;
+
+       delegation = rcu_dereference(NFS_I(inode)->delegation);
+       if (nfs4_is_valid_delegation(delegation, 0))
+               return delegation;
+       return NULL;
+}
+
 static int
 nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
 {
@@ -1181,7 +1191,7 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
        if (delegation != NULL &&
            nfs4_stateid_match_other(dst, &delegation->stateid)) {
                dst->seqid = delegation->stateid.seqid;
-               return ret;
+               ret = true;
        }
        rcu_read_unlock();
 out:
index 9eb87ae..8b14d44 100644 (file)
@@ -68,6 +68,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
 bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
 bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
 int nfs4_have_delegation(struct inode *inode, fmode_t flags);
 int nfs4_check_delegation(struct inode *inode, fmode_t flags);
index 0adfd88..e180033 100644 (file)
@@ -1669,10 +1669,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 
 #endif /* CONFIG_NFSV4 */
 
-/*
- * Code common to create, mkdir, and mknod.
- */
-int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+struct dentry *
+nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
                                struct nfs_fattr *fattr,
                                struct nfs4_label *label)
 {
@@ -1680,13 +1678,10 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
        struct inode *dir = d_inode(parent);
        struct inode *inode;
        struct dentry *d;
-       int error = -EACCES;
+       int error;
 
        d_drop(dentry);
 
-       /* We may have been initialized further down */
-       if (d_really_is_positive(dentry))
-               goto out;
        if (fhandle->size == 0) {
                error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
                if (error)
@@ -1702,18 +1697,32 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
        }
        inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
        d = d_splice_alias(inode, dentry);
-       if (IS_ERR(d)) {
-               error = PTR_ERR(d);
-               goto out_error;
-       }
-       dput(d);
 out:
        dput(parent);
-       return 0;
+       return d;
 out_error:
        nfs_mark_for_revalidate(dir);
-       dput(parent);
-       return error;
+       d = ERR_PTR(error);
+       goto out;
+}
+EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
+
+/*
+ * Code common to create, mkdir, and mknod.
+ */
+int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+                               struct nfs_fattr *fattr,
+                               struct nfs4_label *label)
+{
+       struct dentry *d;
+
+       d = nfs_add_or_obtain(dentry, fhandle, fattr, label);
+       if (IS_ERR(d))
+               return PTR_ERR(d);
+
+       /* Callers don't care */
+       dput(d);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_instantiate);
 
index 222d711..040a50f 100644 (file)
 
 static struct kmem_cache *nfs_direct_cachep;
 
-/*
- * This represents a set of asynchronous requests that we're waiting on
- */
-struct nfs_direct_mirror {
-       ssize_t count;
-};
-
 struct nfs_direct_req {
        struct kref             kref;           /* release manager */
 
@@ -84,9 +77,6 @@ struct nfs_direct_req {
        atomic_t                io_count;       /* i/os we're waiting for */
        spinlock_t              lock;           /* protect completion state */
 
-       struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
-       int                     mirror_count;
-
        loff_t                  io_start;       /* Start offset for I/O */
        ssize_t                 count,          /* bytes actually processed */
                                max_count,      /* max expected count */
@@ -123,32 +113,42 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 }
 
 static void
-nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
+nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
+                           const struct nfs_pgio_header *hdr,
+                           ssize_t dreq_len)
 {
-       int i;
-       ssize_t count;
+       if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) ||
+             test_bit(NFS_IOHDR_EOF, &hdr->flags)))
+               return;
+       if (dreq->max_count >= dreq_len) {
+               dreq->max_count = dreq_len;
+               if (dreq->count > dreq_len)
+                       dreq->count = dreq_len;
+
+               if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
+                       dreq->error = hdr->error;
+               else /* Clear outstanding error if this is EOF */
+                       dreq->error = 0;
+       }
+}
 
-       WARN_ON_ONCE(dreq->count >= dreq->max_count);
+static void
+nfs_direct_count_bytes(struct nfs_direct_req *dreq,
+                      const struct nfs_pgio_header *hdr)
+{
+       loff_t hdr_end = hdr->io_start + hdr->good_bytes;
+       ssize_t dreq_len = 0;
 
-       if (dreq->mirror_count == 1) {
-               dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
-               dreq->count += hdr->good_bytes;
-       } else {
-               /* mirrored writes */
-               count = dreq->mirrors[hdr->pgio_mirror_idx].count;
-               if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
-                       count = hdr->io_start + hdr->good_bytes - dreq->io_start;
-                       dreq->mirrors[hdr->pgio_mirror_idx].count = count;
-               }
-               /* update the dreq->count by finding the minimum agreed count from all
-                * mirrors */
-               count = dreq->mirrors[0].count;
+       if (hdr_end > dreq->io_start)
+               dreq_len = hdr_end - dreq->io_start;
 
-               for (i = 1; i < dreq->mirror_count; i++)
-                       count = min(count, dreq->mirrors[i].count);
+       nfs_direct_handle_truncated(dreq, hdr, dreq_len);
 
-               dreq->count = count;
-       }
+       if (dreq_len > dreq->max_count)
+               dreq_len = dreq->max_count;
+
+       if (dreq->count < dreq_len)
+               dreq->count = dreq_len;
 }
 
 /*
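
A worked example of the accounting above, with hypothetical numbers: for a request with io_start = 4096 and max_count = 8192, a header with io_start = 4096 and good_bytes = 4096 gives hdr_end = 8192, hence dreq_len = 4096, and dreq->count is raised to 4096. If that header also carries NFS_IOHDR_EOF, nfs_direct_handle_truncated() clamps max_count to 4096 and clears any stale error.
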
@@ -293,18 +293,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
        cinfo->completion_ops = &nfs_direct_commit_completion_ops;
 }
 
-static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq,
-                                            struct nfs_pageio_descriptor *pgio,
-                                            struct nfs_page *req)
-{
-       int mirror_count = 1;
-
-       if (pgio->pg_ops->pg_get_mirror_count)
-               mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
-
-       dreq->mirror_count = mirror_count;
-}
-
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
        struct nfs_direct_req *dreq;
@@ -319,7 +307,6 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
        INIT_LIST_HEAD(&dreq->mds_cinfo.list);
        dreq->verf.committed = NFS_INVALID_STABLE_HOW;  /* not set yet */
        INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
-       dreq->mirror_count = 1;
        spin_lock_init(&dreq->lock);
 
        return dreq;
@@ -402,20 +389,12 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
        struct nfs_direct_req *dreq = hdr->dreq;
 
        spin_lock(&dreq->lock);
-       if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
-               dreq->error = hdr->error;
-
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
                spin_unlock(&dreq->lock);
                goto out_put;
        }
 
-       if (hdr->good_bytes != 0)
-               nfs_direct_good_bytes(dreq, hdr);
-
-       if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
-               dreq->error = 0;
-
+       nfs_direct_count_bytes(dreq, hdr);
        spin_unlock(&dreq->lock);
 
        while (!list_empty(&hdr->pages)) {
@@ -646,29 +625,22 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
        LIST_HEAD(reqs);
        struct nfs_commit_info cinfo;
        LIST_HEAD(failed);
-       int i;
 
        nfs_init_cinfo_from_dreq(&cinfo, dreq);
        nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
 
        dreq->count = 0;
+       dreq->max_count = 0;
+       list_for_each_entry(req, &reqs, wb_list)
+               dreq->max_count += req->wb_bytes;
        dreq->verf.committed = NFS_INVALID_STABLE_HOW;
        nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
-       for (i = 0; i < dreq->mirror_count; i++)
-               dreq->mirrors[i].count = 0;
        get_dreq(dreq);
 
        nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
                              &nfs_direct_write_completion_ops);
        desc.pg_dreq = dreq;
 
-       req = nfs_list_entry(reqs.next);
-       nfs_direct_setup_mirroring(dreq, &desc, req);
-       if (desc.pg_error < 0) {
-               list_splice_init(&reqs, &failed);
-               goto out_failed;
-       }
-
        list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
                /* Bump the transmission count */
                req->wb_nio++;
@@ -686,7 +658,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
        }
        nfs_pageio_complete(&desc);
 
-out_failed:
        while (!list_empty(&failed)) {
                req = nfs_list_entry(failed.next);
                nfs_list_remove_request(req);
@@ -791,17 +762,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
        nfs_init_cinfo_from_dreq(&cinfo, dreq);
 
        spin_lock(&dreq->lock);
-
-       if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
-               dreq->error = hdr->error;
-
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
                spin_unlock(&dreq->lock);
                goto out_put;
        }
 
+       nfs_direct_count_bytes(dreq, hdr);
        if (hdr->good_bytes != 0) {
-               nfs_direct_good_bytes(dreq, hdr);
                if (nfs_write_need_commit(hdr)) {
                        if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
                                request_commit = true;
@@ -923,7 +890,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                                break;
                        }
 
-                       nfs_direct_setup_mirroring(dreq, &desc, req);
                        if (desc.pg_error < 0) {
                                nfs_free_request(req);
                                result = desc.pg_error;
index 3cb073c..c9b605f 100644 (file)
@@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
        .id                     = LAYOUT_NFSV4_1_FILES,
        .name                   = "LAYOUT_NFSV4_1_FILES",
        .owner                  = THIS_MODULE,
+       .flags                  = PNFS_LAYOUTGET_ON_OPEN,
        .max_layoutget_response = 4096, /* 1 page or so... */
        .alloc_layout_hdr       = filelayout_alloc_layout_hdr,
        .free_layout_hdr        = filelayout_free_layout_hdr,
index e64f810..447a3c1 100644 (file)
@@ -16,14 +16,6 @@ extern const struct export_operations nfs_export_ops;
 
 struct nfs_string;
 
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- *        their needs. People that do NFS over a slow network, might for
- *        instance want to reduce it to something closer to 1 for improved
- *        interactive response.
- */
-#define NFS_MAX_READAHEAD      (RPC_DEF_SLOT_TABLE - 1)
-
 static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
 {
        if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
index a3ad2d4..9eb2f1a 100644 (file)
@@ -279,15 +279,17 @@ static struct nfs3_createdata *nfs3_alloc_createdata(void)
        return data;
 }
 
-static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
+static struct dentry *
+nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
 {
        int status;
 
        status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
        nfs_post_op_update_inode(dir, data->res.dir_attr);
-       if (status == 0)
-               status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
-       return status;
+       if (status != 0)
+               return ERR_PTR(status);
+
+       return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr, NULL);
 }
 
 static void nfs3_free_createdata(struct nfs3_createdata *data)
@@ -304,6 +306,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 {
        struct posix_acl *default_acl, *acl;
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        dprintk("NFS call  create %pd\n", dentry);
@@ -330,7 +333,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                goto out;
 
        for (;;) {
-               status = nfs3_do_create(dir, dentry, data);
+               d_alias = nfs3_do_create(dir, dentry, data);
+               status = PTR_ERR_OR_ZERO(d_alias);
 
                if (status != -ENOTSUPP)
                        break;
@@ -355,6 +359,9 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        if (status != 0)
                goto out_release_acls;
 
+       if (d_alias)
+               dentry = d_alias;
+
        /* When we created the file with exclusive semantics, make
         * sure we set the attributes afterwards. */
        if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -372,11 +379,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                nfs_post_op_update_inode(d_inode(dentry), data->res.fattr);
                dprintk("NFS reply setattr (post-create): %d\n", status);
                if (status != 0)
-                       goto out_release_acls;
+                       goto out_dput;
        }
 
        status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+out_dput:
+       dput(d_alias);
 out_release_acls:
        posix_acl_release(acl);
        posix_acl_release(default_acl);
@@ -504,6 +513,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
                  unsigned int len, struct iattr *sattr)
 {
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        if (len > NFS3_MAXPATHLEN)
@@ -522,7 +532,11 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
        data->arg.symlink.pathlen = len;
        data->arg.symlink.sattr = sattr;
 
-       status = nfs3_do_create(dir, dentry, data);
+       d_alias = nfs3_do_create(dir, dentry, data);
+       status = PTR_ERR_OR_ZERO(d_alias);
+
+       if (status == 0)
+               dput(d_alias);
 
        nfs3_free_createdata(data);
 out:
@@ -535,6 +549,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 {
        struct posix_acl *default_acl, *acl;
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        dprintk("NFS call  mkdir %pd\n", dentry);
@@ -553,12 +568,18 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
        data->arg.mkdir.len = dentry->d_name.len;
        data->arg.mkdir.sattr = sattr;
 
-       status = nfs3_do_create(dir, dentry, data);
+       d_alias = nfs3_do_create(dir, dentry, data);
+       status = PTR_ERR_OR_ZERO(d_alias);
+
        if (status != 0)
                goto out_release_acls;
 
+       if (d_alias)
+               dentry = d_alias;
+
        status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+       dput(d_alias);
 out_release_acls:
        posix_acl_release(acl);
        posix_acl_release(default_acl);
@@ -660,6 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 {
        struct posix_acl *default_acl, *acl;
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        dprintk("NFS call  mknod %pd %u:%u\n", dentry,
@@ -698,12 +720,17 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                goto out;
        }
 
-       status = nfs3_do_create(dir, dentry, data);
+       d_alias = nfs3_do_create(dir, dentry, data);
+       status = PTR_ERR_OR_ZERO(d_alias);
        if (status != 0)
                goto out_release_acls;
 
+       if (d_alias)
+               dentry = d_alias;
+
        status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+       dput(d_alias);
 out_release_acls:
        posix_acl_release(acl);
        posix_acl_release(default_acl);
index 3564da1..16b2e5c 100644 (file)
@@ -491,8 +491,6 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
                const struct nfs_lock_context *, nfs4_stateid *,
                const struct cred **);
-extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst,
-               struct nfs4_state *state);
 extern bool nfs4_copy_open_stateid(nfs4_stateid *dst,
                struct nfs4_state *state);
 
@@ -574,6 +572,15 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
        return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
 }
 
+static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
+{
+       u32 seqid = be32_to_cpu(s1->seqid);
+
+       if (++seqid == 0)
+               ++seqid;
+       s1->seqid = cpu_to_be32(seqid);
+}
+
 static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
 {
        return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
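
The two inline helpers above implement serial-number arithmetic for stateid
seqids: nfs4_stateid_is_newer() compares seqids through a signed 32-bit
difference so the result stays correct across wraparound, and the new
nfs4_stateid_seqid_inc() skips 0 when incrementing, since a seqid of zero is
treated specially by the protocol. A minimal user-space sketch of both, with
htonl()/ntohl() standing in for cpu_to_be32()/be32_to_cpu() and all names
illustrative rather than kernel API:

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>

    /* mirror of nfs4_stateid_seqid_inc(): bump a big-endian seqid,
     * skipping the reserved value 0 on wraparound */
    static uint32_t seqid_inc(uint32_t be_seqid)
    {
            uint32_t seqid = ntohl(be_seqid);

            if (++seqid == 0)
                    ++seqid;
            return htonl(seqid);
    }

    /* mirror of nfs4_stateid_is_newer(): wraparound-safe comparison
     * via a signed 32-bit difference */
    static int seqid_is_newer(uint32_t be_s1, uint32_t be_s2)
    {
            return (int32_t)(ntohl(be_s1) - ntohl(be_s2)) > 0;
    }

    int main(void)
    {
            uint32_t s = seqid_inc(htonl(0xffffffffu));

            printf("%u\n", ntohl(s));       /* prints 1: the wrap skips 0 */
            printf("%d\n", seqid_is_newer(s, htonl(0xffffffffu))); /* 1 */
            return 0;
    }
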
index 1406858..caacf5e 100644
@@ -1073,14 +1073,26 @@ static const struct rpc_call_ops nfs40_call_sync_ops = {
        .rpc_call_done = nfs40_call_sync_done,
 };
 
+static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup)
+{
+       int ret;
+       struct rpc_task *task;
+
+       task = rpc_run_task(task_setup);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+
+       ret = task->tk_status;
+       rpc_put_task(task);
+       return ret;
+}
+
 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                                   struct nfs_server *server,
                                   struct rpc_message *msg,
                                   struct nfs4_sequence_args *args,
                                   struct nfs4_sequence_res *res)
 {
-       int ret;
-       struct rpc_task *task;
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_call_sync_data data = {
                .seq_server = server,
@@ -1094,14 +1106,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                .callback_data = &data
        };
 
-       task = rpc_run_task(&task_setup);
-       if (IS_ERR(task))
-               ret = PTR_ERR(task);
-       else {
-               ret = task->tk_status;
-               rpc_put_task(task);
-       }
-       return ret;
+       return nfs4_call_sync_custom(&task_setup);
 }
 
 int nfs4_call_sync(struct rpc_clnt *clnt,
@@ -1435,8 +1440,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode,
                return 0;
        if ((delegation->type & fmode) != fmode)
                return 0;
-       if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
-               return 0;
        switch (claim) {
        case NFS4_OPEN_CLAIM_NULL:
        case NFS4_OPEN_CLAIM_FH:
@@ -1805,7 +1808,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
 static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 {
        struct nfs4_state *state = opendata->state;
-       struct nfs_inode *nfsi = NFS_I(state->inode);
        struct nfs_delegation *delegation;
        int open_mode = opendata->o_arg.open_flags;
        fmode_t fmode = opendata->o_arg.fmode;
@@ -1822,7 +1824,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
                }
                spin_unlock(&state->owner->so_lock);
                rcu_read_lock();
-               delegation = rcu_dereference(nfsi->delegation);
+               delegation = nfs4_get_valid_delegation(state->inode);
                if (!can_open_delegated(delegation, fmode, claim)) {
                        rcu_read_unlock();
                        break;
@@ -2366,7 +2368,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
                                        data->o_arg.open_flags, claim))
                        goto out_no_action;
                rcu_read_lock();
-               delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+               delegation = nfs4_get_valid_delegation(data->state->inode);
                if (can_open_delegated(delegation, data->o_arg.fmode, claim))
                        goto unlock_no_action;
                rcu_read_unlock();
@@ -3308,6 +3310,75 @@ nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
        return pnfs_wait_on_layoutreturn(inode, task);
 }
 
+/*
+ * Update the seqid of an open stateid
+ */
+static void nfs4_sync_open_stateid(nfs4_stateid *dst,
+               struct nfs4_state *state)
+{
+       __be32 seqid_open;
+       u32 dst_seqid;
+       int seq;
+
+       for (;;) {
+               if (!nfs4_valid_open_stateid(state))
+                       break;
+               seq = read_seqbegin(&state->seqlock);
+               if (!nfs4_state_match_open_stateid_other(state, dst)) {
+                       nfs4_stateid_copy(dst, &state->open_stateid);
+                       if (read_seqretry(&state->seqlock, seq))
+                               continue;
+                       break;
+               }
+               seqid_open = state->open_stateid.seqid;
+               if (read_seqretry(&state->seqlock, seq))
+                       continue;
+
+               dst_seqid = be32_to_cpu(dst->seqid);
+               if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) < 0)
+                       dst->seqid = seqid_open;
+               break;
+       }
+}
+
+/*
+ * Update the seqid of an open stateid after receiving
+ * NFS4ERR_OLD_STATEID
+ */
+static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
+               struct nfs4_state *state)
+{
+       __be32 seqid_open;
+       u32 dst_seqid;
+       bool ret;
+       int seq;
+
+       for (;;) {
+               ret = false;
+               if (!nfs4_valid_open_stateid(state))
+                       break;
+               seq = read_seqbegin(&state->seqlock);
+               if (!nfs4_state_match_open_stateid_other(state, dst)) {
+                       if (read_seqretry(&state->seqlock, seq))
+                               continue;
+                       break;
+               }
+               seqid_open = state->open_stateid.seqid;
+               if (read_seqretry(&state->seqlock, seq))
+                       continue;
+
+               dst_seqid = be32_to_cpu(dst->seqid);
+               if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0)
+                       dst->seqid = cpu_to_be32(dst_seqid + 1);
+               else
+                       dst->seqid = seqid_open;
+               ret = true;
+               break;
+       }
+
+       return ret;
+}
+
 struct nfs4_closedata {
        struct inode *inode;
        struct nfs4_state *state;
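
Both helpers above follow the seqlock read-side idiom: sample the sequence
counter with read_seqbegin(), copy the open stateid fields, and retry the
whole decision whenever read_seqretry() reports that a writer updated the
state in the meantime. A single-threaded user-space sketch of just that
control flow, using a toy C11-atomics seqcount (illustrative stand-ins, not
the kernel's seqlock API):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* toy seqcount: even = stable, odd = writer active */
    struct toy_seqcount {
            atomic_uint seq;
            uint32_t    seqid;      /* the datum the reader copies */
    };

    static unsigned toy_read_seqbegin(struct toy_seqcount *sc)
    {
            unsigned s;

            while ((s = atomic_load(&sc->seq)) & 1)
                    ;               /* writer in progress: wait */
            return s;
    }

    static int toy_read_seqretry(struct toy_seqcount *sc, unsigned start)
    {
            return atomic_load(&sc->seq) != start;
    }

    int main(void)
    {
            struct toy_seqcount sc = { .seq = 0, .seqid = 7 };
            uint32_t copy;
            unsigned s;

            do {
                    s = toy_read_seqbegin(&sc);
                    copy = sc.seqid;        /* speculative read */
            } while (toy_read_seqretry(&sc, s));    /* writer raced: retry */

            printf("seqid %u read at seq %u\n", copy, s);
            return 0;
    }
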
@@ -3358,32 +3429,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
        trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
 
        /* Handle Layoutreturn errors */
-       if (calldata->arg.lr_args && task->tk_status != 0) {
-               switch (calldata->res.lr_ret) {
-               default:
-                       calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
-                       break;
-               case 0:
-                       calldata->arg.lr_args = NULL;
-                       calldata->res.lr_res = NULL;
-                       break;
-               case -NFS4ERR_OLD_STATEID:
-                       if (nfs4_layoutreturn_refresh_stateid(&calldata->arg.lr_args->stateid,
-                                               &calldata->arg.lr_args->range,
-                                               calldata->inode))
-                               goto lr_restart;
-                       /* Fallthrough */
-               case -NFS4ERR_ADMIN_REVOKED:
-               case -NFS4ERR_DELEG_REVOKED:
-               case -NFS4ERR_EXPIRED:
-               case -NFS4ERR_BAD_STATEID:
-               case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
-               case -NFS4ERR_WRONG_CRED:
-                       calldata->arg.lr_args = NULL;
-                       calldata->res.lr_res = NULL;
-                       goto lr_restart;
-               }
-       }
+       if (pnfs_roc_done(task, calldata->inode,
+                               &calldata->arg.lr_args,
+                               &calldata->res.lr_res,
+                               &calldata->res.lr_ret) == -EAGAIN)
+               goto out_restart;
 
        /* hmm. we are done with the inode, and in the process of freeing
         * the state_owner. we keep this around to process errors
@@ -3403,7 +3453,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
                        break;
                case -NFS4ERR_OLD_STATEID:
                        /* Did we race with OPEN? */
-                       if (nfs4_refresh_open_stateid(&calldata->arg.stateid,
+                       if (nfs4_refresh_open_old_stateid(&calldata->arg.stateid,
                                                state))
                                goto out_restart;
                        goto out_release;
@@ -3415,7 +3465,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
                                        task->tk_msg.rpc_cred);
                        /* Fallthrough */
                case -NFS4ERR_BAD_STATEID:
-                       break;
+                       if (calldata->arg.fmode == 0)
+                               break;
+                       /* Fallthrough */
                default:
                        task->tk_status = nfs4_async_handle_exception(task,
                                        server, task->tk_status, &exception);
@@ -3430,8 +3482,6 @@ out_release:
        nfs_refresh_inode(calldata->inode, &calldata->fattr);
        dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
        return;
-lr_restart:
-       calldata->res.lr_ret = 0;
 out_restart:
        task->tk_status = 0;
        rpc_restart_call_prepare(task);
@@ -3472,8 +3522,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        } else if (is_rdwr)
                calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
 
-       if (!nfs4_valid_open_stateid(state) ||
-           !nfs4_refresh_open_stateid(&calldata->arg.stateid, state))
+       nfs4_sync_open_stateid(&calldata->arg.stateid, state);
+       if (!nfs4_valid_open_stateid(state))
                call_close = 0;
        spin_unlock(&state->owner->so_lock);
 
@@ -6018,7 +6068,6 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
                .rpc_resp = res,
                .rpc_cred = cred,
        };
-       struct rpc_task *task;
        struct rpc_task_setup task_setup_data = {
                .rpc_client = clp->cl_rpcclient,
                .rpc_message = &msg,
@@ -6051,17 +6100,13 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
        dprintk("NFS call  setclientid auth=%s, '%s'\n",
                clp->cl_rpcclient->cl_auth->au_ops->au_name,
                clp->cl_owner_id);
-       task = rpc_run_task(&task_setup_data);
-       if (IS_ERR(task)) {
-               status = PTR_ERR(task);
-               goto out;
-       }
-       status = task->tk_status;
+
+       status = nfs4_call_sync_custom(&task_setup_data);
        if (setclientid.sc_cred) {
+               kfree(clp->cl_acceptor);
                clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
                put_rpccred(setclientid.sc_cred);
        }
-       rpc_put_task(task);
 out:
        trace_nfs4_setclientid(clp, status);
        dprintk("NFS reply setclientid: %d\n", status);
@@ -6129,32 +6174,11 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
        trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
 
        /* Handle Layoutreturn errors */
-       if (data->args.lr_args && task->tk_status != 0) {
-               switch(data->res.lr_ret) {
-               default:
-                       data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
-                       break;
-               case 0:
-                       data->args.lr_args = NULL;
-                       data->res.lr_res = NULL;
-                       break;
-               case -NFS4ERR_OLD_STATEID:
-                       if (nfs4_layoutreturn_refresh_stateid(&data->args.lr_args->stateid,
-                                               &data->args.lr_args->range,
-                                               data->inode))
-                               goto lr_restart;
-                       /* Fallthrough */
-               case -NFS4ERR_ADMIN_REVOKED:
-               case -NFS4ERR_DELEG_REVOKED:
-               case -NFS4ERR_EXPIRED:
-               case -NFS4ERR_BAD_STATEID:
-               case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
-               case -NFS4ERR_WRONG_CRED:
-                       data->args.lr_args = NULL;
-                       data->res.lr_res = NULL;
-                       goto lr_restart;
-               }
-       }
+       if (pnfs_roc_done(task, data->inode,
+                               &data->args.lr_args,
+                               &data->res.lr_res,
+                               &data->res.lr_ret) == -EAGAIN)
+               goto out_restart;
 
        switch (task->tk_status) {
        case 0:
@@ -6192,8 +6216,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
        }
        data->rpc_status = task->tk_status;
        return;
-lr_restart:
-       data->res.lr_ret = 0;
 out_restart:
        task->tk_status = 0;
        rpc_restart_call_prepare(task);
@@ -6386,6 +6408,42 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *
        return err;
 }
 
+/*
+ * Update the seqid of a lock stateid after receiving
+ * NFS4ERR_OLD_STATEID
+ */
+static bool nfs4_refresh_lock_old_stateid(nfs4_stateid *dst,
+               struct nfs4_lock_state *lsp)
+{
+       struct nfs4_state *state = lsp->ls_state;
+       bool ret = false;
+
+       spin_lock(&state->state_lock);
+       if (!nfs4_stateid_match_other(dst, &lsp->ls_stateid))
+               goto out;
+       if (!nfs4_stateid_is_newer(&lsp->ls_stateid, dst))
+               nfs4_stateid_seqid_inc(dst);
+       else
+               dst->seqid = lsp->ls_stateid.seqid;
+       ret = true;
+out:
+       spin_unlock(&state->state_lock);
+       return ret;
+}
+
+static bool nfs4_sync_lock_stateid(nfs4_stateid *dst,
+               struct nfs4_lock_state *lsp)
+{
+       struct nfs4_state *state = lsp->ls_state;
+       bool ret;
+
+       spin_lock(&state->state_lock);
+       ret = !nfs4_stateid_match_other(dst, &lsp->ls_stateid);
+       nfs4_stateid_copy(dst, &lsp->ls_stateid);
+       spin_unlock(&state->state_lock);
+       return ret;
+}
+
 struct nfs4_unlockdata {
        struct nfs_locku_args arg;
        struct nfs_locku_res res;
@@ -6403,7 +6461,8 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
                struct nfs_seqid *seqid)
 {
        struct nfs4_unlockdata *p;
-       struct inode *inode = lsp->ls_state->inode;
+       struct nfs4_state *state = lsp->ls_state;
+       struct inode *inode = state->inode;
 
        p = kzalloc(sizeof(*p), GFP_NOFS);
        if (p == NULL)
@@ -6419,6 +6478,9 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
        locks_init_lock(&p->fl);
        locks_copy_lock(&p->fl, fl);
        p->server = NFS_SERVER(inode);
+       spin_lock(&state->state_lock);
+       nfs4_stateid_copy(&p->arg.stateid, &lsp->ls_stateid);
+       spin_unlock(&state->state_lock);
        return p;
 }
 
@@ -6457,10 +6519,14 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
                                        task->tk_msg.rpc_cred);
                        /* Fall through */
                case -NFS4ERR_BAD_STATEID:
-               case -NFS4ERR_OLD_STATEID:
                case -NFS4ERR_STALE_STATEID:
-                       if (!nfs4_stateid_match(&calldata->arg.stateid,
-                                               &calldata->lsp->ls_stateid))
+                       if (nfs4_sync_lock_stateid(&calldata->arg.stateid,
+                                               calldata->lsp))
+                               rpc_restart_call_prepare(task);
+                       break;
+               case -NFS4ERR_OLD_STATEID:
+                       if (nfs4_refresh_lock_old_stateid(&calldata->arg.stateid,
+                                               calldata->lsp))
                                rpc_restart_call_prepare(task);
                        break;
                default:
@@ -6483,7 +6549,6 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
                goto out_wait;
-       nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
        if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
                /* Note: exit _without_ running nfs4_locku_done */
                goto out_no_action;
@@ -7645,6 +7710,8 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
 static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity)
 {
        int status;
+       struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+       struct nfs_client *clp = NFS_SERVER(dir)->nfs_client;
        struct nfs4_secinfo_arg args = {
                .dir_fh = NFS_FH(dir),
                .name   = name,
@@ -7657,26 +7724,37 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
                .rpc_argp = &args,
                .rpc_resp = &res,
        };
-       struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+       struct nfs4_call_sync_data data = {
+               .seq_server = NFS_SERVER(dir),
+               .seq_args = &args.seq_args,
+               .seq_res = &res.seq_res,
+       };
+       struct rpc_task_setup task_setup = {
+               .rpc_client = clnt,
+               .rpc_message = &msg,
+               .callback_ops = clp->cl_mvops->call_sync_ops,
+               .callback_data = &data,
+               .flags = RPC_TASK_NO_ROUND_ROBIN,
+       };
        const struct cred *cred = NULL;
 
        if (use_integrity) {
-               clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
-               cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
+               clnt = clp->cl_rpcclient;
+               task_setup.rpc_client = clnt;
+
+               cred = nfs4_get_clid_cred(clp);
                msg.rpc_cred = cred;
        }
 
        dprintk("NFS call  secinfo %s\n", name->name);
 
-       nfs4_state_protect(NFS_SERVER(dir)->nfs_client,
-               NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+       nfs4_state_protect(clp, NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+       nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+       status = nfs4_call_sync_custom(&task_setup);
 
-       status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args,
-                               &res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
        dprintk("NFS reply  secinfo: %d\n", status);
 
        put_cred(cred);
-
        return status;
 }
 
@@ -8344,7 +8422,6 @@ static const struct rpc_call_ops nfs4_get_lease_time_ops = {
 
 int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 {
-       struct rpc_task *task;
        struct nfs4_get_lease_time_args args;
        struct nfs4_get_lease_time_res res = {
                .lr_fsinfo = fsinfo,
@@ -8366,17 +8443,9 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
                .callback_data = &data,
                .flags = RPC_TASK_TIMEOUT,
        };
-       int status;
 
        nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1);
-       task = rpc_run_task(&task_setup);
-
-       if (IS_ERR(task))
-               return PTR_ERR(task);
-
-       status = task->tk_status;
-       rpc_put_task(task);
-       return status;
+       return nfs4_call_sync_custom(&task_setup);
 }
 
 #ifdef CONFIG_NFS_V4_1
@@ -8845,7 +8914,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
                const struct cred *cred)
 {
        struct nfs4_reclaim_complete_data *calldata;
-       struct rpc_task *task;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE],
                .rpc_cred = cred,
@@ -8854,7 +8922,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
                .rpc_client = clp->cl_rpcclient,
                .rpc_message = &msg,
                .callback_ops = &nfs4_reclaim_complete_call_ops,
-               .flags = RPC_TASK_ASYNC | RPC_TASK_NO_ROUND_ROBIN,
+               .flags = RPC_TASK_NO_ROUND_ROBIN,
        };
        int status = -ENOMEM;
 
@@ -8869,15 +8937,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
        msg.rpc_argp = &calldata->arg;
        msg.rpc_resp = &calldata->res;
        task_setup_data.callback_data = calldata;
-       task = rpc_run_task(&task_setup_data);
-       if (IS_ERR(task)) {
-               status = PTR_ERR(task);
-               goto out;
-       }
-       status = rpc_wait_for_completion_task(task);
-       if (status == 0)
-               status = task->tk_status;
-       rpc_put_task(task);
+       status = nfs4_call_sync_custom(&task_setup_data);
 out:
        dprintk("<-- %s status=%d\n", __func__, status);
        return status;
@@ -9103,10 +9163,19 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
        if (!nfs41_sequence_process(task, &lrp->res.seq_res))
                return;
 
+       /*
+        * Was there an RPC level error? Assume the call succeeded,
+        * and that we need to release the layout
+        */
+       if (task->tk_rpc_status != 0 && RPC_WAS_SENT(task)) {
+               lrp->res.lrs_present = 0;
+               return;
+       }
+
        server = NFS_SERVER(lrp->args.inode);
        switch (task->tk_status) {
        case -NFS4ERR_OLD_STATEID:
-               if (nfs4_layoutreturn_refresh_stateid(&lrp->args.stateid,
+               if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid,
                                        &lrp->args.range,
                                        lrp->args.inode))
                        goto out_restart;
@@ -9362,18 +9431,32 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
                .rpc_resp = &res,
        };
        struct rpc_clnt *clnt = server->client;
+       struct nfs4_call_sync_data data = {
+               .seq_server = server,
+               .seq_args = &args.seq_args,
+               .seq_res = &res.seq_res,
+       };
+       struct rpc_task_setup task_setup = {
+               .rpc_client = server->client,
+               .rpc_message = &msg,
+               .callback_ops = server->nfs_client->cl_mvops->call_sync_ops,
+               .callback_data = &data,
+               .flags = RPC_TASK_NO_ROUND_ROBIN,
+       };
        const struct cred *cred = NULL;
        int status;
 
        if (use_integrity) {
                clnt = server->nfs_client->cl_rpcclient;
+               task_setup.rpc_client = clnt;
+
                cred = nfs4_get_clid_cred(server->nfs_client);
                msg.rpc_cred = cred;
        }
 
        dprintk("--> %s\n", __func__);
-       status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
-                               &res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
+       nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+       status = nfs4_call_sync_custom(&task_setup);
        dprintk("<-- %s status=%d\n", __func__, status);
 
        put_cred(cred);
index cad4e06..0c6d53d 100644
@@ -1015,22 +1015,6 @@ out:
        return ret;
 }
 
-bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
-{
-       bool ret;
-       int seq;
-
-       do {
-               ret = false;
-               seq = read_seqbegin(&state->seqlock);
-               if (nfs4_state_match_open_stateid_other(state, dst)) {
-                       dst->seqid = state->open_stateid.seqid;
-                       ret = true;
-               }
-       } while (read_seqretry(&state->seqlock, seq));
-       return ret;
-}
-
 bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
 {
        bool ret;
@@ -2095,8 +2079,10 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
        }
 
        status = nfs4_begin_drain_session(clp);
-       if (status != 0)
-               return status;
+       if (status != 0) {
+               result = status;
+               goto out;
+       }
 
        status = nfs4_replace_transport(server, locations);
        if (status != 0) {
index 46a8d63..ab07db0 100644
@@ -1174,7 +1174,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
                } else
                        *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
        }
-       if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
+       if (label && (bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) {
                *p++ = cpu_to_be32(label->lfs);
                *p++ = cpu_to_be32(label->pi);
                *p++ = cpu_to_be32(label->len);
index 4525d5a..bb80034 100644
@@ -359,9 +359,10 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
 }
 
 /*
- * Update the seqid of a layout stateid
+ * Update the seqid of a layout stateid after receiving
+ * NFS4ERR_OLD_STATEID
  */
-bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
                struct pnfs_layout_range *dst_range,
                struct inode *inode)
 {
@@ -377,7 +378,15 @@ bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
 
        spin_lock(&inode->i_lock);
        lo = NFS_I(inode)->layout;
-       if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+       if (lo &&  pnfs_layout_is_valid(lo) &&
+           nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+               /* Is our call using the most recent seqid? If so, bump it */
+               if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) {
+                       nfs4_stateid_seqid_inc(dst);
+                       ret = true;
+                       goto out;
+               }
+               /* Try to update the seqid to the most recent */
                err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
                if (err != -EBUSY) {
                        dst->seqid = lo->plh_stateid.seqid;
@@ -385,6 +394,7 @@ bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
                        ret = true;
                }
        }
+out:
        spin_unlock(&inode->i_lock);
        pnfs_free_lseg_list(&head);
        return ret;
@@ -1440,6 +1450,52 @@ out_noroc:
        return false;
 }
 
+int pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+               struct nfs4_layoutreturn_args **argpp,
+               struct nfs4_layoutreturn_res **respp,
+               int *ret)
+{
+       struct nfs4_layoutreturn_args *arg = *argpp;
+       int retval = -EAGAIN;
+
+       if (!arg)
+               return 0;
+       /* Handle Layoutreturn errors */
+       switch (*ret) {
+       case 0:
+               retval = 0;
+               break;
+       case -NFS4ERR_NOMATCHING_LAYOUT:
+               /* Was there an RPC level error? If not, retry */
+               if (task->tk_rpc_status == 0)
+                       break;
+               /* If the call was not sent, let caller handle it */
+               if (!RPC_WAS_SENT(task))
+                       return 0;
+               /*
+                * Otherwise, assume the call succeeded and
+                * that we need to release the layout
+                */
+               *ret = 0;
+               (*respp)->lrs_present = 0;
+               retval = 0;
+               break;
+       case -NFS4ERR_DELAY:
+               /* Let the caller handle the retry */
+               *ret = -NFS4ERR_NOMATCHING_LAYOUT;
+               return 0;
+       case -NFS4ERR_OLD_STATEID:
+               if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
+                                       &arg->range, inode))
+                       break;
+               *ret = -NFS4ERR_NOMATCHING_LAYOUT;
+               return -EAGAIN;
+       }
+       *argpp = NULL;
+       *respp = NULL;
+       return retval;
+}
+
 void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                int ret)
@@ -1449,10 +1505,15 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
        const nfs4_stateid *res_stateid = NULL;
        struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
 
-       if (ret == 0) {
-               arg_stateid = &args->stateid;
+       switch (ret) {
+       case -NFS4ERR_NOMATCHING_LAYOUT:
+               break;
+       case 0:
                if (res->lrs_present)
                        res_stateid = &res->stateid;
+               /* Fallthrough */
+       default:
+               arg_stateid = &args->stateid;
        }
        pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
                        res_stateid);
index f15609c..f8a3806 100644
@@ -261,7 +261,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
                bool is_recall);
 int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
                bool is_recall);
-bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
                struct pnfs_layout_range *dst_range,
                struct inode *inode);
 void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
@@ -282,6 +282,10 @@ bool pnfs_roc(struct inode *ino,
                struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                const struct cred *cred);
+int pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+               struct nfs4_layoutreturn_args **argpp,
+               struct nfs4_layoutreturn_res **respp,
+               int *ret);
 void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                int ret);
@@ -701,6 +705,15 @@ pnfs_roc(struct inode *ino,
        return false;
 }
 
+static inline int
+pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+               struct nfs4_layoutreturn_args **argpp,
+               struct nfs4_layoutreturn_res **respp,
+               int *ret)
+{
+       return 0;
+}
+
 static inline void
 pnfs_roc_release(struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
@@ -785,7 +798,7 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
 {
 }
 
-static inline bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+static inline bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
                struct pnfs_layout_range *dst_range,
                struct inode *inode)
 {
index 19a76cf..a84df7d 100644
@@ -2645,6 +2645,13 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
 }
 EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
 
+static void nfs_set_readahead(struct backing_dev_info *bdi,
+                             unsigned long iomax_pages)
+{
+       bdi->ra_pages = VM_READAHEAD_PAGES;
+       bdi->io_pages = iomax_pages;
+}
+
 struct dentry *nfs_fs_mount_common(struct nfs_server *server,
                                   int flags, const char *dev_name,
                                   struct nfs_mount_info *mount_info,
@@ -2687,7 +2694,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
                        mntroot = ERR_PTR(error);
                        goto error_splat_super;
                }
-               s->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD;
+               nfs_set_readahead(s->s_bdi, server->rpages);
                server->super = s;
        }
 
index 85ca495..52cab65 100644
@@ -786,7 +786,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page *head;
 
-       atomic_long_dec(&nfsi->nrequests);
        if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
                head = req->wb_head;
 
@@ -799,8 +798,10 @@ static void nfs_inode_remove_request(struct nfs_page *req)
                spin_unlock(&mapping->private_lock);
        }
 
-       if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+       if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
                nfs_release_request(req);
+               atomic_long_dec(&nfsi->nrequests);
+       }
 }
 
 static void
index d25f6bb..10cefb0 100644
@@ -3,6 +3,7 @@ config NFSD
        tristate "NFS server support"
        depends on INET
        depends on FILE_LOCKING
+       depends on FSNOTIFY
        select LOCKD
        select SUNRPC
        select EXPORTFS
@@ -147,7 +148,7 @@ config NFSD_V4_SECURITY_LABEL
 
 config NFSD_FAULT_INJECTION
        bool "NFS server manual fault injection"
-       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
+       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
        help
          This option enables support for manually injecting faults
          into the NFS server.  This is intended to be used for
index 2bfb58e..6a40b1a 100644
@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD)    += nfsd.o
 nfsd-y                 += trace.o
 
 nfsd-y                         += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
-                          export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+                          export.o auth.o lockd.o nfscache.o nfsxdr.o \
+                          stats.o filecache.o
 nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
index 4cd7c69..ba14d2f 100644
@@ -39,14 +39,6 @@ struct nfs4_acl;
 struct svc_fh;
 struct svc_rqst;
 
-/*
- * Maximum ACL we'll accept from a client; chosen (somewhat
- * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
- * high-order allocation.  This allows 204 ACEs on x86_64:
- */
-#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
-                       / sizeof(struct nfs4_ace))
-
 int nfs4_acl_bytes(int entries);
 int nfs4_acl_get_whotype(char *, u32);
 __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
index 66d4c55..9bbaa67 100644
@@ -15,6 +15,7 @@
 
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_PNFS
 
@@ -404,7 +405,7 @@ static void
 nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
 {
        struct nfs4_client *clp = ls->ls_stid.sc_client;
-       struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+       struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
 
        bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
                        nfsd4_scsi_pr_key(clp), 0, true);
index baa0195..15422c9 100644
@@ -22,6 +22,7 @@
 #include "nfsfh.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_EXPORT
 
@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
                return NULL;
 }
 
+static void expkey_flush(void)
+{
+       /*
+        * Take the nfsd_mutex here to ensure that the file cache is not
+        * destroyed while we're in the middle of flushing.
+        */
+       mutex_lock(&nfsd_mutex);
+       nfsd_file_cache_purge(current->nsproxy->net_ns);
+       mutex_unlock(&nfsd_mutex);
+}
+
 static const struct cache_detail svc_expkey_cache_template = {
        .owner          = THIS_MODULE,
        .hash_size      = EXPKEY_HASHMAX,
@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
        .init           = expkey_init,
        .update         = expkey_update,
        .alloc          = expkey_alloc,
+       .flush          = expkey_flush,
 };
 
 static int
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 0000000..ef55e9b
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,934 @@
+/*
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "netns.h"
+#include "filecache.h"
+#include "trace.h"
+
+#define NFSDDBG_FACILITY       NFSDDBG_FH
+
+/* FIXME: dynamically size this for the machine somehow? */
+#define NFSD_FILE_HASH_BITS            12
+#define NFSD_FILE_HASH_SIZE            (1 << NFSD_FILE_HASH_BITS)
+#define NFSD_LAUNDRETTE_DELAY          (2 * HZ)
+
+#define NFSD_FILE_LRU_RESCAN           (0)
+#define NFSD_FILE_SHUTDOWN             (1)
+#define NFSD_FILE_LRU_THRESHOLD        (4096UL)
+#define NFSD_FILE_LRU_LIMIT            (NFSD_FILE_LRU_THRESHOLD << 2)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK     (NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+struct nfsd_fcache_bucket {
+       struct hlist_head       nfb_head;
+       spinlock_t              nfb_lock;
+       unsigned int            nfb_count;
+       unsigned int            nfb_maxcount;
+};
+
+static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+
+static struct kmem_cache               *nfsd_file_slab;
+static struct kmem_cache               *nfsd_file_mark_slab;
+static struct nfsd_fcache_bucket       *nfsd_file_hashtbl;
+static struct list_lru                 nfsd_file_lru;
+static long                            nfsd_file_lru_flags;
+static struct fsnotify_group           *nfsd_file_fsnotify_group;
+static atomic_long_t                   nfsd_filecache_count;
+static struct delayed_work             nfsd_filecache_laundrette;
+
+enum nfsd_file_laundrette_ctl {
+       NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
+       NFSD_FILE_LAUNDRETTE_MAY_FLUSH
+};
+
+static void
+nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
+{
+       long count = atomic_long_read(&nfsd_filecache_count);
+
+       if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+               return;
+
+       /* Be more aggressive about scanning if over the threshold */
+       if (count > NFSD_FILE_LRU_THRESHOLD)
+               mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
+       else
+               schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
+
+       if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
+               return;
+
+       /* ...and don't delay flushing if we're out of control */
+       if (count >= NFSD_FILE_LRU_LIMIT)
+               flush_delayed_work(&nfsd_filecache_laundrette);
+}
+
+static void
+nfsd_file_slab_free(struct rcu_head *rcu)
+{
+       struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
+
+       put_cred(nf->nf_cred);
+       kmem_cache_free(nfsd_file_slab, nf);
+}
+
+static void
+nfsd_file_mark_free(struct fsnotify_mark *mark)
+{
+       struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
+                                                 nfm_mark);
+
+       kmem_cache_free(nfsd_file_mark_slab, nfm);
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_get(struct nfsd_file_mark *nfm)
+{
+       if (!atomic_inc_not_zero(&nfm->nfm_ref))
+               return NULL;
+       return nfm;
+}
+
+static void
+nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+{
+       if (atomic_dec_and_test(&nfm->nfm_ref)) {
+               fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+               fsnotify_put_mark(&nfm->nfm_mark);
+       }
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+{
+       int                     err;
+       struct fsnotify_mark    *mark;
+       struct nfsd_file_mark   *nfm = NULL, *new;
+       struct inode *inode = nf->nf_inode;
+
+       do {
+               mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
+               mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
+                               nfsd_file_fsnotify_group);
+               if (mark) {
+                       nfm = nfsd_file_mark_get(container_of(mark,
+                                                struct nfsd_file_mark,
+                                                nfm_mark));
+                       mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+                       fsnotify_put_mark(mark);
+                       if (likely(nfm))
+                               break;
+               } else
+                       mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+
+               /* allocate a new nfm */
+               new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+               if (!new)
+                       return NULL;
+               fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
+               new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
+               atomic_set(&new->nfm_ref, 1);
+
+               err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
+
+               /*
+                * If the add was successful, then return the object.
+                * Otherwise, we need to put the reference we hold on the
+                * nfm_mark. The fsnotify code will take a reference and put
+                * it on failure, so we can't just free it directly. It's also
+                * not safe to call fsnotify_destroy_mark on it as the
+                * mark->group will be NULL. Thus, we can't let the nfm_ref
+                * counter drive the destruction at this point.
+                */
+               if (likely(!err))
+                       nfm = new;
+               else
+                       fsnotify_put_mark(&new->nfm_mark);
+       } while (unlikely(err == -EEXIST));
+
+       return nfm;
+}
+
+static struct nfsd_file *
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+               struct net *net)
+{
+       struct nfsd_file *nf;
+
+       nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+       if (nf) {
+               INIT_HLIST_NODE(&nf->nf_node);
+               INIT_LIST_HEAD(&nf->nf_lru);
+               nf->nf_file = NULL;
+               nf->nf_cred = get_current_cred();
+               nf->nf_net = net;
+               nf->nf_flags = 0;
+               nf->nf_inode = inode;
+               nf->nf_hashval = hashval;
+               atomic_set(&nf->nf_ref, 1);
+               nf->nf_may = may & NFSD_FILE_MAY_MASK;
+               if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+                       if (may & NFSD_MAY_WRITE)
+                               __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+                       if (may & NFSD_MAY_READ)
+                               __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+               }
+               nf->nf_mark = NULL;
+               trace_nfsd_file_alloc(nf);
+       }
+       return nf;
+}
+
+static bool
+nfsd_file_free(struct nfsd_file *nf)
+{
+       bool flush = false;
+
+       trace_nfsd_file_put_final(nf);
+       if (nf->nf_mark)
+               nfsd_file_mark_put(nf->nf_mark);
+       if (nf->nf_file) {
+               get_file(nf->nf_file);
+               filp_close(nf->nf_file, NULL);
+               fput(nf->nf_file);
+               flush = true;
+       }
+       call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+       return flush;
+}
+
+static bool
+nfsd_file_check_writeback(struct nfsd_file *nf)
+{
+       struct file *file = nf->nf_file;
+       struct address_space *mapping;
+
+       if (!file || !(file->f_mode & FMODE_WRITE))
+               return false;
+       mapping = file->f_mapping;
+       return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+               mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+}
+
+static int
+nfsd_file_check_write_error(struct nfsd_file *nf)
+{
+       struct file *file = nf->nf_file;
+
+       if (!file || !(file->f_mode & FMODE_WRITE))
+               return 0;
+       return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+}
+
+static bool
+nfsd_file_in_use(struct nfsd_file *nf)
+{
+       return nfsd_file_check_writeback(nf) ||
+                       nfsd_file_check_write_error(nf);
+}
+
+static void
+nfsd_file_do_unhash(struct nfsd_file *nf)
+{
+       lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+       trace_nfsd_file_unhash(nf);
+
+       if (nfsd_file_check_write_error(nf))
+               nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+       --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+       hlist_del_rcu(&nf->nf_node);
+       if (!list_empty(&nf->nf_lru))
+               list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+       atomic_long_dec(&nfsd_filecache_count);
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+       if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+               nfsd_file_do_unhash(nf);
+               return true;
+       }
+       return false;
+}
+
+/*
+ * Return true if the file was unhashed.
+ */
+static bool
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+{
+       lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+       trace_nfsd_file_unhash_and_release_locked(nf);
+       if (!nfsd_file_unhash(nf))
+               return false;
+       /* keep final reference for nfsd_file_lru_dispose */
+       if (atomic_add_unless(&nf->nf_ref, -1, 1))
+               return true;
+
+       list_add(&nf->nf_lru, dispose);
+       return true;
+}
+
+static int
+nfsd_file_put_noref(struct nfsd_file *nf)
+{
+       int count;
+
+       trace_nfsd_file_put(nf);
+
+       count = atomic_dec_return(&nf->nf_ref);
+       if (!count) {
+               WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+               nfsd_file_free(nf);
+       }
+       return count;
+}
+
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+       bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+       bool unused = !nfsd_file_in_use(nf);
+
+       set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+       if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
+               nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
+}
+
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+       if (likely(atomic_inc_not_zero(&nf->nf_ref)))
+               return nf;
+       return NULL;
+}
+
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+       struct nfsd_file *nf;
+
+       while (!list_empty(dispose)) {
+               nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+               list_del(&nf->nf_lru);
+               nfsd_file_put_noref(nf);
+       }
+}
+
+static void
+nfsd_file_dispose_list_sync(struct list_head *dispose)
+{
+       bool flush = false;
+       struct nfsd_file *nf;
+
+       while (!list_empty(dispose)) {
+               nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+               list_del(&nf->nf_lru);
+               if (!atomic_dec_and_test(&nf->nf_ref))
+                       continue;
+               if (nfsd_file_free(nf))
+                       flush = true;
+       }
+       if (flush)
+               flush_delayed_fput();
+}
+
+/*
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static enum lru_status
+nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+                spinlock_t *lock, void *arg)
+       __releases(lock)
+       __acquires(lock)
+{
+       struct list_head *head = arg;
+       struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+       /*
+        * Do a lockless refcount check. The hashtable holds one reference, so
+        * we look to see if anything else has a reference, or if any have
+        * been put since the shrinker last ran. Those don't get unhashed and
+        * released.
+        *
+        * Note that in the put path, we set the flag and then decrement the
+        * counter. Here we check the counter and then test and clear the flag.
+        * That order is deliberate to ensure that we can do this locklessly.
+        */
+       if (atomic_read(&nf->nf_ref) > 1)
+               goto out_skip;
+
+       /*
+        * Don't throw out files that are still undergoing I/O or
+        * that have uncleared errors pending.
+        */
+       if (nfsd_file_check_writeback(nf))
+               goto out_skip;
+
+       if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+               goto out_rescan;
+
+       if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+               goto out_skip;
+
+       list_lru_isolate_move(lru, &nf->nf_lru, head);
+       return LRU_REMOVED;
+out_rescan:
+       set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
+out_skip:
+       return LRU_SKIP;
+}
+
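
The comment in nfsd_file_lru_cb() relies on a specific ordering: the put path
(nfsd_file_put above) sets NFSD_FILE_REFERENCED before dropping its
reference, while the scanner reads the refcount before test-and-clearing the
flag, so a file touched since the last scan survives one extra pass instead
of being freed under a racing user. A small user-space model of that
ordering, assuming C11 atomics and illustrative names (not the kernel's
types):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct entry {
            atomic_int  ref;        /* the hash table holds one ref */
            atomic_bool referenced; /* analogue of NFSD_FILE_REFERENCED */
    };

    /* put path: mark referenced *before* dropping the reference */
    static void entry_put(struct entry *e)
    {
            atomic_store(&e->referenced, true);
            atomic_fetch_sub(&e->ref, 1);
    }

    /* scan path: check the count *before* testing/clearing the flag */
    static bool entry_can_reap(struct entry *e)
    {
            if (atomic_load(&e->ref) > 1)
                    return false;   /* still in use: skip */
            if (atomic_exchange(&e->referenced, false))
                    return false;   /* used since last scan: rescan */
            return true;
    }

    int main(void)
    {
            struct entry e = { .ref = 2, .referenced = false };

            entry_put(&e);                      /* last user drops its ref */
            printf("%d\n", entry_can_reap(&e)); /* 0: REFERENCED still set */
            printf("%d\n", entry_can_reap(&e)); /* 1: reapable on rescan   */
            return 0;
    }
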
+static void
+nfsd_file_lru_dispose(struct list_head *head)
+{
+       while (!list_empty(head)) {
+               struct nfsd_file *nf = list_first_entry(head,
+                               struct nfsd_file, nf_lru);
+               list_del_init(&nf->nf_lru);
+               spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+               nfsd_file_do_unhash(nf);
+               spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+               nfsd_file_put_noref(nf);
+       }
+}
+
+static unsigned long
+nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+{
+       return list_lru_count(&nfsd_file_lru);
+}
+
+static unsigned long
+nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+{
+       LIST_HEAD(head);
+       unsigned long ret;
+
+       ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
+       nfsd_file_lru_dispose(&head);
+       return ret;
+}
+
+static struct shrinker nfsd_file_shrinker = {
+       .scan_objects = nfsd_file_lru_scan,
+       .count_objects = nfsd_file_lru_count,
+       .seeks = 1,
+};
+
+static void
+__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+                       struct list_head *dispose)
+{
+       struct nfsd_file        *nf;
+       struct hlist_node       *tmp;
+
+       spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+               if (inode == nf->nf_inode)
+                       nfsd_file_unhash_and_release_locked(nf, dispose);
+       }
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put. Also ensure that any
+ * resulting fputs have their final __fput done before returning.
+ */
+void
+nfsd_file_close_inode_sync(struct inode *inode)
+{
+       unsigned int            hashval = (unsigned int)hash_long(inode->i_ino,
+                                               NFSD_FILE_HASH_BITS);
+       LIST_HEAD(dispose);
+
+       __nfsd_file_close_inode(inode, hashval, &dispose);
+       trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+       nfsd_file_dispose_list_sync(&dispose);
+}
+
+/**
+ * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put.
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+       unsigned int            hashval = (unsigned int)hash_long(inode->i_ino,
+                                               NFSD_FILE_HASH_BITS);
+       LIST_HEAD(dispose);
+
+       __nfsd_file_close_inode(inode, hashval, &dispose);
+       trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+       nfsd_file_dispose_list(&dispose);
+}
+
+/**
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: dummy
+ *
+ * Walk the LRU list and close any entries that have not been used since
+ * the last scan.
+ *
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static void
+nfsd_file_delayed_close(struct work_struct *work)
+{
+       LIST_HEAD(head);
+
+       list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
+
+       if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
+               nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
+
+       if (!list_empty(&head)) {
+               nfsd_file_lru_dispose(&head);
+               flush_delayed_fput();
+       }
+}
+
+static int
+nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+                           void *data)
+{
+       struct file_lock *fl = data;
+
+       /* Only close files for F_SETLEASE leases */
+       if (fl->fl_flags & FL_LEASE)
+               nfsd_file_close_inode_sync(file_inode(fl->fl_file));
+       return 0;
+}
+
+static struct notifier_block nfsd_file_lease_notifier = {
+       .notifier_call = nfsd_file_lease_notifier_call,
+};
+
+static int
+nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
+                               struct inode *inode,
+                               u32 mask, const void *data, int data_type,
+                               const struct qstr *file_name, u32 cookie,
+                               struct fsnotify_iter_info *iter_info)
+{
+       trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+       /* Should be no marks on non-regular files */
+       if (!S_ISREG(inode->i_mode)) {
+               WARN_ON_ONCE(1);
+               return 0;
+       }
+
+       /* don't close files if this was not the last link */
+       if (mask & FS_ATTRIB) {
+               if (inode->i_nlink)
+                       return 0;
+       }
+
+       nfsd_file_close_inode(inode);
+       return 0;
+}
+
+static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+       .handle_event = nfsd_file_fsnotify_handle_event,
+       .free_mark = nfsd_file_mark_free,
+};
+
+int
+nfsd_file_cache_init(void)
+{
+       int             ret = -ENOMEM;
+       unsigned int    i;
+
+       clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+       if (nfsd_file_hashtbl)
+               return 0;
+
+       nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+                               sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+       if (!nfsd_file_hashtbl) {
+               pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+               goto out_err;
+       }
+
+       nfsd_file_slab = kmem_cache_create("nfsd_file",
+                               sizeof(struct nfsd_file), 0, 0, NULL);
+       if (!nfsd_file_slab) {
+               pr_err("nfsd: unable to create nfsd_file_slab\n");
+               goto out_err;
+       }
+
+       nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
+                                       sizeof(struct nfsd_file_mark), 0, 0, NULL);
+       if (!nfsd_file_mark_slab) {
+               pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
+               goto out_err;
+       }
+
+       ret = list_lru_init(&nfsd_file_lru);
+       if (ret) {
+               pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
+               goto out_err;
+       }
+
+       ret = register_shrinker(&nfsd_file_shrinker);
+       if (ret) {
+               pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
+               goto out_lru;
+       }
+
+       ret = lease_register_notifier(&nfsd_file_lease_notifier);
+       if (ret) {
+               pr_err("nfsd: unable to register lease notifier: %d\n", ret);
+               goto out_shrinker;
+       }
+
+       nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
+       if (IS_ERR(nfsd_file_fsnotify_group)) {
+               pr_err("nfsd: unable to create fsnotify group: %ld\n",
+                       PTR_ERR(nfsd_file_fsnotify_group));
+               nfsd_file_fsnotify_group = NULL;
+               goto out_notifier;
+       }
+
+       for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+               INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+               spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+       }
+
+       INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
+out:
+       return ret;
+out_notifier:
+       lease_unregister_notifier(&nfsd_file_lease_notifier);
+out_shrinker:
+       unregister_shrinker(&nfsd_file_shrinker);
+out_lru:
+       list_lru_destroy(&nfsd_file_lru);
+out_err:
+       kmem_cache_destroy(nfsd_file_slab);
+       nfsd_file_slab = NULL;
+       kmem_cache_destroy(nfsd_file_mark_slab);
+       nfsd_file_mark_slab = NULL;
+       kfree(nfsd_file_hashtbl);
+       nfsd_file_hashtbl = NULL;
+       goto out;
+}
+
+/*
+ * Note this can deadlock with nfsd_file_lru_cb.
+ */
+void
+nfsd_file_cache_purge(struct net *net)
+{
+       unsigned int            i;
+       struct nfsd_file        *nf;
+       struct hlist_node       *next;
+       LIST_HEAD(dispose);
+       bool del;
+
+       if (!nfsd_file_hashtbl)
+               return;
+
+       for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+               struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+
+               spin_lock(&nfb->nfb_lock);
+               hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+                       if (net && nf->nf_net != net)
+                               continue;
+                       del = nfsd_file_unhash_and_release_locked(nf, &dispose);
+
+                       /*
+                        * Deadlock detected! Something marked this entry as
+                        * unhashed, but hasn't removed it from the hash list.
+                        */
+                       WARN_ON_ONCE(!del);
+               }
+               spin_unlock(&nfb->nfb_lock);
+               nfsd_file_dispose_list(&dispose);
+       }
+}
+
+void
+nfsd_file_cache_shutdown(void)
+{
+       LIST_HEAD(dispose);
+
+       set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+       lease_unregister_notifier(&nfsd_file_lease_notifier);
+       unregister_shrinker(&nfsd_file_shrinker);
+       /*
+        * make sure all callers of nfsd_file_lru_cb are done before
+        * calling nfsd_file_cache_purge
+        */
+       cancel_delayed_work_sync(&nfsd_filecache_laundrette);
+       nfsd_file_cache_purge(NULL);
+       list_lru_destroy(&nfsd_file_lru);
+       rcu_barrier();
+       fsnotify_put_group(nfsd_file_fsnotify_group);
+       nfsd_file_fsnotify_group = NULL;
+       kmem_cache_destroy(nfsd_file_slab);
+       nfsd_file_slab = NULL;
+       fsnotify_wait_marks_destroyed();
+       kmem_cache_destroy(nfsd_file_mark_slab);
+       nfsd_file_mark_slab = NULL;
+       kfree(nfsd_file_hashtbl);
+       nfsd_file_hashtbl = NULL;
+}
+
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+       int i;
+
+       if (!uid_eq(c1->fsuid, c2->fsuid))
+               return false;
+       if (!gid_eq(c1->fsgid, c2->fsgid))
+               return false;
+       if (c1->group_info == NULL || c2->group_info == NULL)
+               return c1->group_info == c2->group_info;
+       if (c1->group_info->ngroups != c2->group_info->ngroups)
+               return false;
+       for (i = 0; i < c1->group_info->ngroups; i++) {
+               if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+                       return false;
+       }
+       return true;
+}
+
+static struct nfsd_file *
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+                       unsigned int hashval, struct net *net)
+{
+       struct nfsd_file *nf;
+       unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+
+       hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+                                nf_node) {
+               if ((need & nf->nf_may) != need)
+                       continue;
+               if (nf->nf_inode != inode)
+                       continue;
+               if (nf->nf_net != net)
+                       continue;
+               if (!nfsd_match_cred(nf->nf_cred, current_cred()))
+                       continue;
+               if (nfsd_file_get(nf) != NULL)
+                       return nf;
+       }
+       return NULL;
+}
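
The lookup above follows the usual RCU pattern: walk the hash chain under
rcu_read_lock() and return an entry only if its refcount can still be
raised. nfsd_file_get() is defined earlier in this file, outside this
excerpt; a minimal sketch of what the failing-get semantics imply:

	struct nfsd_file *
	nfsd_file_get(struct nfsd_file *nf)
	{
		/* refuse to resurrect an entry whose last reference is gone */
		if (likely(atomic_inc_not_zero(&nf->nf_ref)))
			return nf;
		return NULL;
	}
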
+
+/**
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode of the file to check
+ *
+ * Scan the hashtable for open files that match this inode. Returns true if
+ * there are any such files, and false if not.
+ */
+bool
+nfsd_file_is_cached(struct inode *inode)
+{
+       bool                    ret = false;
+       struct nfsd_file        *nf;
+       unsigned int            hashval;
+
+       hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+                                nf_node) {
+               if (inode == nf->nf_inode) {
+                       ret = true;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       trace_nfsd_file_is_cached(inode, hashval, (int)ret);
+       return ret;
+}
+
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                 unsigned int may_flags, struct nfsd_file **pnf)
+{
+       __be32  status;
+       struct net *net = SVC_NET(rqstp);
+       struct nfsd_file *nf, *new;
+       struct inode *inode;
+       unsigned int hashval;
+
+       /* FIXME: skip this if fh_dentry is already set? */
+       status = fh_verify(rqstp, fhp, S_IFREG,
+                               may_flags|NFSD_MAY_OWNER_OVERRIDE);
+       if (status != nfs_ok)
+               return status;
+
+       inode = d_inode(fhp->fh_dentry);
+       hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+retry:
+       rcu_read_lock();
+       nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+       rcu_read_unlock();
+       if (nf)
+               goto wait_for_construction;
+
+       new = nfsd_file_alloc(inode, may_flags, hashval, net);
+       if (!new) {
+               trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
+                                       NULL, nfserr_jukebox);
+               return nfserr_jukebox;
+       }
+
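+       /*
+        * Another task may have hashed a matching nfsd_file between the
+        * lockless search above and this point, so search again under the
+        * bucket lock before inserting the new entry.
+        */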
+       spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+       if (nf == NULL)
+               goto open_file;
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       nfsd_file_slab_free(&new->nf_rcu);
+
+wait_for_construction:
+       wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+       /* Did construction of this file fail? */
+       if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+               nfsd_file_put_noref(nf);
+               goto retry;
+       }
+
+       this_cpu_inc(nfsd_file_cache_hits);
+
+       if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+               bool write = (may_flags & NFSD_MAY_WRITE);
+
+               if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+                   (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+                       status = nfserrno(nfsd_open_break_lease(
+                                       file_inode(nf->nf_file), may_flags));
+                       if (status == nfs_ok) {
+                               clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+                               if (write)
+                                       clear_bit(NFSD_FILE_BREAK_WRITE,
+                                                 &nf->nf_flags);
+                       }
+               }
+       }
+out:
+       if (status == nfs_ok) {
+               *pnf = nf;
+       } else {
+               nfsd_file_put(nf);
+               nf = NULL;
+       }
+
+       trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
+       return status;
+open_file:
+       nf = new;
+       /* Take reference for the hashtable */
+       atomic_inc(&nf->nf_ref);
+       __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+       __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+       list_lru_add(&nfsd_file_lru, &nf->nf_lru);
+       hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+       ++nfsd_file_hashtbl[hashval].nfb_count;
+       nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+                       nfsd_file_hashtbl[hashval].nfb_count);
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       atomic_long_inc(&nfsd_filecache_count);
+
+       nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+       if (nf->nf_mark)
+               status = nfsd_open_verified(rqstp, fhp, S_IFREG,
+                               may_flags, &nf->nf_file);
+       else
+               status = nfserr_jukebox;
+       /*
+        * If construction failed, or we raced with a call to unlink()
+        * then unhash.
+        */
+       if (status != nfs_ok || inode->i_nlink == 0) {
+               bool do_free;
+               spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+               do_free = nfsd_file_unhash(nf);
+               spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+               if (do_free)
+                       nfsd_file_put_noref(nf);
+       }
+       clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+       smp_mb__after_atomic();
+       wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+       goto out;
+}
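
Callers bracket a single NFS operation with these two entry points. A
minimal usage sketch (the handler name and the read itself are
hypothetical; nfsd_file_acquire() and nfsd_file_put() are declared in
filecache.h below):

	static __be32
	example_read(struct svc_rqst *rqstp, struct svc_fh *fhp)
	{
		struct nfsd_file *nf;
		__be32 status;

		status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
		if (status != nfs_ok)
			return status;
		/* ... read from nf->nf_file ... */
		nfsd_file_put(nf);	/* drop this call's ref; the cache keeps its own */
		return nfs_ok;
	}
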
+
+/*
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+{
+       unsigned int i, count = 0, longest = 0;
+       unsigned long hits = 0;
+
+       /*
+        * No need for spinlocks here since we're not terribly interested in
+        * accuracy. We do take the nfsd_mutex simply to ensure that we
+        * don't end up racing with server shutdown.
+        */
+       mutex_lock(&nfsd_mutex);
+       if (nfsd_file_hashtbl) {
+               for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+                       count += nfsd_file_hashtbl[i].nfb_count;
+                       longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+               }
+       }
+       mutex_unlock(&nfsd_mutex);
+
+       for_each_possible_cpu(i)
+               hits += per_cpu(nfsd_file_cache_hits, i);
+
+       seq_printf(m, "total entries: %u\n", count);
+       seq_printf(m, "longest chain: %u\n", longest);
+       seq_printf(m, "cache hits:    %lu\n", hits);
+       return 0;
+}
+
+int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, nfsd_file_cache_stats_show, NULL);
+}
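
Given the seq_printf() calls above, the stats file emits three labeled
lines; the numbers here are purely illustrative:

	total entries: 4096
	longest chain: 3
	cache hits:    1052714
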
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644 (file)
index 0000000..851d9ab
--- /dev/null
@@ -0,0 +1,61 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+
+#include <linux/fsnotify_backend.h>
+
+/*
+ * This is the fsnotify_mark container that nfsd attaches to the files that it
+ * is holding open. Note that we have a separate refcount here aside from the
+ * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
+ * the inode, and for each nfsd_file to hold a reference to it.
+ *
+ * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
+ * how to put that reference. If there are still outstanding nfsd_files that
+ * reference the mark, then we would want to call fsnotify_put_mark on it.
+ * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
+ * can't really tell the difference, we use the nfm_mark to keep track of how
+ * many nfsd_files hold references to the mark. When that counter goes to zero
+ * then we know to call fsnotify_destroy_mark on it.
+ */
+struct nfsd_file_mark {
+       struct fsnotify_mark    nfm_mark;
+       atomic_t                nfm_ref;
+};
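
As the comment above describes, dropping the last nfsd_file reference to
the mark is what triggers its destruction. The put helper this implies
(defined in filecache.c, not shown in this excerpt) would look roughly
like:

	static void
	nfsd_file_mark_put(struct nfsd_file_mark *nfm)
	{
		if (atomic_dec_and_test(&nfm->nfm_ref)) {
			fsnotify_destroy_mark(&nfm->nfm_mark,
					      nfsd_file_fsnotify_group);
			fsnotify_put_mark(&nfm->nfm_mark);
		}
	}
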
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode number. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only used for comparison.
+ */
+struct nfsd_file {
+       struct hlist_node       nf_node;
+       struct list_head        nf_lru;
+       struct rcu_head         nf_rcu;
+       struct file             *nf_file;
+       const struct cred       *nf_cred;
+       struct net              *nf_net;
+#define NFSD_FILE_HASHED       (0)
+#define NFSD_FILE_PENDING      (1)
+#define NFSD_FILE_BREAK_READ   (2)
+#define NFSD_FILE_BREAK_WRITE  (3)
+#define NFSD_FILE_REFERENCED   (4)
+       unsigned long           nf_flags;
+       struct inode            *nf_inode;
+       unsigned int            nf_hashval;
+       atomic_t                nf_ref;
+       unsigned char           nf_may;
+       struct nfsd_file_mark   *nf_mark;
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_purge(struct net *);
+void nfsd_file_cache_shutdown(void);
+void nfsd_file_put(struct nfsd_file *nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+void nfsd_file_close_inode_sync(struct inode *inode);
+bool nfsd_file_is_cached(struct inode *inode);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                 unsigned int may_flags, struct nfsd_file **nfp);
+int    nfsd_file_cache_stats_open(struct inode *, struct file *);
+#endif /* _FS_NFSD_FILECACHE_H */
index bdfe5bc..9a4ef81 100644 (file)
@@ -104,6 +104,7 @@ struct nfsd_net {
 
        /* Time of server startup */
        struct timespec64 nfssvc_boot;
+       seqlock_t boot_lock;
 
        /*
         * Max number of connections this nfsd container will allow. Defaults
@@ -179,4 +180,7 @@ struct nfsd_net {
 extern void nfsd_netns_free_versions(struct nfsd_net *nn);
 
 extern unsigned int nfsd_net_id;
+
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_boot_verifier(struct nfsd_net *nn);
 #endif /* __NFSD_NETNS_H__ */
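
The new boot_lock seqlock lets the boot-verifier reset path update
nfssvc_boot without readers taking a lock. A plausible reader, assuming
the copy helper declared above follows the standard seqlock idiom (its
real body lives in nfssvc.c, outside this excerpt):

	void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
	{
		unsigned int seq;

		do {
			seq = read_seqbegin(&nn->boot_lock);
			/* opaque to the client, so byte order is irrelevant */
			verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
			verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
		} while (read_seqretry(&nn->boot_lock, seq));
	}
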
index 9bc32af..cea68d8 100644 (file)
@@ -172,13 +172,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
        nfserr = nfsd_read(rqstp, &resp->fh,
                                  argp->offset,
                                  rqstp->rq_vec, argp->vlen,
-                                 &resp->count);
-       if (nfserr == 0) {
-               struct inode    *inode = d_inode(resp->fh.fh_dentry);
-               resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
-                                                       inode->i_size);
-       }
-
+                                 &resp->count,
+                                 &resp->eof);
        RETURN_STATUS(nfserr);
 }
 
index fcf3182..86e5658 100644 (file)
@@ -27,6 +27,7 @@ static u32    nfs3_ftypes[] = {
        NF3SOCK, NF3BAD,  NF3LNK, NF3BAD,
 };
 
+
 /*
  * XDR functions for basic NFS types
  */
@@ -751,14 +752,16 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_writeres *resp = rqstp->rq_resp;
        struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       __be32 verf[2];
 
        p = encode_wcc_data(rqstp, p, &resp->fh);
        if (resp->status == 0) {
                *p++ = htonl(resp->count);
                *p++ = htonl(resp->committed);
                /* unique identifier, y2038 overflow can be ignored */
-               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+               nfsd_copy_boot_verifier(verf, nn);
+               *p++ = verf[0];
+               *p++ = verf[1];
        }
        return xdr_ressize_check(rqstp, p);
 }
@@ -1125,13 +1128,15 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_commitres *resp = rqstp->rq_resp;
        struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       __be32 verf[2];
 
        p = encode_wcc_data(rqstp, p, &resp->fh);
        /* Write verifier */
        if (resp->status == 0) {
                /* unique identifier, y2038 overflow can be ignored */
-               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+               nfsd_copy_boot_verifier(verf, nn);
+               *p++ = verf[0];
+               *p++ = verf[1];
        }
        return xdr_ressize_check(rqstp, p);
 }
index 397eb78..5241114 100644 (file)
@@ -512,11 +512,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb != NULL) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
 
        return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
@@ -604,11 +602,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
 }
 #endif /* CONFIG_NFSD_PNFS */
@@ -663,11 +660,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
 }
 
@@ -759,11 +755,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
 }
 /*
index a79e24b..2681c70 100644 (file)
@@ -169,8 +169,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
        spin_unlock(&fp->fi_lock);
 
        if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
-               vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
-       fput(ls->ls_file);
+               vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
+       nfsd_file_put(ls->ls_file);
 
        if (ls->ls_recalled)
                atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -197,7 +197,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
        fl->fl_end = OFFSET_MAX;
        fl->fl_owner = ls;
        fl->fl_pid = current->tgid;
-       fl->fl_file = ls->ls_file;
+       fl->fl_file = ls->ls_file->nf_file;
 
        status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
        if (status) {
@@ -236,13 +236,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
                        NFSPROC4_CLNT_CB_LAYOUT);
 
        if (parent->sc_type == NFS4_DELEG_STID)
-               ls->ls_file = get_file(fp->fi_deleg_file);
+               ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
        else
                ls->ls_file = find_any_file(fp);
        BUG_ON(!ls->ls_file);
 
        if (nfsd4_layout_setlease(ls)) {
-               fput(ls->ls_file);
+               nfsd_file_put(ls->ls_file);
                put_nfs4_file(fp);
                kmem_cache_free(nfs4_layout_stateid_cache, ls);
                return NULL;
@@ -626,7 +626,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
 
        argv[0] = (char *)nfsd_recall_failed;
        argv[1] = addr_str;
-       argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+       argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
        argv[3] = NULL;
 
        error = call_usermodehelper(nfsd_recall_failed, argv, envp,
index 8beda99..4e3e77b 100644 (file)
@@ -568,17 +568,11 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 {
-       __be32 verf[2];
-       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+       __be32 *verf = (__be32 *)verifier->data;
 
-       /*
-        * This is opaque to client, so no need to byte-swap. Use
-        * __force to keep sparse happy. y2038 time_t overflow is
-        * irrelevant in this usage.
-        */
-       verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-       verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
-       memcpy(verifier->data, verf, sizeof(verifier->data));
+       BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
+
+       nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
 }
 
 static __be32
@@ -761,7 +755,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_read *read = &u->read;
        __be32 status;
 
-       read->rd_filp = NULL;
+       read->rd_nf = NULL;
        if (read->rd_offset >= OFFSET_MAX)
                return nfserr_inval;
 
@@ -782,7 +776,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        /* check stateid */
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                        &read->rd_stateid, RD_STATE,
-                                       &read->rd_filp, &read->rd_tmp_file);
+                                       &read->rd_nf);
        if (status) {
                dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
                goto out;
@@ -798,8 +792,8 @@ out:
 static void
 nfsd4_read_release(union nfsd4_op_u *u)
 {
-       if (u->read.rd_filp)
-               fput(u->read.rd_filp);
+       if (u->read.rd_nf)
+               nfsd_file_put(u->read.rd_nf);
        trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
                             u->read.rd_offset, u->read.rd_length);
 }
@@ -954,7 +948,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
                status = nfs4_preprocess_stateid_op(rqstp, cstate,
                                &cstate->current_fh, &setattr->sa_stateid,
-                               WR_STATE, NULL, NULL);
+                               WR_STATE, NULL);
                if (status) {
                        dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
                        return status;
@@ -993,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        struct nfsd4_write *write = &u->write;
        stateid_t *stateid = &write->wr_stateid;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        __be32 status = nfs_ok;
        unsigned long cnt;
        int nvecs;
@@ -1005,7 +999,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        trace_nfsd_write_start(rqstp, &cstate->current_fh,
                               write->wr_offset, cnt);
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-                                               stateid, WR_STATE, &filp, NULL);
+                                               stateid, WR_STATE, &nf);
        if (status) {
                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
                return status;
@@ -1018,10 +1012,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                      &write->wr_head, write->wr_buflen);
        WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
-       status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+       status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
                                write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
                                write->wr_how_written);
-       fput(filp);
+       nfsd_file_put(nf);
 
        write->wr_bytes_written = cnt;
        trace_nfsd_write_done(rqstp, &cstate->current_fh,
@@ -1031,8 +1025,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-                 stateid_t *src_stateid, struct file **src,
-                 stateid_t *dst_stateid, struct file **dst)
+                 stateid_t *src_stateid, struct nfsd_file **src,
+                 stateid_t *dst_stateid, struct nfsd_file **dst)
 {
        __be32 status;
 
@@ -1040,22 +1034,22 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                return nfserr_nofilehandle;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
-                                           src_stateid, RD_STATE, src, NULL);
+                                           src_stateid, RD_STATE, src);
        if (status) {
                dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
                goto out;
        }
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-                                           dst_stateid, WR_STATE, dst, NULL);
+                                           dst_stateid, WR_STATE, dst);
        if (status) {
                dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
                goto out_put_src;
        }
 
        /* fix up for NFS-specific error code */
-       if (!S_ISREG(file_inode(*src)->i_mode) ||
-           !S_ISREG(file_inode(*dst)->i_mode)) {
+       if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+           !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
                status = nfserr_wrong_type;
                goto out_put_dst;
        }
@@ -1063,9 +1057,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
        return status;
 out_put_dst:
-       fput(*dst);
+       nfsd_file_put(*dst);
 out_put_src:
-       fput(*src);
+       nfsd_file_put(*src);
        goto out;
 }
 
@@ -1074,7 +1068,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                union nfsd4_op_u *u)
 {
        struct nfsd4_clone *clone = &u->clone;
-       struct file *src, *dst;
+       struct nfsd_file *src, *dst;
        __be32 status;
 
        status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
@@ -1082,11 +1076,11 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (status)
                goto out;
 
-       status = nfsd4_clone_file_range(src, clone->cl_src_pos,
-                       dst, clone->cl_dst_pos, clone->cl_count);
+       status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
+                       dst->nf_file, clone->cl_dst_pos, clone->cl_count);
 
-       fput(dst);
-       fput(src);
+       nfsd_file_put(dst);
+       nfsd_file_put(src);
 out:
        return status;
 }
@@ -1176,8 +1170,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
        do {
                if (kthread_should_stop())
                        break;
-               bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos,
-                               copy->file_dst, dst_pos, bytes_total);
+               bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
+                               src_pos, copy->nf_dst->nf_file, dst_pos,
+                               bytes_total);
                if (bytes_copied <= 0)
                        break;
                bytes_total -= bytes_copied;
@@ -1204,8 +1199,8 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
                status = nfs_ok;
        }
 
-       fput(copy->file_src);
-       fput(copy->file_dst);
+       nfsd_file_put(copy->nf_src);
+       nfsd_file_put(copy->nf_dst);
        return status;
 }
 
@@ -1218,16 +1213,16 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
        memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
        memcpy(&dst->fh, &src->fh, sizeof(src->fh));
        dst->cp_clp = src->cp_clp;
-       dst->file_dst = get_file(src->file_dst);
-       dst->file_src = get_file(src->file_src);
+       dst->nf_dst = nfsd_file_get(src->nf_dst);
+       dst->nf_src = nfsd_file_get(src->nf_src);
        memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
 }
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
        nfs4_free_cp_state(copy);
-       fput(copy->file_dst);
-       fput(copy->file_src);
+       nfsd_file_put(copy->nf_dst);
+       nfsd_file_put(copy->nf_src);
        spin_lock(&copy->cp_clp->async_lock);
        list_del(&copy->copies);
        spin_unlock(&copy->cp_clp->async_lock);
@@ -1264,8 +1259,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_copy *async_copy = NULL;
 
        status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
-                                  &copy->file_src, &copy->cp_dst_stateid,
-                                  &copy->file_dst);
+                                  &copy->nf_src, &copy->cp_dst_stateid,
+                                  &copy->nf_dst);
        if (status)
                goto out;
 
@@ -1347,21 +1342,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                struct nfsd4_fallocate *fallocate, int flags)
 {
        __be32 status;
-       struct file *file;
+       struct nfsd_file *nf;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &fallocate->falloc_stateid,
-                                           WR_STATE, &file, NULL);
+                                           WR_STATE, &nf);
        if (status != nfs_ok) {
                dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
                return status;
        }
 
-       status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+       status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
                                     fallocate->falloc_offset,
                                     fallocate->falloc_length,
                                     flags);
-       fput(file);
+       nfsd_file_put(nf);
        return status;
 }
 static __be32
@@ -1406,11 +1401,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_seek *seek = &u->seek;
        int whence;
        __be32 status;
-       struct file *file;
+       struct nfsd_file *nf;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &seek->seek_stateid,
-                                           RD_STATE, &file, NULL);
+                                           RD_STATE, &nf);
        if (status) {
                dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
                return status;
@@ -1432,14 +1427,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         * Note:  This call does change file->f_pos, but nothing in NFSD
         *        should ever use file->f_pos.
         */
-       seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+       seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
        if (seek->seek_pos < 0)
                status = nfserrno(seek->seek_pos);
-       else if (seek->seek_pos >= i_size_read(file_inode(file)))
+       else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
                seek->seek_eof = true;
 
 out:
-       fput(file);
+       nfsd_file_put(nf);
        return status;
 }
 
index 8767955..cdc75ad 100644 (file)
@@ -59,8 +59,13 @@ struct nfsd4_client_tracking_ops {
        void (*remove)(struct nfs4_client *);
        int (*check)(struct nfs4_client *);
        void (*grace_done)(struct nfsd_net *);
+       uint8_t version;
+       size_t msglen;
 };
 
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
+
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 
@@ -173,6 +178,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
                const char *dname, int len, struct nfsd_net *nn)
 {
        struct xdr_netobj name;
+       struct xdr_netobj princhash = { .len = 0, .data = NULL };
        struct nfs4_client_reclaim *crp;
 
        name.data = kmemdup(dname, len, GFP_KERNEL);
@@ -182,7 +188,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
                return;
        }
        name.len = len;
-       crp = nfs4_client_to_reclaim(name, nn);
+       crp = nfs4_client_to_reclaim(name, princhash, nn);
        if (!crp) {
                kfree(name.data);
                return;
@@ -482,6 +488,7 @@ static int
 load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
        struct xdr_netobj name;
+       struct xdr_netobj princhash = { .len = 0, .data = NULL };
 
        if (child->d_name.len != HEXDIR_LEN - 1) {
                printk("%s: illegal name %pd in recovery directory\n",
@@ -496,7 +503,7 @@ load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
                goto out;
        }
        name.len = HEXDIR_LEN;
-       if (!nfs4_client_to_reclaim(name, nn))
+       if (!nfs4_client_to_reclaim(name, princhash, nn))
                kfree(name.data);
 out:
        return 0;
@@ -718,6 +725,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
        .remove         = nfsd4_remove_clid_dir,
        .check          = nfsd4_check_legacy_client,
        .grace_done     = nfsd4_recdir_purge_old,
+       .version        = 1,
+       .msglen         = 0,
 };
 
 /* Globals */
@@ -731,25 +740,32 @@ struct cld_net {
        struct list_head         cn_list;
        unsigned int             cn_xid;
        bool                     cn_has_legacy;
+       struct crypto_shash     *cn_tfm;
 };
 
 struct cld_upcall {
        struct list_head         cu_list;
        struct cld_net          *cu_net;
        struct completion        cu_done;
-       struct cld_msg           cu_msg;
+       union {
+               struct cld_msg_hdr       cu_hdr;
+               struct cld_msg           cu_msg;
+               struct cld_msg_v2        cu_msg_v2;
+       } cu_u;
 };
 
 static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
        int ret;
        struct rpc_pipe_msg msg;
-       struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
+       struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+       struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
+                                         nfsd_net_id);
 
        memset(&msg, 0, sizeof(msg));
        msg.data = cmsg;
-       msg.len = sizeof(*cmsg);
+       msg.len = nn->client_tracking_ops->msglen;
 
        ret = rpc_queue_upcall(pipe, &msg);
        if (ret < 0) {
@@ -765,7 +781,7 @@ out:
 }
 
 static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
        int ret;
 
@@ -781,11 +797,11 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
 }
 
 static ssize_t
-__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
                struct nfsd_net *nn)
 {
-       uint8_t cmd;
-       struct xdr_netobj name;
+       uint8_t cmd, princhashlen;
+       struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
        uint16_t namelen;
        struct cld_net *cn = nn->cld_net;
 
@@ -794,22 +810,48 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
                return -EFAULT;
        }
        if (cmd == Cld_GraceStart) {
-               if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
-                       return -EFAULT;
-               name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
-               if (IS_ERR_OR_NULL(name.data))
-                       return -EFAULT;
-               name.len = namelen;
+               if (nn->client_tracking_ops->version >= 2) {
+                       const struct cld_clntinfo __user *ci;
+
+                       ci = &cmsg->cm_u.cm_clntinfo;
+                       if (get_user(namelen, &ci->cc_name.cn_len))
+                               return -EFAULT;
+                       name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+                       if (IS_ERR_OR_NULL(name.data))
+                               return -EFAULT;
+                       name.len = namelen;
+                       if (get_user(princhashlen, &ci->cc_princhash.cp_len)) {
+                               kfree(name.data);
+                               return -EFAULT;
+                       }
+                       if (princhashlen > 0) {
+                               princhash.data = memdup_user(
+                                               &ci->cc_princhash.cp_data,
+                                               princhashlen);
+                               if (IS_ERR_OR_NULL(princhash.data)) {
+                                       kfree(name.data);
+                                       return -EFAULT;
+                               }
+                               princhash.len = princhashlen;
+                       } else
+                               princhash.len = 0;
+               } else {
+                       const struct cld_name __user *cnm;
+
+                       cnm = &cmsg->cm_u.cm_name;
+                       if (get_user(namelen, &cnm->cn_len))
+                               return -EFAULT;
+                       name.data = memdup_user(&cnm->cn_id, namelen);
+                       if (IS_ERR_OR_NULL(name.data))
+                               return -EFAULT;
+                       name.len = namelen;
+               }
                if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
                        name.len = name.len - 5;
                        memmove(name.data, name.data + 5, name.len);
                        cn->cn_has_legacy = true;
                }
-               if (!nfs4_client_to_reclaim(name, nn)) {
+               if (!nfs4_client_to_reclaim(name, princhash, nn)) {
                        kfree(name.data);
+                       kfree(princhash.data);
                        return -EFAULT;
                }
-               return sizeof(*cmsg);
+               return nn->client_tracking_ops->msglen;
        }
        return -EFAULT;
 }
@@ -818,21 +860,22 @@ static ssize_t
 cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
        struct cld_upcall *tmp, *cup;
-       struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+       struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+       struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
        uint32_t xid;
        struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
                                                nfsd_net_id);
        struct cld_net *cn = nn->cld_net;
        int16_t status;
 
-       if (mlen != sizeof(*cmsg)) {
+       if (mlen != nn->client_tracking_ops->msglen) {
                dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
-                       sizeof(*cmsg));
+                       nn->client_tracking_ops->msglen);
                return -EINVAL;
        }
 
        /* copy just the xid so we can try to find that */
-       if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+       if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
                dprintk("%s: error when copying xid from userspace", __func__);
                return -EFAULT;
        }
@@ -842,7 +885,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         * list (for -EINPROGRESS, we just want to make sure the xid is
         * valid, not remove the upcall from the list)
         */
-       if (get_user(status, &cmsg->cm_status)) {
+       if (get_user(status, &hdr->cm_status)) {
                dprintk("%s: error when copying status from userspace", __func__);
                return -EFAULT;
        }
@@ -851,7 +894,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        cup = NULL;
        spin_lock(&cn->cn_lock);
        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-               if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+               if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
                        cup = tmp;
                        if (status != -EINPROGRESS)
                                list_del_init(&cup->cu_list);
@@ -869,7 +912,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        if (status == -EINPROGRESS)
                return __cld_pipe_inprogress_downcall(cmsg, nn);
 
-       if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+       if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
                return -EFAULT;
 
        complete(&cup->cu_done);
@@ -881,7 +924,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
        struct cld_msg *cmsg = msg->data;
        struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
-                                                cu_msg);
+                                                cu_u.cu_msg);
 
        /* errno >= 0 means we got a downcall */
        if (msg->errno >= 0)
@@ -1007,14 +1050,17 @@ nfsd4_remove_cld_pipe(struct net *net)
 
        nfsd4_cld_unregister_net(net, cn->cn_pipe);
        rpc_destroy_pipe_data(cn->cn_pipe);
+       if (cn->cn_tfm)
+               crypto_free_shash(cn->cn_tfm);
        kfree(nn->cld_net);
        nn->cld_net = NULL;
 }
 
 static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
 {
        struct cld_upcall *new, *tmp;
+       struct cld_net *cn = nn->cld_net;
 
        new = kzalloc(sizeof(*new), GFP_KERNEL);
        if (!new)
@@ -1024,20 +1070,20 @@ alloc_cld_upcall(struct cld_net *cn)
 restart_search:
        spin_lock(&cn->cn_lock);
        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-               if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+               if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
                        cn->cn_xid++;
                        spin_unlock(&cn->cn_lock);
                        goto restart_search;
                }
        }
        init_completion(&new->cu_done);
-       new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
-       put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+       new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+       put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
        new->cu_net = cn;
        list_add(&new->cu_list, &cn->cn_list);
        spin_unlock(&cn->cn_lock);
 
-       dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+       dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
 
        return new;
 }
@@ -1066,20 +1112,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Create;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1090,6 +1136,75 @@ out_err:
                                "record on stable storage: %d\n", ret);
 }
 
+/* Ask daemon to create a new record */
+static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+       int ret;
+       struct cld_upcall *cup;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct cld_net *cn = nn->cld_net;
+       struct cld_msg_v2 *cmsg;
+       struct crypto_shash *tfm = cn->cn_tfm;
+       struct xdr_netobj cksum;
+       char *principal = NULL;
+       SHASH_DESC_ON_STACK(desc, tfm);
+
+       /* Don't upcall if it's already stored */
+       if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+               return;
+
+       cup = alloc_cld_upcall(nn);
+       if (!cup) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+
+       cmsg = &cup->cu_u.cu_msg_v2;
+       cmsg->cm_cmd = Cld_Create;
+       cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
+       memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
+                       clp->cl_name.len);
+       if (clp->cl_cred.cr_raw_principal)
+               principal = clp->cl_cred.cr_raw_principal;
+       else if (clp->cl_cred.cr_principal)
+               principal = clp->cl_cred.cr_principal;
+       if (principal) {
+               desc->tfm = tfm;
+               cksum.len = crypto_shash_digestsize(tfm);
+               cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+               if (cksum.data == NULL) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               ret = crypto_shash_digest(desc, principal, strlen(principal),
+                                         cksum.data);
+               shash_desc_zero(desc);
+               if (ret) {
+                       kfree(cksum.data);
+                       goto out;
+               }
+               cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
+               memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+                      cksum.data, cksum.len);
+               kfree(cksum.data);
+       } else
+               cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
+
+       ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
+       if (!ret) {
+               ret = cmsg->cm_status;
+               set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+       }
+
+out:
+       free_cld_upcall(cup);
+out_err:
+       if (ret)
+               pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+                               ret);
+}
+
 /* Ask daemon to create a new record */
 static void
 nfsd4_cld_remove(struct nfs4_client *clp)
@@ -1103,20 +1218,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
        if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Remove;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1145,21 +1260,21 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return 0;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                printk(KERN_ERR "NFSD: Unable to check client record on "
                                "stable storage: %d\n", -ENOMEM);
                return -ENOMEM;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Check;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1216,6 +1331,79 @@ found:
        return 0;
 }
 
+static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+       struct nfs4_client_reclaim *crp;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct cld_net *cn = nn->cld_net;
+       int status;
+       char dname[HEXDIR_LEN];
+       struct xdr_netobj name;
+       struct crypto_shash *tfm = cn->cn_tfm;
+       struct xdr_netobj cksum;
+       char *principal = NULL;
+       SHASH_DESC_ON_STACK(desc, tfm);
+
+       /* did we already find that this client is stable? */
+       if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+               return 0;
+
+       /* look for it in the reclaim hashtable otherwise */
+       crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+       if (crp)
+               goto found;
+
+       if (cn->cn_has_legacy) {
+               status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+               if (status)
+                       return -ENOENT;
+
+               name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+               if (!name.data) {
+                       dprintk("%s: failed to allocate memory for name.data\n",
+                                       __func__);
+                       return -ENOENT;
+               }
+               name.len = HEXDIR_LEN;
+               crp = nfsd4_find_reclaim_client(name, nn);
+               kfree(name.data);
+               if (crp)
+                       goto found;
+
+       }
+       return -ENOENT;
+found:
+       if (crp->cr_princhash.len) {
+               if (clp->cl_cred.cr_raw_principal)
+                       principal = clp->cl_cred.cr_raw_principal;
+               else if (clp->cl_cred.cr_principal)
+                       principal = clp->cl_cred.cr_principal;
+               if (principal == NULL)
+                       return -ENOENT;
+               desc->tfm = tfm;
+               cksum.len = crypto_shash_digestsize(tfm);
+               cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+               if (cksum.data == NULL)
+                       return -ENOENT;
+               status = crypto_shash_digest(desc, principal, strlen(principal),
+                                            cksum.data);
+               shash_desc_zero(desc);
+               if (status) {
+                       kfree(cksum.data);
+                       return -ENOENT;
+               }
+               if (memcmp(crp->cr_princhash.data, cksum.data,
+                               crp->cr_princhash.len)) {
+                       kfree(cksum.data);
+                       return -ENOENT;
+               }
+               kfree(cksum.data);
+       }
+       crp->cr_clp = clp;
+       return 0;
+}
+
 static int
 nfsd4_cld_grace_start(struct nfsd_net *nn)
 {
@@ -1223,16 +1411,16 @@ nfsd4_cld_grace_start(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceStart;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1250,17 +1438,17 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceDone;
-       cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+       cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1279,16 +1467,16 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceDone;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1336,6 +1524,53 @@ cld_running(struct nfsd_net *nn)
        return pipe->nreaders || pipe->nwriters;
 }
 
+static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+       int ret = 0;
+       struct cld_upcall *cup;
+       struct cld_net *cn = nn->cld_net;
+       uint8_t version;
+
+       cup = alloc_cld_upcall(nn);
+       if (!cup) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+       cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
+       if (!ret) {
+               ret = cup->cu_u.cu_msg.cm_status;
+               if (ret)
+                       goto out_free;
+               version = cup->cu_u.cu_msg.cm_u.cm_version;
+               dprintk("%s: userspace returned version %u\n",
+                               __func__, version);
+               if (version < 1)
+                       version = 1;
+               else if (version > CLD_UPCALL_VERSION)
+                       version = CLD_UPCALL_VERSION;
+
+               switch (version) {
+               case 1:
+                       nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+                       break;
+               case 2:
+                       nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+                       break;
+               default:
+                       break;
+               }
+       }
+out_free:
+       free_cld_upcall(cup);
+out_err:
+       if (ret)
+               dprintk("%s: Unable to get version from userspace: %d\n",
+                       __func__, ret);
+       return ret;
+}
+
 static int
 nfsd4_cld_tracking_init(struct net *net)
 {
@@ -1351,6 +1586,11 @@ nfsd4_cld_tracking_init(struct net *net)
        status = __nfsd4_init_cld_pipe(net);
        if (status)
                goto err_shutdown;
+       nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0);
+       if (IS_ERR(nn->cld_net->cn_tfm)) {
+               status = PTR_ERR(nn->cld_net->cn_tfm);
+               goto err_remove;
+       }
 
        /*
         * rpc pipe upcalls take 30 seconds to time out, so we don't want to
@@ -1368,10 +1608,14 @@ nfsd4_cld_tracking_init(struct net *net)
                goto err_remove;
        }
 
+       status = nfsd4_cld_get_version(nn);
+       if (status == -EOPNOTSUPP)
+               pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
+
        status = nfsd4_cld_grace_start(nn);
        if (status) {
                if (status == -EOPNOTSUPP)
-                       printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
+                       pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
                nfs4_release_reclaim(nn);
                goto err_remove;
        } else
@@ -1403,6 +1647,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
        .remove         = nfsd4_cld_remove,
        .check          = nfsd4_cld_check_v0,
        .grace_done     = nfsd4_cld_grace_done_v0,
+       .version        = 1,
+       .msglen         = sizeof(struct cld_msg),
 };
 
 /* For newer nfsdcld's */
@@ -1413,6 +1659,20 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
        .remove         = nfsd4_cld_remove,
        .check          = nfsd4_cld_check,
        .grace_done     = nfsd4_cld_grace_done,
+       .version        = 1,
+       .msglen         = sizeof(struct cld_msg),
+};
+
+/* v2 create/check ops include the principal, if available */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+       .init           = nfsd4_cld_tracking_init,
+       .exit           = nfsd4_cld_tracking_exit,
+       .create         = nfsd4_cld_create_v2,
+       .remove         = nfsd4_cld_remove,
+       .check          = nfsd4_cld_check_v2,
+       .grace_done     = nfsd4_cld_grace_done,
+       .version        = 2,
+       .msglen         = sizeof(struct cld_msg_v2),
 };
 
 /* upcall via usermodehelper */
@@ -1760,6 +2020,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
        .remove         = nfsd4_umh_cltrack_remove,
        .check          = nfsd4_umh_cltrack_check,
        .grace_done     = nfsd4_umh_cltrack_grace_done,
+       .version        = 1,
+       .msglen         = 0,
 };
 
 int
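The new .version and .msglen fields give common code one place to learn which upcall format a tracker speaks and how many bytes its pipe message carries; the usermodehelper tracker advertises msglen 0 because it execs a program instead of writing to rpc_pipefs. An illustrative accessor (an assumption, not part of the patch):

        /* illustrative only: size a pipe upcall from the active table */
        static inline size_t cld_upcall_msglen(const struct nfsd_net *nn)
        {
                return nn->client_tracking_ops->msglen; /* 0: no pipe message */
        }
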
index 7857942..c65aeaa 100644 (file)
@@ -50,6 +50,7 @@
 
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -429,18 +430,18 @@ put_nfs4_file(struct nfs4_file *fi)
        }
 }
 
-static struct file *
+static struct nfsd_file *
 __nfs4_get_fd(struct nfs4_file *f, int oflag)
 {
        if (f->fi_fds[oflag])
-               return get_file(f->fi_fds[oflag]);
+               return nfsd_file_get(f->fi_fds[oflag]);
        return NULL;
 }
 
-static struct file *
+static struct nfsd_file *
 find_writeable_file_locked(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        lockdep_assert_held(&f->fi_lock);
 
@@ -450,10 +451,10 @@ find_writeable_file_locked(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *
+static struct nfsd_file *
 find_writeable_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = find_writeable_file_locked(f);
@@ -462,9 +463,10 @@ find_writeable_file(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        lockdep_assert_held(&f->fi_lock);
 
@@ -474,10 +476,10 @@ static struct file *find_readable_file_locked(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *
+static struct nfsd_file *
 find_readable_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = find_readable_file_locked(f);
@@ -486,10 +488,10 @@ find_readable_file(struct nfs4_file *f)
        return ret;
 }
 
-struct file *
+struct nfsd_file *
 find_any_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = __nfs4_get_fd(f, O_RDWR);
@@ -590,17 +592,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
        might_lock(&fp->fi_lock);
 
        if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
-               struct file *f1 = NULL;
-               struct file *f2 = NULL;
+               struct nfsd_file *f1 = NULL;
+               struct nfsd_file *f2 = NULL;
 
                swap(f1, fp->fi_fds[oflag]);
                if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
                        swap(f2, fp->fi_fds[O_RDWR]);
                spin_unlock(&fp->fi_lock);
                if (f1)
-                       fput(f1);
+                       nfsd_file_put(f1);
                if (f2)
-                       fput(f2);
+                       nfsd_file_put(f2);
        }
 }
 
@@ -933,25 +935,25 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
 
 static void put_deleg_file(struct nfs4_file *fp)
 {
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
 
        spin_lock(&fp->fi_lock);
        if (--fp->fi_delegees == 0)
-               swap(filp, fp->fi_deleg_file);
+               swap(nf, fp->fi_deleg_file);
        spin_unlock(&fp->fi_lock);
 
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
 }
 
 static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
 {
        struct nfs4_file *fp = dp->dl_stid.sc_file;
-       struct file *filp = fp->fi_deleg_file;
+       struct nfsd_file *nf = fp->fi_deleg_file;
 
        WARN_ON_ONCE(!fp->fi_delegees);
 
-       vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+       vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
        put_deleg_file(fp);
 }
 
@@ -1289,11 +1291,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
 {
        struct nfs4_ol_stateid *stp = openlockstateid(stid);
        struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
-       struct file *file;
+       struct nfsd_file *nf;
 
-       file = find_any_file(stp->st_stid.sc_file);
-       if (file)
-               filp_close(file, (fl_owner_t)lo);
+       nf = find_any_file(stp->st_stid.sc_file);
+       if (nf) {
+               get_file(nf->nf_file);
+               filp_close(nf->nf_file, (fl_owner_t)lo);
+               nfsd_file_put(nf);
+       }
        nfs4_free_ol_stateid(stid);
 }
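Reference-counting note for the hunk above: filp_close() drops a struct-file reference via fput() on its way out, but nf->nf_file is now owned by the file cache entry, so the code takes an extra get_file() reference for filp_close() to consume and then releases only its nfsd_file reference. In outline:

        /*
         * nfsd_file_get()  (via find_any_file) -> +1 nfsd_file ref
         * get_file()                           -> +1 file ref, consumed
         *                                         by filp_close()'s fput()
         * nfsd_file_put()                      -> -1 nfsd_file ref
         * net: this owner's POSIX locks are removed without the cache's
         * reference to nf->nf_file ever being dropped here.
         */
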
 
@@ -1563,21 +1568,39 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
  * re-negotiate active sessions and reduce their slot usage to make
  * room for new connections. For now we just fail the create session.
  */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
+static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
 {
        u32 slotsize = slot_bytes(ca);
        u32 num = ca->maxreqs;
        unsigned long avail, total_avail;
+       unsigned int scale_factor;
 
        spin_lock(&nfsd_drc_lock);
-       total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+       if (nfsd_drc_max_mem > nfsd_drc_mem_used)
+               total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+       else
+               /* We have handed out more space than we chose in
+                * set_max_drc() to allow.  That isn't really a
+                * problem as long as that doesn't make us think we
+                * have lots more due to integer overflow.
+                */
+               total_avail = 0;
        avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
        /*
-        * Never use more than a third of the remaining memory,
-        * unless it's the only way to give this client a slot:
+        * Never use more than a fraction of the remaining memory,
+        * unless it's the only way to give this client a slot.
+        * The chosen fraction is either 1/8 or 1/number of threads,
+        * whichever is smaller.  This ensures there are adequate
+        * slots to support multiple clients per thread.
+        * Give the client one slot even if that would require
+        * over-allocation--it is better than failure.
         */
-       avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
+       scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
+
+       avail = clamp_t(unsigned long, avail, slotsize,
+                       total_avail/scale_factor);
        num = min_t(int, num, avail / slotsize);
+       num = max_t(int, num, 1);
        nfsd_drc_mem_used += num * slotsize;
        spin_unlock(&nfsd_drc_lock);
 
@@ -2323,9 +2346,9 @@ static void states_stop(struct seq_file *s, void *v)
        spin_unlock(&clp->cl_lock);
 }
 
-static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
 {
-       struct inode *inode = file_inode(f);
+       struct inode *inode = f->nf_inode;
 
        seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
                                        MAJOR(inode->i_sb->s_dev),
@@ -2343,7 +2366,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_ol_stateid *ols;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
        struct nfs4_stateowner *oo;
        unsigned int access, deny;
 
@@ -2370,7 +2393,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
        seq_printf(s, ", ");
        nfs4_show_owner(s, oo);
        seq_printf(s, " }\n");
-       fput(file);
+       nfsd_file_put(file);
 
        return 0;
 }
@@ -2379,7 +2402,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_ol_stateid *ols;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
        struct nfs4_stateowner *oo;
 
        ols = openlockstateid(st);
@@ -2401,7 +2424,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
        seq_printf(s, ", ");
        nfs4_show_owner(s, oo);
        seq_printf(s, " }\n");
-       fput(file);
+       nfsd_file_put(file);
 
        return 0;
 }
@@ -2410,7 +2433,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_delegation *ds;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
 
        ds = delegstateid(st);
        nf = st->sc_file;
@@ -2433,7 +2456,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_layout_stateid *ls;
-       struct file *file;
+       struct nfsd_file *file;
 
        ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
        file = ls->ls_file;
@@ -3169,10 +3192,10 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
         * performance.  When short on memory we therefore prefer to
         * decrease number of slots instead of their size.  Clients that
         * request larger slots than they need will get poor results:
+        * Note that we always allow at least one slot, because our
+        * accounting is soft and provides no guarantees either way.
         */
-       ca->maxreqs = nfsd4_get_drc_mem(ca);
-       if (!ca->maxreqs)
-               return nfserr_jukebox;
+       ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
 
        return nfs_ok;
 }
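To make the DRC arithmetic in nfsd4_get_drc_mem() concrete, a worked example with hypothetical numbers (ignoring the NFSD_MAX_MEM_PER_SESSION cap):

        /*
         * total_avail = 64 MB, sv_nrthreads = 32, slotsize = 32 KB:
         *   scale_factor   = max(8, 32)             = 32
         *   per-client cap = 64 MB / 32             = 2 MB
         *   num            = min(num, 2 MB / 32 KB) <= 64 slots
         * and even with total_avail == 0, the final max_t() floors num
         * at one slot, so CREATE_SESSION over-allocates rather than
         * returning nfserr_jukebox as it used to.
         */
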
@@ -4651,7 +4674,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
                struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
                struct nfsd4_open *open)
 {
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        __be32 status;
        int oflag = nfs4_access_to_omode(open->op_share_access);
        int access = nfs4_access_to_access(open->op_share_access);
@@ -4687,18 +4710,18 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 
        if (!fp->fi_fds[oflag]) {
                spin_unlock(&fp->fi_lock);
-               status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+               status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
                if (status)
                        goto out_put_access;
                spin_lock(&fp->fi_lock);
                if (!fp->fi_fds[oflag]) {
-                       fp->fi_fds[oflag] = filp;
-                       filp = NULL;
+                       fp->fi_fds[oflag] = nf;
+                       nf = NULL;
                }
        }
        spin_unlock(&fp->fi_lock);
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
 
        status = nfsd4_truncate(rqstp, cur_fh, open);
        if (status)
@@ -4767,7 +4790,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
        fl->fl_end = OFFSET_MAX;
        fl->fl_owner = (fl_owner_t)dp;
        fl->fl_pid = current->tgid;
-       fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
+       fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
        return fl;
 }
 
@@ -4777,7 +4800,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 {
        int status = 0;
        struct nfs4_delegation *dp;
-       struct file *filp;
+       struct nfsd_file *nf;
        struct file_lock *fl;
 
        /*
@@ -4788,8 +4811,8 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (fp->fi_had_conflict)
                return ERR_PTR(-EAGAIN);
 
-       filp = find_readable_file(fp);
-       if (!filp) {
+       nf = find_readable_file(fp);
+       if (!nf) {
                /* We should always have a readable file here */
                WARN_ON_ONCE(1);
                return ERR_PTR(-EBADF);
@@ -4799,17 +4822,17 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (nfs4_delegation_exists(clp, fp))
                status = -EAGAIN;
        else if (!fp->fi_deleg_file) {
-               fp->fi_deleg_file = filp;
+               fp->fi_deleg_file = nf;
                /* increment early to prevent fi_deleg_file from being
                 * cleared */
                fp->fi_delegees = 1;
-               filp = NULL;
+               nf = NULL;
        } else
                fp->fi_delegees++;
        spin_unlock(&fp->fi_lock);
        spin_unlock(&state_lock);
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
        if (status)
                return ERR_PTR(status);
 
@@ -4822,7 +4845,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (!fl)
                goto out_clnt_odstate;
 
-       status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+       status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
        if (fl)
                locks_free_lock(fl);
        if (status)
@@ -4842,7 +4865,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 
        return dp;
 out_unlock:
-       vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
+       vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
 out_clnt_odstate:
        put_clnt_odstate(dp->dl_clnt_odstate);
        nfs4_put_stid(&dp->dl_stid);
@@ -5513,7 +5536,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
        return nfs_ok;
 }
 
-static struct file *
+static struct nfsd_file *
 nfs4_find_file(struct nfs4_stid *s, int flags)
 {
        if (!s)
@@ -5523,7 +5546,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
        case NFS4_DELEG_STID:
                if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
                        return NULL;
-               return get_file(s->sc_file->fi_deleg_file);
+               return nfsd_file_get(s->sc_file->fi_deleg_file);
        case NFS4_OPEN_STID:
        case NFS4_LOCK_STID:
                if (flags & RD_STATE)
@@ -5549,32 +5572,28 @@ nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
 
 static __be32
 nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
-               struct file **filpp, bool *tmp_file, int flags)
+               struct nfsd_file **nfp, int flags)
 {
        int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
-       struct file *file;
+       struct nfsd_file *nf;
        __be32 status;
 
-       file = nfs4_find_file(s, flags);
-       if (file) {
+       nf = nfs4_find_file(s, flags);
+       if (nf) {
                status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
                                acc | NFSD_MAY_OWNER_OVERRIDE);
                if (status) {
-                       fput(file);
-                       return status;
+                       nfsd_file_put(nf);
+                       goto out;
                }
-
-               *filpp = file;
        } else {
-               status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+               status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
                if (status)
                        return status;
-
-               if (tmp_file)
-                       *tmp_file = true;
        }
-
-       return 0;
+       *nfp = nf;
+out:
+       return status;
 }
 
 /*
@@ -5583,7 +5602,7 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
 __be32
 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-               stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+               stateid_t *stateid, int flags, struct nfsd_file **nfp)
 {
        struct inode *ino = d_inode(fhp->fh_dentry);
        struct net *net = SVC_NET(rqstp);
@@ -5591,10 +5610,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
        struct nfs4_stid *s = NULL;
        __be32 status;
 
-       if (filpp)
-               *filpp = NULL;
-       if (tmp_file)
-               *tmp_file = false;
+       if (nfp)
+               *nfp = NULL;
 
        if (grace_disallows_io(net, ino))
                return nfserr_grace;
@@ -5631,8 +5648,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
        status = nfs4_check_fh(fhp, s);
 
 done:
-       if (!status && filpp)
-               status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+       if (status == nfs_ok && nfp)
+               status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
 out:
        if (s)
                nfs4_put_stid(s);
@@ -6392,7 +6409,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfs4_ol_stateid *lock_stp = NULL;
        struct nfs4_ol_stateid *open_stp = NULL;
        struct nfs4_file *fp;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        struct nfsd4_blocked_lock *nbl = NULL;
        struct file_lock *file_lock = NULL;
        struct file_lock *conflock = NULL;
@@ -6474,8 +6491,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        /* Fallthrough */
                case NFS4_READ_LT:
                        spin_lock(&fp->fi_lock);
-                       filp = find_readable_file_locked(fp);
-                       if (filp)
+                       nf = find_readable_file_locked(fp);
+                       if (nf)
                                get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
                        spin_unlock(&fp->fi_lock);
                        fl_type = F_RDLCK;
@@ -6486,8 +6503,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        /* Fallthrough */
                case NFS4_WRITE_LT:
                        spin_lock(&fp->fi_lock);
-                       filp = find_writeable_file_locked(fp);
-                       if (filp)
+                       nf = find_writeable_file_locked(fp);
+                       if (nf)
                                get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
                        spin_unlock(&fp->fi_lock);
                        fl_type = F_WRLCK;
@@ -6497,7 +6514,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                goto out;
        }
 
-       if (!filp) {
+       if (!nf) {
                status = nfserr_openmode;
                goto out;
        }
@@ -6513,7 +6530,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        file_lock->fl_type = fl_type;
        file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
        file_lock->fl_pid = current->tgid;
-       file_lock->fl_file = filp;
+       file_lock->fl_file = nf->nf_file;
        file_lock->fl_flags = fl_flags;
        file_lock->fl_lmops = &nfsd_posix_mng_ops;
        file_lock->fl_start = lock->lk_offset;
@@ -6535,7 +6552,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                spin_unlock(&nn->blocked_locks_lock);
        }
 
-       err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+       err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
        switch (err) {
        case 0: /* success! */
                nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6570,8 +6587,8 @@ out:
                }
                free_blocked_lock(nbl);
        }
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
        if (lock_stp) {
                /* Bump seqid manually if the 4.0 replay owner is openowner */
                if (cstate->replay_owner &&
@@ -6606,11 +6623,11 @@ out:
  */
 static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
-       struct file *file;
-       __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+       struct nfsd_file *nf;
+       __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
        if (!err) {
-               err = nfserrno(vfs_test_lock(file, lock));
-               fput(file);
+               err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+               nfsd_file_put(nf);
        }
        return err;
 }
@@ -6698,7 +6715,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        struct nfsd4_locku *locku = &u->locku;
        struct nfs4_ol_stateid *stp;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        struct file_lock *file_lock = NULL;
        __be32 status;
        int err;
@@ -6716,8 +6733,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                        &stp, nn);
        if (status)
                goto out;
-       filp = find_any_file(stp->st_stid.sc_file);
-       if (!filp) {
+       nf = find_any_file(stp->st_stid.sc_file);
+       if (!nf) {
                status = nfserr_lock_range;
                goto put_stateid;
        }
@@ -6725,13 +6742,13 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (!file_lock) {
                dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
                status = nfserr_jukebox;
-               goto fput;
+               goto put_file;
        }
 
        file_lock->fl_type = F_UNLCK;
        file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
        file_lock->fl_pid = current->tgid;
-       file_lock->fl_file = filp;
+       file_lock->fl_file = nf->nf_file;
        file_lock->fl_flags = FL_POSIX;
        file_lock->fl_lmops = &nfsd_posix_mng_ops;
        file_lock->fl_start = locku->lu_offset;
@@ -6740,14 +6757,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                                locku->lu_length);
        nfs4_transform_lock_offset(file_lock);
 
-       err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+       err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
        if (err) {
                dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
                goto out_nfserr;
        }
        nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
-fput:
-       fput(filp);
+put_file:
+       nfsd_file_put(nf);
 put_stateid:
        mutex_unlock(&stp->st_mutex);
        nfs4_put_stid(&stp->st_stid);
@@ -6759,7 +6776,7 @@ out:
 
 out_nfserr:
        status = nfserrno(err);
-       goto fput;
+       goto put_file;
 }
 
 /*
@@ -6772,17 +6789,17 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 {
        struct file_lock *fl;
        int status = false;
-       struct file *filp = find_any_file(fp);
+       struct nfsd_file *nf = find_any_file(fp);
        struct inode *inode;
        struct file_lock_context *flctx;
 
-       if (!filp) {
+       if (!nf) {
                /* Any valid lock stateid should have some sort of access */
                WARN_ON_ONCE(1);
                return status;
        }
 
-       inode = locks_inode(filp);
+       inode = locks_inode(nf->nf_file);
        flctx = inode->i_flctx;
 
        if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -6795,7 +6812,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
                }
                spin_unlock(&flctx->flc_lock);
        }
-       fput(filp);
+       nfsd_file_put(nf);
        return status;
 }
 
@@ -6888,7 +6905,8 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
  * will be freed in nfs4_remove_reclaim_record in the normal case).
  */
 struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+               struct nfsd_net *nn)
 {
        unsigned int strhashval;
        struct nfs4_client_reclaim *crp;
@@ -6901,6 +6919,8 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
                list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
                crp->cr_name.data = name.data;
                crp->cr_name.len = name.len;
+               crp->cr_princhash.data = princhash.data;
+               crp->cr_princhash.len = princhash.len;
                crp->cr_clp = NULL;
                nn->reclaim_str_hashtbl_size++;
        }
@@ -6912,6 +6932,7 @@ nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
 {
        list_del(&crp->cr_strhash);
        kfree(crp->cr_name.data);
+       kfree(crp->cr_princhash.data);
        kfree(crp);
        nn->reclaim_str_hashtbl_size--;
 }
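cr_princhash carries a digest of the client's Kerberos principal, recorded alongside the recovery name so it can be checked on reclaim; the "sha256" shash allocated into cn_tfm in nfsd4_cld_tracking_init() is what produces it. A minimal sketch of computing such a digest with the kernel crypto API (hash_principal and its NUL-terminated principal argument are assumptions, not this patch's exact code):

        #include <crypto/hash.h>

        static int hash_principal(struct crypto_shash *tfm,
                                  const char *princ, u8 *digest)
        {
                SHASH_DESC_ON_STACK(desc, tfm);
                int ret;

                desc->tfm = tfm;
                /* digest must hold SHA256_DIGEST_SIZE bytes */
                ret = crypto_shash_digest(desc, princ, strlen(princ), digest);
                shash_desc_zero(desc);
                return ret;
        }
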
index 4428118..533d0fc 100644 (file)
@@ -49,6 +49,7 @@
 #include "cache.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
@@ -203,6 +204,13 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
        return p;
 }
 
+static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
+{
+       unsigned int this = (char *)argp->end - (char *)argp->p;
+
+       return this + argp->pagelen;
+}
+
 static int zero_clientid(clientid_t *clid)
 {
        return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,10 +219,10 @@ static int zero_clientid(clientid_t *clid)
 /**
  * svcxdr_tmpalloc - allocate memory to be freed after compound processing
  * @argp: NFSv4 compound argument structure
- * @p: pointer to be freed (with kfree())
+ * @len: length of buffer to allocate
  *
- * Marks @p to be freed when processing the compound operation
- * described in @argp finishes.
+ * Allocates a buffer of size @len to be freed when processing the compound
+ * operation described in @argp finishes.
  */
 static void *
 svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
@@ -347,7 +355,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                READ_BUF(4); len += 4;
                nace = be32_to_cpup(p++);
 
-               if (nace > NFS4_ACL_MAX)
+               if (nace > compoundargs_bytes_left(argp)/20)
+                       /*
+                        * Even with 4-byte names there wouldn't be
+                        * space for that many aces; something fishy is
+                        * going on:
+                        */
                        return nfserr_fbig;
 
                *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
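The divisor in the sanity check above comes from the smallest possible XDR-encoded ACE; spelled out, assuming the degenerate 4-byte who string the comment mentions:

        /*
         * nfsace4 on the wire: type(4) + flag(4) + access_mask(4)
         *                      + who.len(4) + who.data(4, padded)
         *                    = 5 XDR words = 20 bytes minimum,
         * so any nace larger than compoundargs_bytes_left(argp)/20
         * cannot be backed by real data.
         */
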
@@ -1418,7 +1431,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
                            struct nfsd4_create_session *sess)
 {
        DECODE_HEAD;
-       u32 dummy;
 
        READ_BUF(16);
        COPYMEM(&sess->clientid, 8);
@@ -1427,7 +1439,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 
        /* Fore channel attrs */
        READ_BUF(28);
-       dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+       p++; /* headerpadsz is always 0 */
        sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
        sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
        sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1444,7 +1456,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 
        /* Back channel attrs */
        READ_BUF(28);
-       dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+       p++; /* headerpadsz is always 0 */
        sess->back_channel.maxreq_sz = be32_to_cpup(p++);
        sess->back_channel.maxresp_sz = be32_to_cpup(p++);
        sess->back_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1736,7 +1748,6 @@ static __be32
 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 {
        DECODE_HEAD;
-       unsigned int tmp;
 
        status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
        if (status)
@@ -1751,7 +1762,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
        p = xdr_decode_hyper(p, &copy->cp_count);
        p++; /* ca_consecutive: we always do consecutive copies */
        copy->cp_synchronous = be32_to_cpup(p++);
-       tmp = be32_to_cpup(p); /* Source server list not supported */
+       /* tmp = be32_to_cpup(p); Source server list not supported */
 
        DECODE_TAIL;
 }
@@ -3217,9 +3228,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
        if (!p)
                return nfserr_resource;
        encode_cinfo(p, &create->cr_cinfo);
-       nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+       return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
                        create->cr_bmval[1], create->cr_bmval[2]);
-       return 0;
 }
 
 static __be32
@@ -3462,7 +3472,7 @@ static __be32 nfsd4_encode_splice_read(
 
        len = maxcount;
        nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
-                                 file, read->rd_offset, &maxcount);
+                                 file, read->rd_offset, &maxcount, &eof);
        read->rd_length = maxcount;
        if (nfserr) {
                /*
@@ -3474,9 +3484,6 @@ static __be32 nfsd4_encode_splice_read(
                return nfserr;
        }
 
-       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-                               d_inode(read->rd_fhp->fh_dentry)->i_size);
-
        *(p++) = htonl(eof);
        *(p++) = htonl(maxcount);
 
@@ -3547,15 +3554,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 
        len = maxcount;
        nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
-                           resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+                           resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
+                           &eof);
        read->rd_length = maxcount;
        if (nfserr)
                return nfserr;
        xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-                               d_inode(read->rd_fhp->fh_dentry)->i_size);
-
        tmp = htonl(eof);
        write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
        tmp = htonl(maxcount);
@@ -3574,11 +3579,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
        unsigned long maxcount;
        struct xdr_stream *xdr = &resp->xdr;
-       struct file *file = read->rd_filp;
+       struct file *file;
        int starting_len = xdr->buf->len;
-       struct raparms *ra = NULL;
        __be32 *p;
 
+       if (nfserr)
+               return nfserr;
+       file = read->rd_nf->nf_file;
+
        p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
        if (!p) {
                WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3596,18 +3604,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
                         (xdr->buf->buflen - xdr->buf->len));
        maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
-       if (read->rd_tmp_file)
-               ra = nfsd_init_raparms(file);
-
        if (file->f_op->splice_read &&
            test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
                nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
        else
                nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
 
-       if (ra)
-               nfsd_put_raparams(file, ra);
-
        if (nfserr)
                xdr_truncate_encode(xdr, starting_len);
 
index 2c21517..11b42c5 100644 (file)
@@ -1476,6 +1476,7 @@ static __net_init int nfsd_init_net(struct net *net)
 
        atomic_set(&nn->ntf_refcnt, 0);
        init_waitqueue_head(&nn->ntf_wq);
+       seqlock_init(&nn->boot_lock);
 
        mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
        if (IS_ERR(mnt)) {
index 0d20fd1..c83ddac 100644 (file)
@@ -172,6 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
        struct nfsd_readargs *argp = rqstp->rq_argp;
        struct nfsd_readres *resp = rqstp->rq_resp;
        __be32  nfserr;
+       u32 eof;
 
        dprintk("nfsd: READ    %s %d bytes at %d\n",
                SVCFH_fmt(&argp->fh),
@@ -195,7 +196,8 @@ nfsd_proc_read(struct svc_rqst *rqstp)
        nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
                                  argp->offset,
                                  rqstp->rq_vec, argp->vlen,
-                                 &resp->count);
+                                 &resp->count,
+                                 &eof);
 
        if (nfserr) return nfserr;
        return fh_getattr(&resp->fh, &resp->stat);
index 18d94ea..fdf7ed4 100644 (file)
@@ -27,6 +27,7 @@
 #include "cache.h"
 #include "vfs.h"
 #include "netns.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_SVC
 
@@ -313,22 +314,17 @@ static int nfsd_startup_generic(int nrservs)
        if (nfsd_users++)
                return 0;
 
-       /*
-        * Readahead param cache - will no-op if it already exists.
-        * (Note therefore results will be suboptimal if number of
-        * threads is modified after nfsd start.)
-        */
-       ret = nfsd_racache_init(2*nrservs);
+       ret = nfsd_file_cache_init();
        if (ret)
                goto dec_users;
 
        ret = nfs4_state_start();
        if (ret)
-               goto out_racache;
+               goto out_file_cache;
        return 0;
 
-out_racache:
-       nfsd_racache_shutdown();
+out_file_cache:
+       nfsd_file_cache_shutdown();
 dec_users:
        nfsd_users--;
        return ret;
@@ -340,7 +336,7 @@ static void nfsd_shutdown_generic(void)
                return;
 
        nfs4_state_shutdown();
-       nfsd_racache_shutdown();
+       nfsd_file_cache_shutdown();
 }
 
 static bool nfsd_needs_lockd(struct nfsd_net *nn)
@@ -348,6 +344,35 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
        return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
 }
 
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+{
+       int seq = 0;
+
+       do {
+               read_seqbegin_or_lock(&nn->boot_lock, &seq);
+               /*
+                * This is opaque to client, so no need to byte-swap. Use
+                * __force to keep sparse happy. y2038 time_t overflow is
+                * irrelevant in this usage
+                */
+               verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+               verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+       } while (need_seqretry(&nn->boot_lock, seq));
+       done_seqretry(&nn->boot_lock, seq);
+}
+
+static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+{
+       ktime_get_real_ts64(&nn->nfssvc_boot);
+}
+
+void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+{
+       write_seqlock(&nn->boot_lock);
+       nfsd_reset_boot_verifier_locked(nn);
+       write_sequnlock(&nn->boot_lock);
+}
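A seqlock suits the boot verifier because it is copied into every WRITE/COMMIT reply but rewritten only when a sync failure forces clients to resend, so readers stay lockless and read_seqbegin_or_lock() falls back to taking the lock only if retries keep colliding with a writer. A sketch of a consumer (resp->verf is a stand-in for the real v3/v4 encoder fields):

        __be32 verf[2];

        nfsd_copy_boot_verifier(verf, net_generic(SVC_NET(rqstp), nfsd_net_id));
        memcpy(resp->verf, verf, sizeof(verf)); /* 8 opaque bytes on the wire */
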
+
 static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
 {
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -391,6 +416,7 @@ static void nfsd_shutdown_net(struct net *net)
 {
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
+       nfsd_file_cache_purge(net);
        nfs4_state_shutdown_net(net);
        if (nn->lockd_up) {
                lockd_down(net);
@@ -599,7 +625,7 @@ int nfsd_create_serv(struct net *net)
 #endif
        }
        atomic_inc(&nn->ntf_refcnt);
-       ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
+       nfsd_reset_boot_verifier(nn);
        return 0;
 }
 
index 5dbd169..46f56af 100644 (file)
@@ -378,6 +378,7 @@ struct nfs4_client_reclaim {
        struct list_head        cr_strhash;     /* hash by cr_name */
        struct nfs4_client      *cr_clp;        /* pointer to associated clp */
        struct xdr_netobj       cr_name;        /* recovery dir name */
+       struct xdr_netobj       cr_princhash;
 };
 
 /* A reasonable value for REPLAY_ISIZE was estimated as follows:  
@@ -506,7 +507,7 @@ struct nfs4_file {
        };
        struct list_head        fi_clnt_odstate;
        /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
-       struct file *           fi_fds[3];
+       struct nfsd_file        *fi_fds[3];
        /*
         * Each open or lock stateid contributes 0-4 to the counts
         * below depending on which bits are set in st_access_bitmap:
@@ -516,7 +517,7 @@ struct nfs4_file {
         */
        atomic_t                fi_access[2];
        u32                     fi_share_deny;
-       struct file             *fi_deleg_file;
+       struct nfsd_file        *fi_deleg_file;
        int                     fi_delegees;
        struct knfsd_fh         fi_fhandle;
        bool                    fi_had_conflict;
@@ -565,7 +566,7 @@ struct nfs4_layout_stateid {
        spinlock_t                      ls_lock;
        struct list_head                ls_layouts;
        u32                             ls_layout_type;
-       struct file                     *ls_file;
+       struct nfsd_file                *ls_file;
        struct nfsd4_callback           ls_recall;
        stateid_t                       ls_recall_sid;
        bool                            ls_recalled;
@@ -616,7 +617,7 @@ struct nfsd4_copy;
 
 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-               stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
+               stateid_t *stateid, int flags, struct nfsd_file **filp);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
                     stateid_t *stateid, unsigned char typemask,
                     struct nfs4_stid **s, struct nfsd_net *nn);
@@ -645,7 +646,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
 extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
-                                                       struct nfsd_net *nn);
+                               struct xdr_netobj princhash, struct nfsd_net *nn);
 extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
 
 struct nfs4_file *find_file(struct knfsd_fh *fh);
@@ -657,7 +658,7 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
 {
        refcount_inc(&fi->fi_ref);
 }
-struct file *find_any_file(struct nfs4_file *f);
+struct nfsd_file *find_any_file(struct nfs4_file *f);
 
 /* grace period management */
 void nfsd4_end_grace(struct nfsd_net *nn);
index 80933e4..ffc78a0 100644 (file)
@@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
 DEFINE_NFSD_ERR_EVENT(write_err);
 
 #include "state.h"
+#include "filecache.h"
+#include "vfs.h"
 
 DECLARE_EVENT_CLASS(nfsd_stateid_class,
        TP_PROTO(stateid_t *stp),
@@ -164,6 +166,144 @@ DEFINE_STATEID_EVENT(layout_recall_done);
 DEFINE_STATEID_EVENT(layout_recall_fail);
 DEFINE_STATEID_EVENT(layout_recall_release);
 
+#define show_nf_flags(val)                                             \
+       __print_flags(val, "|",                                         \
+               { 1 << NFSD_FILE_HASHED,        "HASHED" },             \
+               { 1 << NFSD_FILE_PENDING,       "PENDING" },            \
+               { 1 << NFSD_FILE_BREAK_READ,    "BREAK_READ" },         \
+               { 1 << NFSD_FILE_BREAK_WRITE,   "BREAK_WRITE" },        \
+               { 1 << NFSD_FILE_REFERENCED,    "REFERENCED"})
+
+/* FIXME: This should probably be fleshed out in the future. */
+#define show_nf_may(val)                                               \
+       __print_flags(val, "|",                                         \
+               { NFSD_MAY_READ,                "READ" },               \
+               { NFSD_MAY_WRITE,               "WRITE" },              \
+               { NFSD_MAY_NOT_BREAK_LEASE,     "NOT_BREAK_LEASE" })
+
+DECLARE_EVENT_CLASS(nfsd_file_class,
+       TP_PROTO(struct nfsd_file *nf),
+       TP_ARGS(nf),
+       TP_STRUCT__entry(
+               __field(unsigned int, nf_hashval)
+               __field(void *, nf_inode)
+               __field(int, nf_ref)
+               __field(unsigned long, nf_flags)
+               __field(unsigned char, nf_may)
+               __field(struct file *, nf_file)
+       ),
+       TP_fast_assign(
+               __entry->nf_hashval = nf->nf_hashval;
+               __entry->nf_inode = nf->nf_inode;
+               __entry->nf_ref = atomic_read(&nf->nf_ref);
+               __entry->nf_flags = nf->nf_flags;
+               __entry->nf_may = nf->nf_may;
+               __entry->nf_file = nf->nf_file;
+       ),
+       TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+               __entry->nf_hashval,
+               __entry->nf_inode,
+               __entry->nf_ref,
+               show_nf_flags(__entry->nf_flags),
+               show_nf_may(__entry->nf_may),
+               __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_EVENT(name) \
+DEFINE_EVENT(nfsd_file_class, name, \
+       TP_PROTO(struct nfsd_file *nf), \
+       TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+
+TRACE_EVENT(nfsd_file_acquire,
+       TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
+                struct inode *inode, unsigned int may_flags,
+                struct nfsd_file *nf, __be32 status),
+
+       TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
+
+       TP_STRUCT__entry(
+               __field(__be32, xid)
+               __field(unsigned int, hash)
+               __field(void *, inode)
+               __field(unsigned int, may_flags)
+               __field(int, nf_ref)
+               __field(unsigned long, nf_flags)
+               __field(unsigned char, nf_may)
+               __field(struct file *, nf_file)
+               __field(__be32, status)
+       ),
+
+       TP_fast_assign(
+               __entry->xid = rqstp->rq_xid;
+               __entry->hash = hash;
+               __entry->inode = inode;
+               __entry->may_flags = may_flags;
+               __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
+               __entry->nf_flags = nf ? nf->nf_flags : 0;
+               __entry->nf_may = nf ? nf->nf_may : 0;
+               __entry->nf_file = nf ? nf->nf_file : NULL;
+               __entry->status = status;
+       ),
+
+       TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+                       be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
+                       show_nf_may(__entry->may_flags), __entry->nf_ref,
+                       show_nf_flags(__entry->nf_flags),
+                       show_nf_may(__entry->nf_may), __entry->nf_file,
+                       be32_to_cpu(__entry->status))
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_search_class,
+       TP_PROTO(struct inode *inode, unsigned int hash, int found),
+       TP_ARGS(inode, hash, found),
+       TP_STRUCT__entry(
+               __field(struct inode *, inode)
+               __field(unsigned int, hash)
+               __field(int, found)
+       ),
+       TP_fast_assign(
+               __entry->inode = inode;
+               __entry->hash = hash;
+               __entry->found = found;
+       ),
+       TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+                       __entry->inode, __entry->found)
+);
+
+#define DEFINE_NFSD_FILE_SEARCH_EVENT(name)                            \
+DEFINE_EVENT(nfsd_file_search_class, name,                             \
+       TP_PROTO(struct inode *inode, unsigned int hash, int found),    \
+       TP_ARGS(inode, hash, found))
+
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
+
+TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+       TP_PROTO(struct inode *inode, u32 mask),
+       TP_ARGS(inode, mask),
+       TP_STRUCT__entry(
+               __field(struct inode *, inode)
+               __field(unsigned int, nlink)
+               __field(umode_t, mode)
+               __field(u32, mask)
+       ),
+       TP_fast_assign(
+               __entry->inode = inode;
+               __entry->nlink = inode->i_nlink;
+               __entry->mode = inode->i_mode;
+               __entry->mask = mask;
+       ),
+       TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+                       __entry->nlink, __entry->mode, __entry->mask)
+);
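These events register under the "nfsd" trace system, so they can be toggled through tracefs once nfsd is loaded. A userspace sketch (the tracefs mount point is an assumption):

        #include <fcntl.h>
        #include <unistd.h>

        static void enable_nfsd_file_acquire(void)
        {
                int fd = open("/sys/kernel/tracing/events/nfsd/"
                              "nfsd_file_acquire/enable", O_WRONLY);

                if (fd >= 0) {
                        write(fd, "1", 1);
                        close(fd);
                }
        }
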
+
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
index c85783e..bd0a385 100644 (file)
 
 #include "nfsd.h"
 #include "vfs.h"
+#include "filecache.h"
 #include "trace.h"
 
 #define NFSDDBG_FACILITY               NFSDDBG_FILEOP
 
-
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
- */
-struct raparms {
-       struct raparms          *p_next;
-       unsigned int            p_count;
-       ino_t                   p_ino;
-       dev_t                   p_dev;
-       int                     p_set;
-       struct file_ra_state    p_ra;
-       unsigned int            p_hindex;
-};
-
-struct raparm_hbucket {
-       struct raparms          *pb_head;
-       spinlock_t              pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS       4
-#define RAPARM_HASH_SIZE       (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK       (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket   raparm_hash[RAPARM_HASH_SIZE];
-
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
  * a mount point.
@@ -699,7 +672,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
 }
 #endif /* CONFIG_NFSD_V3 */
 
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
 {
        unsigned int mode;
 
@@ -715,8 +688,8 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
  * and additional flags.
  * N.B. After this call fhp needs an fh_put
  */
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                        int may_flags, struct file **filp)
 {
        struct path     path;
@@ -726,25 +699,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
        __be32          err;
        int             host_err = 0;
 
-       validate_process_creds();
-
-       /*
-        * If we get here, then the client has already done an "open",
-        * and (hopefully) checked permission - so allow OWNER_OVERRIDE
-        * in case a chmod has now revoked permission.
-        *
-        * Arguably we should also allow the owner override for
-        * directories, but we never have and it doesn't seem to have
-        * caused anyone a problem.  If we were to change this, note
-        * also that our filldir callbacks would need a variant of
-        * lookup_one_len that doesn't check permissions.
-        */
-       if (type == S_IFREG)
-               may_flags |= NFSD_MAY_OWNER_OVERRIDE;
-       err = fh_verify(rqstp, fhp, type, may_flags);
-       if (err)
-               goto out;
-
        path.mnt = fhp->fh_export->ex_path.mnt;
        path.dentry = fhp->fh_dentry;
        inode = d_inode(path.dentry);
@@ -798,67 +752,46 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 out_nfserr:
        err = nfserrno(host_err);
 out:
-       validate_process_creds();
        return err;
 }
 
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+               int may_flags, struct file **filp)
 {
-       struct inode *inode = file_inode(file);
-       dev_t dev = inode->i_sb->s_dev;
-       ino_t ino = inode->i_ino;
-       struct raparms  *ra, **rap, **frap = NULL;
-       int depth = 0;
-       unsigned int hash;
-       struct raparm_hbucket *rab;
-
-       hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
-       rab = &raparm_hash[hash];
-
-       spin_lock(&rab->pb_lock);
-       for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
-               if (ra->p_ino == ino && ra->p_dev == dev)
-                       goto found;
-               depth++;
-               if (ra->p_count == 0)
-                       frap = rap;
-       }
-       depth = nfsdstats.ra_size;
-       if (!frap) {    
-               spin_unlock(&rab->pb_lock);
-               return NULL;
-       }
-       rap = frap;
-       ra = *frap;
-       ra->p_dev = dev;
-       ra->p_ino = ino;
-       ra->p_set = 0;
-       ra->p_hindex = hash;
-found:
-       if (rap != &rab->pb_head) {
-               *rap = ra->p_next;
-               ra->p_next   = rab->pb_head;
-               rab->pb_head = ra;
-       }
-       ra->p_count++;
-       nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
-       spin_unlock(&rab->pb_lock);
+       __be32 err;
 
-       if (ra->p_set)
-               file->f_ra = ra->p_ra;
-       return ra;
+       validate_process_creds();
+       /*
+        * If we get here, then the client has already done an "open",
+        * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+        * in case a chmod has now revoked permission.
+        *
+        * Arguably we should also allow the owner override for
+        * directories, but we never have and it doesn't seem to have
+        * caused anyone a problem.  If we were to change this, note
+        * also that our filldir callbacks would need a variant of
+        * lookup_one_len that doesn't check permissions.
+        */
+       if (type == S_IFREG)
+               may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+       err = fh_verify(rqstp, fhp, type, may_flags);
+       if (!err)
+               err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+       validate_process_creds();
+       return err;
 }
 
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+               int may_flags, struct file **filp)
 {
-       struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+       __be32 err;
 
-       spin_lock(&rab->pb_lock);
-       ra->p_ra = file->f_ra;
-       ra->p_set = 1;
-       ra->p_count--;
-       spin_unlock(&rab->pb_lock);
+       validate_process_creds();
+       err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+       validate_process_creds();
+       return err;
 }
 
 /*
@@ -901,12 +834,23 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
        return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
 }
 
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
+               size_t expected)
+{
+       if (expected != 0 && len == 0)
+               return 1;
+       if (offset+len >= i_size_read(file_inode(file)))
+               return 1;
+       return 0;
+}
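How the heuristic above behaves, on a few hypothetical inputs:

        /*
         * expected 4096, read 0 bytes     -> eof = 1 (read began at or
         *                                   past i_size)
         * offset + len reaches i_size     -> eof = 1
         * short read ending below i_size  -> eof = 0 (client retries)
         */
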
+
 static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                               struct file *file, loff_t offset,
-                              unsigned long *count, int host_err)
+                              unsigned long *count, u32 *eof, ssize_t host_err)
 {
        if (host_err >= 0) {
                nfsdstats.io_read += host_err;
+               *eof = nfsd_eof_on_read(file, offset, host_err, *count);
                *count = host_err;
                fsnotify_access(file);
                trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -918,7 +862,8 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 
 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-                       struct file *file, loff_t offset, unsigned long *count)
+                       struct file *file, loff_t offset, unsigned long *count,
+                       u32 *eof)
 {
        struct splice_desc sd = {
                .len            = 0,
@@ -926,25 +871,27 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                .pos            = offset,
                .u.data         = rqstp,
        };
-       int host_err;
+       ssize_t host_err;
 
        trace_nfsd_read_splice(rqstp, fhp, offset, *count);
        rqstp->rq_next_page = rqstp->rq_respages + 1;
        host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
-       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
 __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
                  struct file *file, loff_t offset,
-                 struct kvec *vec, int vlen, unsigned long *count)
+                 struct kvec *vec, int vlen, unsigned long *count,
+                 u32 *eof)
 {
        struct iov_iter iter;
-       int host_err;
+       loff_t ppos = offset;
+       ssize_t host_err;
 
        trace_nfsd_read_vector(rqstp, fhp, offset, *count);
        iov_iter_kvec(&iter, READ, vec, vlen, *count);
-       host_err = vfs_iter_read(file, &iter, &offset, 0);
-       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+       host_err = vfs_iter_read(file, &iter, &ppos, 0);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
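Why ppos: vfs_iter_read() advances the position variable it is handed. Passing a scratch copy keeps offset at the start of the read, so nfsd_finish_read() computes eof from offset + host_err against i_size rather than from an already-advanced offset.

        /*
         * with the old &offset form, offset ended up past the read,
         * so offset + host_err in the eof check overshot by exactly
         * the number of bytes just read.
         */
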
 
 /*
@@ -1025,8 +972,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        nfsdstats.io_write += *cnt;
        fsnotify_modify(file);
 
-       if (stable && use_wgather)
+       if (stable && use_wgather) {
                host_err = wait_for_concurrent_writes(file);
+               if (host_err < 0)
+                       nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+                                                nfsd_net_id));
+       }
 
 out_nfserr:
        if (host_err >= 0) {
@@ -1047,27 +998,25 @@ out_nfserr:
  * N.B. After this call fhp needs an fh_put
  */
 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-       loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+       loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+       u32 *eof)
 {
+       struct nfsd_file        *nf;
        struct file *file;
-       struct raparms  *ra;
        __be32 err;
 
        trace_nfsd_read_start(rqstp, fhp, offset, *count);
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+       err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
        if (err)
                return err;
 
-       ra = nfsd_init_raparms(file);
-
+       file = nf->nf_file;
        if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
-               err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+               err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
        else
-               err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
+               err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
 
-       if (ra)
-               nfsd_put_raparams(file, ra);
-       fput(file);
+       nfsd_file_put(nf);
 
        trace_nfsd_read_done(rqstp, fhp, offset, *count);
 
@@ -1083,17 +1032,18 @@ __be32
 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
           struct kvec *vec, int vlen, unsigned long *cnt, int stable)
 {
-       struct file *file = NULL;
-       __be32 err = 0;
+       struct nfsd_file *nf;
+       __be32 err;
 
        trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
 
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
        if (err)
                goto out;
 
-       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
-       fput(file);
+       err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
+                       vlen, cnt, stable);
+       nfsd_file_put(nf);
 out:
        trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
        return err;
@@ -1113,9 +1063,9 @@ __be32
 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                loff_t offset, unsigned long count)
 {
-       struct file     *file;
-       loff_t          end = LLONG_MAX;
-       __be32          err = nfserr_inval;
+       struct nfsd_file        *nf;
+       loff_t                  end = LLONG_MAX;
+       __be32                  err = nfserr_inval;
 
        if (offset < 0)
                goto out;
@@ -1125,20 +1075,27 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        goto out;
        }
 
-       err = nfsd_open(rqstp, fhp, S_IFREG,
-                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+       err = nfsd_file_acquire(rqstp, fhp,
+                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
        if (err)
                goto out;
        if (EX_ISSYNC(fhp->fh_export)) {
-               int err2 = vfs_fsync_range(file, offset, end, 0);
+               int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
 
-               if (err2 != -EINVAL)
-                       err = nfserrno(err2);
-               else
+               switch (err2) {
+               case 0:
+                       break;
+               case -EINVAL:
                        err = nfserr_notsupp;
+                       break;
+               default:
+                       err = nfserrno(err2);
+                       nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+                                                nfsd_net_id));
+               }
        }
 
-       fput(file);
+       nfsd_file_put(nf);
 out:
        return err;
 }
@@ -1659,6 +1616,26 @@ out_nfserr:
        goto out_unlock;
 }
 
+static void
+nfsd_close_cached_files(struct dentry *dentry)
+{
+       struct inode *inode = d_inode(dentry);
+
+       if (inode && S_ISREG(inode->i_mode))
+               nfsd_file_close_inode_sync(inode);
+}
+
+static bool
+nfsd_has_cached_files(struct dentry *dentry)
+{
+       bool            ret = false;
+       struct inode *inode = d_inode(dentry);
+
+       if (inode && S_ISREG(inode->i_mode))
+               ret = nfsd_file_is_cached(inode);
+       return ret;
+}
+
 /*
  * Rename a file
  * N.B. After this call _both_ ffhp and tfhp need an fh_put
@@ -1671,6 +1648,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        struct inode    *fdir, *tdir;
        __be32          err;
        int             host_err;
+       bool            has_cached = false;
 
        err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
        if (err)
@@ -1689,6 +1667,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
                goto out;
 
+retry:
        host_err = fh_want_write(ffhp);
        if (host_err) {
                err = nfserrno(host_err);
@@ -1728,11 +1707,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
                goto out_dput_new;
 
-       host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
-       if (!host_err) {
-               host_err = commit_metadata(tfhp);
-               if (!host_err)
-                       host_err = commit_metadata(ffhp);
+       if (nfsd_has_cached_files(ndentry)) {
+               has_cached = true;
+               goto out_dput_old;
+       } else {
+               host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+               if (!host_err) {
+                       host_err = commit_metadata(tfhp);
+                       if (!host_err)
+                               host_err = commit_metadata(ffhp);
+               }
        }
  out_dput_new:
        dput(ndentry);
@@ -1745,12 +1729,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
         * as that would do the wrong thing if the two directories
         * were the same, so again we do it by hand.
         */
-       fill_post_wcc(ffhp);
-       fill_post_wcc(tfhp);
+       if (!has_cached) {
+               fill_post_wcc(ffhp);
+               fill_post_wcc(tfhp);
+       }
        unlock_rename(tdentry, fdentry);
        ffhp->fh_locked = tfhp->fh_locked = false;
        fh_drop_write(ffhp);
 
+       /*
+        * If the target dentry has cached open files, then we need to try to
+        * close them prior to doing the rename. Flushing delayed fput
+        * shouldn't be done with locks held, however, so we delay it until this
+        * point and then reattempt the whole shebang.
+        */
+       if (has_cached) {
+               has_cached = false;
+               nfsd_close_cached_files(ndentry);
+               dput(ndentry);
+               goto retry;
+       }
 out:
        return err;
 }
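
The retry added to nfsd_rename() follows a familiar kernel shape: detect the conflict while holding the locks, back all the way out, resolve it (close the cached files), then redo the operation from the top. A compact userspace sketch of that control flow, with stand-ins for the nfsd helpers:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-ins for nfsd_has_cached_files() and
     * nfsd_close_cached_files() from the hunk above. */
    static bool target_cached = true;

    static bool has_cached_files(void) { return target_cached; }
    static void close_cached_files(void) { target_cached = false; }

    static int rename_with_retry(void)
    {
            bool retrying = false;
    retry:
            /* ...take the rename locks, look up source and target... */
            if (has_cached_files()) {
                    retrying = true;
                    goto unlock;            /* can't flush files under the locks */
            }
            puts("vfs_rename + commit_metadata");
    unlock:
            /* ...drop the rename locks... */
            if (retrying) {
                    retrying = false;
                    close_cached_files();   /* safe now that locks are dropped */
                    goto retry;             /* redo the whole operation */
            }
            return 0;
    }

    int main(void) { return rename_with_retry(); }
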
@@ -1797,10 +1795,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!type)
                type = d_inode(rdentry)->i_mode & S_IFMT;
 
-       if (type != S_IFDIR)
+       if (type != S_IFDIR) {
+               nfsd_close_cached_files(rdentry);
                host_err = vfs_unlink(dirp, rdentry, NULL);
-       else
+       } else {
                host_err = vfs_rmdir(dirp, rdentry);
+       }
+
        if (!host_err)
                host_err = commit_metadata(fhp);
        dput(rdentry);
@@ -2074,63 +2075,3 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 
        return err? nfserrno(err) : 0;
 }
-
-void
-nfsd_racache_shutdown(void)
-{
-       struct raparms *raparm, *last_raparm;
-       unsigned int i;
-
-       dprintk("nfsd: freeing readahead buffers.\n");
-
-       for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-               raparm = raparm_hash[i].pb_head;
-               while(raparm) {
-                       last_raparm = raparm;
-                       raparm = raparm->p_next;
-                       kfree(last_raparm);
-               }
-               raparm_hash[i].pb_head = NULL;
-       }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
-       int     i;
-       int     j = 0;
-       int     nperbucket;
-       struct raparms **raparm = NULL;
-
-
-       if (raparm_hash[0].pb_head)
-               return 0;
-       nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
-       nperbucket = max(2, nperbucket);
-       cache_size = nperbucket * RAPARM_HASH_SIZE;
-
-       dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
-       for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-               spin_lock_init(&raparm_hash[i].pb_lock);
-
-               raparm = &raparm_hash[i].pb_head;
-               for (j = 0; j < nperbucket; j++) {
-                       *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
-                       if (!*raparm)
-                               goto out_nomem;
-                       raparm = &(*raparm)->p_next;
-               }
-               *raparm = NULL;
-       }
-
-       nfsdstats.ra_size = cache_size;
-       return 0;
-
-out_nomem:
-       dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
-       nfsd_racache_shutdown();
-       return -ENOMEM;
-}
index db35124..a13fd9d 100644 (file)
@@ -40,8 +40,6 @@
 typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
 
 /* nfsd/vfs.c */
-int            nfsd_racache_init(int);
-void           nfsd_racache_shutdown(void);
 int            nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
                                struct svc_export **expp);
 __be32         nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -75,18 +73,23 @@ __be32              do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_commit(struct svc_rqst *, struct svc_fh *,
                                loff_t, unsigned long);
 #endif /* CONFIG_NFSD_V3 */
+int            nfsd_open_break_lease(struct inode *, int);
 __be32         nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
                                int, struct file **);
-struct raparms;
+__be32         nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+                               int, struct file **);
 __be32         nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
-                               unsigned long *count);
+                               unsigned long *count,
+                               u32 *eof);
 __be32         nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
                                struct kvec *vec, int vlen,
-                               unsigned long *count);
+                               unsigned long *count,
+                               u32 *eof);
 __be32                 nfsd_read(struct svc_rqst *, struct svc_fh *,
-                               loff_t, struct kvec *, int, unsigned long *);
+                               loff_t, struct kvec *, int, unsigned long *,
+                               u32 *eof);
 __be32                 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
                                struct kvec *, int, unsigned long *, int);
 __be32         nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -115,9 +118,6 @@ __be32              nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_permission(struct svc_rqst *, struct svc_export *,
                                struct dentry *, int);
 
-struct raparms *nfsd_init_raparms(struct file *file);
-void           nfsd_put_raparams(struct file *file, struct raparms *ra);
-
 static inline int fh_want_write(struct svc_fh *fh)
 {
        int ret;
@@ -152,23 +152,4 @@ static inline int nfsd_create_is_exclusive(int createmode)
               || createmode == NFS4_CREATE_EXCLUSIVE4_1;
 }
 
-static inline bool nfsd_eof_on_read(long requested, long read,
-                               loff_t offset, loff_t size)
-{
-       /* We assume a short read means eof: */
-       if (requested > read)
-               return true;
-       /*
-        * A non-short read might also reach end of file.  The spec
-        * still requires us to set eof in that case.
-        *
-        * Further operations may have modified the file size since
-        * the read, so the following check is not atomic with the read.
-        * We've only seen that cause a problem for a client in the case
-        * where the read returned a count of 0 without setting eof.
-        * That case was fixed by the addition of the above check.
-        */
-       return (offset + read >= size);
-}
-
 #endif /* LINUX_NFSD_VFS_H */
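
The nfsd_eof_on_read() helper removed above encoded the eof rule that now lives alongside the read itself: a short read implies eof, and even a full-length read must set eof when it reaches the current file size. A standalone sketch of exactly that check:

    #include <stdbool.h>
    #include <stdio.h>

    /* Same check the removed inline performed, outside the kernel. */
    static bool eof_on_read(long requested, long got, long long offset,
                            long long size)
    {
            if (requested > got)            /* short read: assume eof */
                    return true;
            return offset + got >= size;    /* full read may still hit eof */
    }

    int main(void)
    {
            printf("%d\n", eof_on_read(4096, 1000, 0, 1000));    /* 1 */
            printf("%d\n", eof_on_read(4096, 4096, 0, 8192));    /* 0 */
            printf("%d\n", eof_on_read(4096, 4096, 4096, 8192)); /* 1 */
            return 0;
    }
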
index 2cb29e9..99ff9f4 100644 (file)
@@ -151,7 +151,7 @@ struct nfsd3_readres {
        __be32                  status;
        struct svc_fh           fh;
        unsigned long           count;
-       int                     eof;
+       __u32                   eof;
 };
 
 struct nfsd3_writeres {
index d64c870..f4737d6 100644 (file)
@@ -273,15 +273,14 @@ struct nfsd4_open_downgrade {
 
 
 struct nfsd4_read {
-       stateid_t       rd_stateid;         /* request */
-       u64             rd_offset;          /* request */
-       u32             rd_length;          /* request */
-       int             rd_vlen;
-       struct file     *rd_filp;
-       bool            rd_tmp_file;
+       stateid_t               rd_stateid;         /* request */
+       u64                     rd_offset;          /* request */
+       u32                     rd_length;          /* request */
+       int                     rd_vlen;
+       struct nfsd_file        *rd_nf;
        
-       struct svc_rqst *rd_rqstp;          /* response */
-       struct svc_fh rd_fhp;             /* response */
+       struct svc_rqst         *rd_rqstp;          /* response */
+       struct svc_fh           *rd_fhp;             /* response */
 };
 
 struct nfsd4_readdir {
@@ -538,8 +537,8 @@ struct nfsd4_copy {
 
        struct nfs4_client      *cp_clp;
 
-       struct file             *file_src;
-       struct file             *file_dst;
+       struct nfsd_file        *nf_src;
+       struct nfsd_file        *nf_dst;
 
        stateid_t               cp_stateid;
 
index 5a00121..f346282 100644 (file)
@@ -54,8 +54,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
 {
        fsnotify_destroy_marks(&sb->s_fsnotify_marks);
 }
-/* Wait until all marks queued for destruction are destroyed */
-extern void fsnotify_wait_marks_destroyed(void);
 
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
index 0391190..133f723 100644 (file)
@@ -108,6 +108,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
        if (refcount_dec_and_test(&group->refcnt))
                fsnotify_final_destroy_group(group);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_group);
 
 /*
  * Create a new fsnotify_group and hold a reference for the group returned.
@@ -137,6 +138,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
        return group;
 }
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
 
 int fsnotify_fasync(int fd, struct file *file, int on)
 {
index 99ddd12..1d96216 100644 (file)
@@ -276,6 +276,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
        queue_delayed_work(system_unbound_wq, &reaper_work,
                           FSNOTIFY_REAPER_DELAY);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
 
 /*
  * Get mark reference when we found the mark via lockless traversal of object
@@ -430,6 +431,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
        mutex_unlock(&group->mark_mutex);
        fsnotify_free_mark(mark);
 }
+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
 
 /*
  * Sorting function for lists of fsnotify marks.
@@ -685,6 +687,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
        mutex_unlock(&group->mark_mutex);
        return ret;
 }
+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
 
 /*
  * Given a list of marks, find the mark associated with given group. If found
@@ -711,6 +714,7 @@ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
        spin_unlock(&conn->lock);
        return NULL;
 }
+EXPORT_SYMBOL_GPL(fsnotify_find_mark);
 
 /* Clear any marks in a group with given type mask */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
@@ -809,6 +813,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
        mark->group = group;
        WRITE_ONCE(mark->connector, NULL);
 }
+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
 
 /*
  * Destroy all marks in destroy_list, waits for SRCU period to finish before
@@ -837,3 +842,4 @@ void fsnotify_wait_marks_destroyed(void)
 {
        flush_delayed_work(&reaper_work);
 }
+EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
index 20c841a..3aac5c9 100644 (file)
@@ -71,7 +71,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
        }
        /* Read, map, and pin the page. */
        page = ntfs_map_page(mft_vi->i_mapping, index);
-       if (likely(!IS_ERR(page))) {
+       if (!IS_ERR(page)) {
                /* Catch multi sector transfer fixup errors. */
                if (likely(ntfs_is_mft_recordp((le32*)(page_address(page) +
                                ofs)))) {
@@ -154,7 +154,7 @@ MFT_RECORD *map_mft_record(ntfs_inode *ni)
        mutex_lock(&ni->mrec_lock);
 
        m = map_mft_record_page(ni);
-       if (likely(!IS_ERR(m)))
+       if (!IS_ERR(m))
                return m;
 
        mutex_unlock(&ni->mrec_lock);
@@ -271,7 +271,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
                m = map_mft_record(ni);
                /* map_mft_record() has incremented this on success. */
                atomic_dec(&ni->count);
-               if (likely(!IS_ERR(m))) {
+               if (!IS_ERR(m)) {
                        /* Verify the sequence number. */
                        if (likely(le16_to_cpu(m->sequence_number) == seq_no)) {
                                ntfs_debug("Done 1.");
@@ -1303,7 +1303,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
        read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
        rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
                        (ll - 1) >> vol->cluster_size_bits, NULL);
-       if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+       if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
                up_write(&mftbmp_ni->runlist.lock);
                ntfs_error(vol->sb, "Failed to determine last allocated "
                                "cluster of mft bitmap attribute.");
@@ -1734,7 +1734,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
        read_unlock_irqrestore(&mft_ni->size_lock, flags);
        rl = ntfs_attr_find_vcn_nolock(mft_ni,
                        (ll - 1) >> vol->cluster_size_bits, NULL);
-       if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+       if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
                up_write(&mft_ni->runlist.lock);
                ntfs_error(vol->sb, "Failed to determine last allocated "
                                "cluster of mft data attribute.");
@@ -1776,7 +1776,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
        do {
                rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
                                true);
-               if (likely(!IS_ERR(rl2)))
+               if (!IS_ERR(rl2))
                        break;
                if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
                        ntfs_error(vol->sb, "Failed to allocate the minimal "
index 2d3cc9e..4e6a44b 100644 (file)
@@ -115,7 +115,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
                dent_ino = MREF(mref);
                ntfs_debug("Found inode 0x%lx. Calling ntfs_iget.", dent_ino);
                dent_inode = ntfs_iget(vol->sb, dent_ino);
-               if (likely(!IS_ERR(dent_inode))) {
+               if (!IS_ERR(dent_inode)) {
                        /* Consistency check. */
                        if (is_bad_inode(dent_inode) || MSEQNO(mref) ==
                                        NTFS_I(dent_inode)->seq_no ||
index 508744a..97932fb 100644 (file)
@@ -951,7 +951,7 @@ mpa_err:
        }
        /* Now combine the new and old runlists checking for overlaps. */
        old_rl = ntfs_runlists_merge(old_rl, rl);
-       if (likely(!IS_ERR(old_rl)))
+       if (!IS_ERR(old_rl))
                return old_rl;
        ntfs_free(rl);
        ntfs_error(vol->sb, "Failed to merge runlists.");
index 29621d4..7dc3bc6 100644 (file)
@@ -1475,7 +1475,7 @@ not_enabled:
        kfree(name);
        /* Get the inode. */
        tmp_ino = ntfs_iget(vol->sb, MREF(mref));
-       if (unlikely(IS_ERR(tmp_ino) || is_bad_inode(tmp_ino))) {
+       if (IS_ERR(tmp_ino) || unlikely(is_bad_inode(tmp_ino))) {
                if (!IS_ERR(tmp_ino))
                        iput(tmp_ino);
                ntfs_error(vol->sb, "Failed to load $UsnJrnl.");
index 0c335b5..f9baefc 100644 (file)
@@ -5993,6 +5993,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        struct buffer_head *data_alloc_bh = NULL;
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
+       struct ocfs2_journal *journal = osb->journal;
 
        BUG_ON(inode_trylock(tl_inode));
 
@@ -6013,6 +6014,20 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
                goto out;
        }
 
+       /* Appending to the truncate log (TA) and flushing it (TF) are two
+        * separate transactions. Both can be committed without being
+        * checkpointed yet; if a crash occurs, both are replayed even though
+        * some clusters were already released to the global bitmap, so
+        * replaying the truncate log frees those clusters a second time.
+        */
+       jbd2_journal_lock_updates(journal->j_journal);
+       status = jbd2_journal_flush(journal->j_journal);
+       jbd2_journal_unlock_updates(journal->j_journal);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out;
+       }
+
        data_alloc_inode = ocfs2_get_system_file_inode(osb,
                                                       GLOBAL_BITMAP_SYSTEM_INODE,
                                                       OCFS2_INVALID_SLOT);
@@ -6792,6 +6807,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
                              struct page *page, int zero, u64 *phys)
 {
        int ret, partial = 0;
+       loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+       loff_t length = to - from;
 
        ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
        if (ret)
@@ -6811,7 +6828,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
        if (ret < 0)
                mlog_errno(ret);
        else if (ocfs2_should_order_data(inode)) {
-               ret = ocfs2_jbd2_file_inode(handle, inode);
+               ret = ocfs2_jbd2_inode_add_write(handle, inode,
+                                                start_byte, length);
                if (ret < 0)
                        mlog_errno(ret);
        }
index a4c905d..9cd0a68 100644 (file)
@@ -942,7 +942,8 @@ static void ocfs2_write_failure(struct inode *inode,
 
                if (tmppage && page_has_buffers(tmppage)) {
                        if (ocfs2_should_order_data(inode))
-                               ocfs2_jbd2_file_inode(wc->w_handle, inode);
+                               ocfs2_jbd2_inode_add_write(wc->w_handle, inode,
+                                                          user_pos, user_len);
 
                        block_commit_write(tmppage, from, to);
                }
@@ -2023,8 +2024,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
                }
 
                if (page_has_buffers(tmppage)) {
-                       if (handle && ocfs2_should_order_data(inode))
-                               ocfs2_jbd2_file_inode(handle, inode);
+                       if (handle && ocfs2_should_order_data(inode)) {
+                               loff_t start_byte =
+                                       ((loff_t)tmppage->index << PAGE_SHIFT) +
+                                       from;
+                               loff_t length = to - from;
+                               ocfs2_jbd2_inode_add_write(handle, inode,
+                                                          start_byte, length);
+                       }
                        block_commit_write(tmppage, from, to);
                }
        }
@@ -2042,7 +2049,8 @@ out_write_size:
                inode->i_mtime = inode->i_ctime = current_time(inode);
                di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
                di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
-               ocfs2_update_inode_fsync_trans(handle, inode, 1);
+               if (handle)
+                       ocfs2_update_inode_fsync_trans(handle, inode, 1);
        }
        if (handle)
                ocfs2_journal_dirty(handle, wc->w_di_bh);
@@ -2139,13 +2147,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
        struct ocfs2_dio_write_ctxt *dwc = NULL;
        struct buffer_head *di_bh = NULL;
        u64 p_blkno;
-       loff_t pos = iblock << inode->i_sb->s_blocksize_bits;
+       unsigned int i_blkbits = inode->i_sb->s_blocksize_bits;
+       loff_t pos = iblock << i_blkbits;
+       sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits;
        unsigned len, total_len = bh_result->b_size;
        int ret = 0, first_get_block = 0;
 
        len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
        len = min(total_len, len);
 
+       /*
+        * bh_result->b_size is the count from get_more_blocks() based on the
+        * write "pos" and "end"; we need to map twice to return different
+        * buffer states: 1. the area within the file size is not marked NEW;
+        * 2. the area beyond the file size is marked NEW.
+        *
+        *                 iblock    endblk
+        * |--------|---------|---------|---------
+        * |<-------area in file------->|
+        */
+
+       if ((iblock <= endblk) &&
+           ((iblock + ((len - 1) >> i_blkbits)) > endblk))
+               len = (endblk - iblock + 1) << i_blkbits;
+
        mlog(0, "get block of %lu at %llu:%u req %u\n",
                        inode->i_ino, pos, len, total_len);
 
@@ -2229,6 +2254,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
        if (desc->c_needs_zero)
                set_buffer_new(bh_result);
 
+       if (iblock > endblk)
+               set_buffer_new(bh_result);
+
        /* May sleep in end_io. It should not happen in a irq context. So defer
         * it to dio work queue. */
        set_buffer_defer_completion(bh_result);
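
The clamp added to ocfs2_dio_wr_get_block() is easiest to see with concrete numbers; the sketch below (illustrative values only) reproduces the arithmetic for a request that straddles i_size:

    #include <stdio.h>

    int main(void)
    {
            unsigned int i_blkbits = 12;        /* 4 KiB blocks, for example */
            long long i_size = 3 * 4096 + 100;  /* file ends inside block 3 */
            long long iblock = 2;               /* mapping starts at block 2 */
            unsigned int len = 4 * 4096;        /* request covers blocks 2..5 */
            long long endblk = (i_size - 1) >> i_blkbits; /* last in-file block: 3 */

            if (iblock <= endblk &&
                (iblock + ((len - 1) >> i_blkbits)) > endblk)
                    len = (unsigned int)((endblk - iblock + 1) << i_blkbits);

            /* blocks 2..3 stay in this mapping; 4..5 come back in a second
             * call and get buffer_new set */
            printf("clamped len = %u bytes\n", len);    /* 8192 */
            return 0;
    }
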
index 429e6a8..eaf042f 100644 (file)
@@ -231,14 +231,6 @@ static int blockcheck_u64_get(void *data, u64 *val)
 }
 DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
 
-static struct dentry *blockcheck_debugfs_create(const char *name,
-                                               struct dentry *parent,
-                                               u64 *value)
-{
-       return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
-                                  &blockcheck_fops);
-}
-
 static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
 {
        if (stats) {
@@ -250,16 +242,20 @@ static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
 static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
                                           struct dentry *parent)
 {
-       stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
+       struct dentry *dir;
+
+       dir = debugfs_create_dir("blockcheck", parent);
+       stats->b_debug_dir = dir;
+
+       debugfs_create_file("blocks_checked", S_IFREG | S_IRUSR, dir,
+                           &stats->b_check_count, &blockcheck_fops);
 
-       blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir,
-                                 &stats->b_check_count);
+       debugfs_create_file("checksums_failed", S_IFREG | S_IRUSR, dir,
+                           &stats->b_failure_count, &blockcheck_fops);
 
-       blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir,
-                                 &stats->b_failure_count);
+       debugfs_create_file("ecc_recoveries", S_IFREG | S_IRUSR, dir,
+                           &stats->b_recover_count, &blockcheck_fops);
 
-       blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir,
-                                 &stats->b_recover_count);
 }
 #else
 static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
index f1b6133..a368350 100644 (file)
@@ -225,10 +225,6 @@ struct o2hb_region {
        unsigned int            hr_region_num;
 
        struct dentry           *hr_debug_dir;
-       struct dentry           *hr_debug_livenodes;
-       struct dentry           *hr_debug_regnum;
-       struct dentry           *hr_debug_elapsed_time;
-       struct dentry           *hr_debug_pinned;
        struct o2hb_debug_buf   *hr_db_livenodes;
        struct o2hb_debug_buf   *hr_db_regnum;
        struct o2hb_debug_buf   *hr_db_elapsed_time;
@@ -1394,21 +1390,20 @@ void o2hb_exit(void)
        kfree(o2hb_db_failedregions);
 }
 
-static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
-                                       struct o2hb_debug_buf **db, int db_len,
-                                       int type, int size, int len, void *data)
+static void o2hb_debug_create(const char *name, struct dentry *dir,
+                             struct o2hb_debug_buf **db, int db_len, int type,
+                             int size, int len, void *data)
 {
        *db = kmalloc(db_len, GFP_KERNEL);
        if (!*db)
-               return NULL;
+               return;
 
        (*db)->db_type = type;
        (*db)->db_size = size;
        (*db)->db_len = len;
        (*db)->db_data = data;
 
-       return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db,
-                                  &o2hb_debug_fops);
+       debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, &o2hb_debug_fops);
 }
 
 static void o2hb_debug_init(void)
@@ -1525,11 +1520,7 @@ static void o2hb_region_release(struct config_item *item)
 
        kfree(reg->hr_slots);
 
-       debugfs_remove(reg->hr_debug_livenodes);
-       debugfs_remove(reg->hr_debug_regnum);
-       debugfs_remove(reg->hr_debug_elapsed_time);
-       debugfs_remove(reg->hr_debug_pinned);
-       debugfs_remove(reg->hr_debug_dir);
+       debugfs_remove_recursive(reg->hr_debug_dir);
        kfree(reg->hr_db_livenodes);
        kfree(reg->hr_db_regnum);
        kfree(reg->hr_db_elapsed_time);
@@ -1988,69 +1979,33 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
                : NULL;
 }
 
-static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
+static void o2hb_debug_region_init(struct o2hb_region *reg,
+                                  struct dentry *parent)
 {
-       int ret = -ENOMEM;
+       struct dentry *dir;
 
-       reg->hr_debug_dir =
-               debugfs_create_dir(config_item_name(&reg->hr_item), dir);
-       if (!reg->hr_debug_dir) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       dir = debugfs_create_dir(config_item_name(&reg->hr_item), parent);
+       reg->hr_debug_dir = dir;
 
-       reg->hr_debug_livenodes =
-                       o2hb_debug_create(O2HB_DEBUG_LIVENODES,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_livenodes),
-                                         sizeof(*(reg->hr_db_livenodes)),
-                                         O2HB_DB_TYPE_REGION_LIVENODES,
-                                         sizeof(reg->hr_live_node_bitmap),
-                                         O2NM_MAX_NODES, reg);
-       if (!reg->hr_debug_livenodes) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_LIVENODES, dir, &(reg->hr_db_livenodes),
+                         sizeof(*(reg->hr_db_livenodes)),
+                         O2HB_DB_TYPE_REGION_LIVENODES,
+                         sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES,
+                         reg);
 
-       reg->hr_debug_regnum =
-                       o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_regnum),
-                                         sizeof(*(reg->hr_db_regnum)),
-                                         O2HB_DB_TYPE_REGION_NUMBER,
-                                         0, O2NM_MAX_NODES, reg);
-       if (!reg->hr_debug_regnum) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, dir, &(reg->hr_db_regnum),
+                         sizeof(*(reg->hr_db_regnum)),
+                         O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg);
 
-       reg->hr_debug_elapsed_time =
-                       o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_elapsed_time),
-                                         sizeof(*(reg->hr_db_elapsed_time)),
-                                         O2HB_DB_TYPE_REGION_ELAPSED_TIME,
-                                         0, 0, reg);
-       if (!reg->hr_debug_elapsed_time) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, dir,
+                         &(reg->hr_db_elapsed_time),
+                         sizeof(*(reg->hr_db_elapsed_time)),
+                         O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg);
 
-       reg->hr_debug_pinned =
-                       o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_pinned),
-                                         sizeof(*(reg->hr_db_pinned)),
-                                         O2HB_DB_TYPE_REGION_PINNED,
-                                         0, 0, reg);
-       if (!reg->hr_debug_pinned) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, dir, &(reg->hr_db_pinned),
+                         sizeof(*(reg->hr_db_pinned)),
+                         O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg);
 
-       ret = 0;
-bail:
-       return ret;
 }
 
 static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
@@ -2106,11 +2061,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
        if (ret)
                goto unregister_handler;
 
-       ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
-       if (ret) {
-               config_item_put(&reg->hr_item);
-               goto unregister_handler;
-       }
+       o2hb_debug_region_init(reg, o2hb_debug_dir);
 
        return &reg->hr_item;
 
index 784426d..bdef72c 100644 (file)
@@ -3636,7 +3636,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
        int i, j, num_used;
        u32 major_hash;
        struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
-       struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list;
+       struct ocfs2_dx_entry_list *orig_list, *tmp_list;
        struct ocfs2_dx_entry *dx_entry;
 
        tmp_list = &tmp_dx_leaf->dl_list;
@@ -3645,7 +3645,6 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
                orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
                orig_list = &orig_dx_leaf->dl_list;
                new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
-               new_list = &new_dx_leaf->dl_list;
 
                num_used = le16_to_cpu(orig_list->de_num_used);
 
index 69a429b..aaf2454 100644 (file)
@@ -142,7 +142,6 @@ struct dlm_ctxt
        atomic_t res_tot_count;
        atomic_t res_cur_count;
 
-       struct dlm_debug_ctxt *dlm_debug_ctxt;
        struct dentry *dlm_debugfs_subroot;
 
        /* NOTE: Next three are protected by dlm_domain_lock */
index a4b58ba..4d0b452 100644 (file)
@@ -853,67 +853,34 @@ static const struct file_operations debug_state_fops = {
 /* files in subroot */
 void dlm_debug_init(struct dlm_ctxt *dlm)
 {
-       struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
        /* for dumping dlm_ctxt */
-       dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE,
-                                                    S_IFREG|S_IRUSR,
-                                                    dlm->dlm_debugfs_subroot,
-                                                    dlm, &debug_state_fops);
+       debugfs_create_file(DLM_DEBUGFS_DLM_STATE, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm, &debug_state_fops);
 
        /* for dumping lockres */
-       dc->debug_lockres_dentry =
-                       debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE,
-                                           S_IFREG|S_IRUSR,
-                                           dlm->dlm_debugfs_subroot,
-                                           dlm, &debug_lockres_fops);
+       debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm, &debug_lockres_fops);
 
        /* for dumping mles */
-       dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
-                                                  S_IFREG|S_IRUSR,
-                                                  dlm->dlm_debugfs_subroot,
-                                                  dlm, &debug_mle_fops);
+       debugfs_create_file(DLM_DEBUGFS_MLE_STATE, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm, &debug_mle_fops);
 
        /* for dumping lockres on the purge list */
-       dc->debug_purgelist_dentry =
-                       debugfs_create_file(DLM_DEBUGFS_PURGE_LIST,
-                                           S_IFREG|S_IRUSR,
-                                           dlm->dlm_debugfs_subroot,
-                                           dlm, &debug_purgelist_fops);
-}
-
-void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
-       struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
-       if (dc) {
-               debugfs_remove(dc->debug_purgelist_dentry);
-               debugfs_remove(dc->debug_mle_dentry);
-               debugfs_remove(dc->debug_lockres_dentry);
-               debugfs_remove(dc->debug_state_dentry);
-               kfree(dc);
-               dc = NULL;
-       }
+       debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm,
+                           &debug_purgelist_fops);
 }
 
 /* subroot - domain dir */
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-       dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
-                                     GFP_KERNEL);
-       if (!dlm->dlm_debug_ctxt) {
-               mlog_errno(-ENOMEM);
-               return -ENOMEM;
-       }
-
        dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
                                                      dlm_debugfs_root);
-       return 0;
 }
 
 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-       debugfs_remove(dlm->dlm_debugfs_subroot);
+       debugfs_remove_recursive(dlm->dlm_debugfs_subroot);
 }
 
 /* debugfs root */
index 7d0c7c9..f8fd868 100644 (file)
@@ -14,13 +14,6 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle);
 
 #ifdef CONFIG_DEBUG_FS
 
-struct dlm_debug_ctxt {
-       struct dentry *debug_state_dentry;
-       struct dentry *debug_lockres_dentry;
-       struct dentry *debug_mle_dentry;
-       struct dentry *debug_purgelist_dentry;
-};
-
 struct debug_lockres {
        int dl_len;
        char *dl_buf;
@@ -29,9 +22,8 @@ struct debug_lockres {
 };
 
 void dlm_debug_init(struct dlm_ctxt *dlm);
-void dlm_debug_shutdown(struct dlm_ctxt *dlm);
 
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
 
 void dlm_create_debugfs_root(void);
@@ -42,12 +34,8 @@ void dlm_destroy_debugfs_root(void);
 static inline void dlm_debug_init(struct dlm_ctxt *dlm)
 {
 }
-static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
-}
-static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+static inline void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-       return 0;
 }
 static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
index 7338b5d..ee6f459 100644 (file)
@@ -387,7 +387,6 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
 static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
 {
        dlm_unregister_domain_handlers(dlm);
-       dlm_debug_shutdown(dlm);
        dlm_complete_thread(dlm);
        dlm_complete_recovery_thread(dlm);
        dlm_destroy_dlm_worker(dlm);
@@ -1938,7 +1937,6 @@ bail:
 
        if (status) {
                dlm_unregister_domain_handlers(dlm);
-               dlm_debug_shutdown(dlm);
                dlm_complete_thread(dlm);
                dlm_complete_recovery_thread(dlm);
                dlm_destroy_dlm_worker(dlm);
@@ -1992,9 +1990,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
        dlm->key = key;
        dlm->node_num = o2nm_this_node();
 
-       ret = dlm_create_debugfs_subroot(dlm);
-       if (ret < 0)
-               goto leave;
+       dlm_create_debugfs_subroot(dlm);
 
        spin_lock_init(&dlm->spinlock);
        spin_lock_init(&dlm->master_lock);
@@ -2056,6 +2052,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
        mlog(0, "context init: refcount %u\n",
                  kref_read(&dlm->dlm_refs));
 
+       ret = 0;
 leave:
        if (ret < 0 && dlm) {
                if (dlm->master_hash)
index e786577..3883633 100644 (file)
@@ -90,7 +90,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
        enum dlm_status status;
        int actions = 0;
        int in_use;
-        u8 owner;
+       u8 owner;
+       int recovery_wait = 0;
 
        mlog(0, "master_node = %d, valblk = %d\n", master_node,
             flags & LKM_VALBLK);
@@ -193,9 +194,12 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
                }
                if (flags & LKM_CANCEL)
                        lock->cancel_pending = 0;
-               else
-                       lock->unlock_pending = 0;
-
+               else {
+                       if (!lock->unlock_pending)
+                               recovery_wait = 1;
+                       else
+                               lock->unlock_pending = 0;
+               }
        }
 
        /* get an extra ref on lock.  if we are just switching
@@ -229,6 +233,17 @@ leave:
        spin_unlock(&res->spinlock);
        wake_up(&res->wq);
 
+       if (recovery_wait) {
+               spin_lock(&res->spinlock);
+               /* An unlock request succeeds directly once the owner dies,
+                * and the lock is already removed from the grant list. We have
+                * to wait until RECOVERING is done, or we miss the chance to
+                * purge it, since removal is much faster than recovery.
+                */
+               __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING);
+               spin_unlock(&res->spinlock);
+       }
+
        /* let the caller's final dlm_lock_put handle the actual kfree */
        if (actions & DLM_UNLOCK_FREE_LOCK) {
                /* this should always be coupled with list removal */
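
The recovery_wait path added above amounts to "sleep until the RECOVERING flag clears before letting the lockres be purged"; __dlm_wait_on_lockres_flags() provides that wait under the lockres spinlock. A userspace analogue of the same wait built on a pthread condition variable (illustrative only):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static int recovering = 1;

    static void *recovery_thread(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&lock);
            recovering = 0;                 /* recovery finished */
            pthread_cond_broadcast(&cond);
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;
            pthread_create(&t, NULL, recovery_thread, NULL);
            pthread_mutex_lock(&lock);
            while (recovering)              /* wait for RECOVERING to clear */
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
            pthread_join(t, NULL);
            puts("safe to purge the lock");
            return 0;
    }
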
index 1420723..6e774c5 100644 (file)
@@ -2508,9 +2508,7 @@ bail:
                        ocfs2_inode_unlock(inode, ex);
        }
 
-       if (local_bh)
-               brelse(local_bh);
-
+       brelse(local_bh);
        return status;
 }
 
@@ -2593,8 +2591,7 @@ int ocfs2_inode_lock_atime(struct inode *inode,
                *level = 1;
                if (ocfs2_should_update_atime(inode, vfsmnt))
                        ocfs2_update_inode_atime(inode, bh);
-               if (bh)
-                       brelse(bh);
+               brelse(bh);
        } else
                *level = 0;
 
@@ -3012,8 +3009,6 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
 
        kref_init(&dlm_debug->d_refcnt);
        INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
-       dlm_debug->d_locking_state = NULL;
-       dlm_debug->d_locking_filter = NULL;
        dlm_debug->d_filter_secs = 0;
 out:
        return dlm_debug;
@@ -3282,27 +3277,19 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
 {
        struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
 
-       dlm_debug->d_locking_state = debugfs_create_file("locking_state",
-                                                        S_IFREG|S_IRUSR,
-                                                        osb->osb_debug_root,
-                                                        osb,
-                                                        &ocfs2_dlm_debug_fops);
+       debugfs_create_file("locking_state", S_IFREG|S_IRUSR,
+                           osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops);
 
-       dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
-                                               0600,
-                                               osb->osb_debug_root,
-                                               &dlm_debug->d_filter_secs);
+       debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
+                          &dlm_debug->d_filter_secs);
 }
 
 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
 {
        struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
 
-       if (dlm_debug) {
-               debugfs_remove(dlm_debug->d_locking_state);
-               debugfs_remove(dlm_debug->d_locking_filter);
+       if (dlm_debug)
                ocfs2_put_dlm_debug(dlm_debug);
-       }
 }
 
 int ocfs2_dlm_init(struct ocfs2_super *osb)
index e66a249..e3e2d1b 100644 (file)
@@ -590,8 +590,7 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
                        *extent_flags = rec->e_flags;
        }
 out:
-       if (eb_bh)
-               brelse(eb_bh);
+       brelse(eb_bh);
        return ret;
 }
 
index 4435df3..9876db5 100644 (file)
@@ -706,7 +706,9 @@ leave:
  * Thus, we need to explicitly order the zeroed pages.
  */
 static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
-                                               struct buffer_head *di_bh)
+                                                     struct buffer_head *di_bh,
+                                                     loff_t start_byte,
+                                                     loff_t length)
 {
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        handle_t *handle = NULL;
@@ -722,7 +724,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_jbd2_file_inode(handle, inode);
+       ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
@@ -761,7 +763,9 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
        BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
        BUG_ON(abs_from & (inode->i_blkbits - 1));
 
-       handle = ocfs2_zero_start_ordered_transaction(inode, di_bh);
+       handle = ocfs2_zero_start_ordered_transaction(inode, di_bh,
+                                                     abs_from,
+                                                     abs_to - abs_from);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                goto out;
@@ -1226,6 +1230,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
                        transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
                        if (IS_ERR(transfer_to[USRQUOTA])) {
                                status = PTR_ERR(transfer_to[USRQUOTA]);
+                               transfer_to[USRQUOTA] = NULL;
                                goto bail_unlock;
                        }
                }
@@ -1235,6 +1240,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
                        transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
                        if (IS_ERR(transfer_to[GRPQUOTA])) {
                                status = PTR_ERR(transfer_to[GRPQUOTA]);
+                               transfer_to[GRPQUOTA] = NULL;
                                goto bail_unlock;
                        }
                }
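
Both ocfs2_setattr() hunks above apply the same fix: NULL the array slot after extracting PTR_ERR(), because the bail path releases every non-NULL entry and would otherwise treat the error cookie as a valid dquot. A userspace model of the hazard and the fix (the ERR_PTR emulation below is an assumption for illustration, not the kernel macro):

    #include <stdio.h>

    #define MAXQUOTAS 2
    static long dummy;                      /* stands in for a real dquot */

    /* Crude ERR_PTR-style emulation: small negative values cast to a
     * pointer encode an errno. */
    static void *get_quota(int ok)
    {
            return ok ? (void *)&dummy : (void *)-22L;
    }
    static int is_err(void *p)
    {
            return (long)p < 0 && (long)p >= -4095;
    }

    int main(void)
    {
            void *transfer_to[MAXQUOTAS] = { 0 };

            transfer_to[0] = get_quota(0);
            if (is_err(transfer_to[0])) {
                    printf("status = %ld\n", (long)transfer_to[0]);
                    transfer_to[0] = NULL;  /* the fix: drop the error cookie */
            }
            /* bail path: releases every non-NULL entry */
            for (int i = 0; i < MAXQUOTAS; i++)
                    if (transfer_to[i])
                            printf("releasing dquot %d\n", i);
            return 0;
    }
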
@@ -2092,54 +2098,89 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
        return 0;
 }
 
-static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
-                                           struct file *file,
-                                           loff_t pos, size_t count,
-                                           int *meta_level)
+static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
+                                           struct buffer_head **di_bh,
+                                           int meta_level,
+                                           int overwrite_io,
+                                           int write_sem,
+                                           int wait)
 {
-       int ret;
-       struct buffer_head *di_bh = NULL;
-       u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
-       u32 clusters =
-               ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+       int ret = 0;
 
-       ret = ocfs2_inode_lock(inode, &di_bh, 1);
-       if (ret) {
-               mlog_errno(ret);
+       if (wait)
+               ret = ocfs2_inode_lock(inode, NULL, meta_level);
+       else
+               ret = ocfs2_try_inode_lock(inode,
+                       overwrite_io ? NULL : di_bh, meta_level);
+       if (ret < 0)
                goto out;
+
+       if (wait) {
+               if (write_sem)
+                       down_write(&OCFS2_I(inode)->ip_alloc_sem);
+               else
+                       down_read(&OCFS2_I(inode)->ip_alloc_sem);
+       } else {
+               if (write_sem)
+                       ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem);
+               else
+                       ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem);
+
+               if (!ret) {
+                       ret = -EAGAIN;
+                       goto out_unlock;
+               }
        }
 
-       *meta_level = 1;
+       return ret;
 
-       ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
-       if (ret)
-               mlog_errno(ret);
+out_unlock:
+       brelse(*di_bh);
+       ocfs2_inode_unlock(inode, meta_level);
 out:
-       brelse(di_bh);
        return ret;
 }
 
+static void ocfs2_inode_unlock_for_extent_tree(struct inode *inode,
+                                              struct buffer_head **di_bh,
+                                              int meta_level,
+                                              int write_sem)
+{
+       if (write_sem)
+               up_write(&OCFS2_I(inode)->ip_alloc_sem);
+       else
+               up_read(&OCFS2_I(inode)->ip_alloc_sem);
+
+       brelse(*di_bh);
+       *di_bh = NULL;
+
+       if (meta_level >= 0)
+               ocfs2_inode_unlock(inode, meta_level);
+}
+
 static int ocfs2_prepare_inode_for_write(struct file *file,
                                         loff_t pos, size_t count, int wait)
 {
        int ret = 0, meta_level = 0, overwrite_io = 0;
+       int write_sem = 0;
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = d_inode(dentry);
        struct buffer_head *di_bh = NULL;
-       loff_t end;
+       u32 cpos;
+       u32 clusters;
 
        /*
         * We start with a read level meta lock and only jump to an ex
         * if we need to make modifications here.
         */
        for(;;) {
-               if (wait)
-                       ret = ocfs2_inode_lock(inode, NULL, meta_level);
-               else
-                       ret = ocfs2_try_inode_lock(inode,
-                               overwrite_io ? NULL : &di_bh, meta_level);
+               ret = ocfs2_inode_lock_for_extent_tree(inode,
+                                                      &di_bh,
+                                                      meta_level,
+                                                      overwrite_io,
+                                                      write_sem,
+                                                      wait);
                if (ret < 0) {
-                       meta_level = -1;
                        if (ret != -EAGAIN)
                                mlog_errno(ret);
                        goto out;
@@ -2151,15 +2192,8 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                 */
                if (!wait && !overwrite_io) {
                        overwrite_io = 1;
-                       if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
-                               ret = -EAGAIN;
-                               goto out_unlock;
-                       }
 
                        ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
-                       brelse(di_bh);
-                       di_bh = NULL;
-                       up_read(&OCFS2_I(inode)->ip_alloc_sem);
                        if (ret < 0) {
                                if (ret != -EAGAIN)
                                        mlog_errno(ret);
@@ -2178,7 +2212,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                 * set inode->i_size at the end of a write. */
                if (should_remove_suid(dentry)) {
                        if (meta_level == 0) {
-                               ocfs2_inode_unlock(inode, meta_level);
+                               ocfs2_inode_unlock_for_extent_tree(inode,
+                                                                  &di_bh,
+                                                                  meta_level,
+                                                                  write_sem);
                                meta_level = 1;
                                continue;
                        }
@@ -2190,22 +2227,34 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                        }
                }
 
-               end = pos + count;
-
                ret = ocfs2_check_range_for_refcount(inode, pos, count);
                if (ret == 1) {
-                       ocfs2_inode_unlock(inode, meta_level);
-                       meta_level = -1;
-
-                       ret = ocfs2_prepare_inode_for_refcount(inode,
-                                                              file,
-                                                              pos,
-                                                              count,
-                                                              &meta_level);
+                       ocfs2_inode_unlock_for_extent_tree(inode,
+                                                          &di_bh,
+                                                          meta_level,
+                                                          write_sem);
+                       ret = ocfs2_inode_lock_for_extent_tree(inode,
+                                                              &di_bh,
+                                                              meta_level,
+                                                              overwrite_io,
+                                                              1,
+                                                              wait);
+                       write_sem = 1;
+                       if (ret < 0) {
+                               if (ret != -EAGAIN)
+                                       mlog_errno(ret);
+                               goto out;
+                       }
+
+                       cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+                       clusters =
+                               ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+                       ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
                }
 
                if (ret < 0) {
-                       mlog_errno(ret);
+                       if (ret != -EAGAIN)
+                               mlog_errno(ret);
                        goto out_unlock;
                }
 
@@ -2216,10 +2265,10 @@ out_unlock:
        trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
                                            pos, count, wait);
 
-       brelse(di_bh);
-
-       if (meta_level >= 0)
-               ocfs2_inode_unlock(inode, meta_level);
+       ocfs2_inode_unlock_for_extent_tree(inode,
+                                          &di_bh,
+                                          meta_level,
+                                          write_sem);
 
 out:
        return ret;
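
The loop above follows ocfs2's usual lock-upgrade pattern: the cluster lock cannot be promoted in place, so it is dropped and retaken at the stronger level before the checks rerun. A minimal sketch of that pattern, with hypothetical helper names (example_lock/example_unlock/needs_write_level are stand-ins, not ocfs2 APIs):

	int meta_level = 0;

	for (;;) {
		ret = example_lock(inode, meta_level);
		if (ret < 0)
			goto out;
		if (meta_level == 1 || !needs_write_level(inode))
			break;
		/* drop and retake at the stronger level, then recheck */
		example_unlock(inode, meta_level);
		meta_level = 1;
	}
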
index 7ad9d65..7c9dfd5 100644 (file)
@@ -534,7 +534,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
         */
        mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
                        !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
-                       "Inode %llu: system file state is ambigous\n",
+                       "Inode %llu: system file state is ambiguous\n",
                        (unsigned long long)args->fi_blkno);
 
        if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
index d6f7b29..efeea20 100644 (file)
@@ -283,7 +283,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
        if (inode_alloc)
                inode_lock(inode_alloc);
 
-       if (o2info_coherent(&fi->ifi_req)) {
+       if (inode_alloc && o2info_coherent(&fi->ifi_req)) {
                status = ocfs2_inode_lock(inode_alloc, &bh, 0);
                if (status < 0) {
                        mlog_errno(status);
index 930e3d3..699a560 100644 (file)
@@ -217,7 +217,8 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb)
        /* At this point, we know that no more recovery threads can be
         * launched, so wait for any recovery completion work to
         * complete. */
-       flush_workqueue(osb->ocfs2_wq);
+       if (osb->ocfs2_wq)
+               flush_workqueue(osb->ocfs2_wq);
 
        /*
         * Now that recovery is shut down, and the osb is about to be
index c0fe6ed..3103ba7 100644 (file)
@@ -144,7 +144,6 @@ static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
 void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
-void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
 void ocfs2_complete_recovery(struct work_struct *work);
 void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
@@ -232,8 +231,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *                          ocfs2_journal_access_*() unless you intend to
  *                          manage the checksum by hand.
  *  ocfs2_journal_dirty    - Mark a journalled buffer as having dirty data.
- *  ocfs2_jbd2_file_inode  - Mark an inode so that its data goes out before
- *                           the current handle commits.
+ *  ocfs2_jbd2_inode_add_write  - Mark an inode with range so that its data goes
+ *                                out before the current handle commits.
  */
 
 /* You must always start_trans with a number of buffs > 0, but it's
@@ -441,7 +440,7 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
  * previous dirblock update in the free list */
 static inline int ocfs2_link_credits(struct super_block *sb)
 {
-       return 2*OCFS2_INODE_UPDATE_CREDITS + 4 +
+       return 2 * OCFS2_INODE_UPDATE_CREDITS + 4 +
               ocfs2_quota_trans_credits(sb);
 }
 
@@ -575,37 +574,12 @@ static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
        return ocfs2_extent_recs_per_gd(sb);
 }
 
-static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
-                                               unsigned int clusters_to_del,
-                                               struct ocfs2_dinode *fe,
-                                               struct ocfs2_extent_list *last_el)
+static inline int ocfs2_jbd2_inode_add_write(handle_t *handle, struct inode *inode,
+                                            loff_t start_byte, loff_t length)
 {
-       /* for dinode + all headers in this pass + update to next leaf */
-       u16 next_free = le16_to_cpu(last_el->l_next_free_rec);
-       u16 tree_depth = le16_to_cpu(fe->id2.i_list.l_tree_depth);
-       int credits = 1 + tree_depth + 1;
-       int i;
-
-       i = next_free - 1;
-       BUG_ON(i < 0);
-
-       /* We may be deleting metadata blocks, so metadata alloc dinode +
-          one desc. block for each possible delete. */
-       if (tree_depth && next_free == 1 &&
-           ocfs2_rec_clusters(last_el, &last_el->l_recs[i]) == clusters_to_del)
-               credits += 1 + tree_depth;
-
-       /* update to the truncate log. */
-       credits += OCFS2_TRUNCATE_LOG_UPDATE;
-
-       credits += ocfs2_quota_trans_credits(sb);
-
-       return credits;
-}
-
-static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
-{
-       return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode);
+       return jbd2_journal_inode_ranged_write(handle,
+                                              &OCFS2_I(inode)->ip_jinode,
+                                              start_byte, length);
 }
 
 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
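
The replacement helper maps onto jbd2's ranged ordered-data API, so only the bytes being written are forced out before the handle commits. A minimal usage sketch (hypothetical caller, not part of the patch):

	static int example_mark_ordered(handle_t *handle, struct inode *inode,
					loff_t pos, loff_t count)
	{
		/* order just [pos, pos + count) instead of the whole inode */
		return ocfs2_jbd2_inode_add_write(handle, inode, pos, count);
	}
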
index 158e5af..720e9f9 100644 (file)
@@ -377,7 +377,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
        struct ocfs2_dinode *alloc = NULL;
 
        cancel_delayed_work(&osb->la_enable_wq);
-       flush_workqueue(osb->ocfs2_wq);
+       if (osb->ocfs2_wq)
+               flush_workqueue(osb->ocfs2_wq);
 
        if (osb->local_alloc_state == OCFS2_LA_UNUSED)
                goto out;
index 6f8e1c4..8ea51cf 100644 (file)
@@ -2486,7 +2486,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
        struct inode *inode = NULL;
        struct inode *orphan_dir = NULL;
        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
-       struct ocfs2_dinode *di = NULL;
        handle_t *handle = NULL;
        char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
        struct buffer_head *parent_di_bh = NULL;
@@ -2552,7 +2551,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
                goto leave;
        }
 
-       di = (struct ocfs2_dinode *)new_di_bh->b_data;
        status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
                                  &orphan_insert, orphan_dir, false);
        if (status < 0) {
index fddbbd6..9150cfa 100644 (file)
@@ -223,8 +223,6 @@ struct ocfs2_orphan_scan {
 
 struct ocfs2_dlm_debug {
        struct kref d_refcnt;
-       struct dentry *d_locking_state;
-       struct dentry *d_locking_filter;
        u32 d_filter_secs;
        struct list_head d_lockres_tracking;
 };
@@ -401,7 +399,6 @@ struct ocfs2_super
        struct ocfs2_dlm_debug *osb_dlm_debug;
 
        struct dentry *osb_debug_root;
-       struct dentry *osb_ctxt;
 
        wait_queue_head_t recovery_event;
 
index 8b2f395..c81e86c 100644 (file)
@@ -1080,10 +1080,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
        osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
                                                 ocfs2_debugfs_root);
 
-       osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
-                                           osb->osb_debug_root,
-                                           osb,
-                                           &ocfs2_osb_debug_fops);
+       debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
+                           osb, &ocfs2_osb_debug_fops);
 
        if (ocfs2_meta_ecc(osb))
                ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
@@ -1861,8 +1859,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
        kset_unregister(osb->osb_dev_kset);
 
-       debugfs_remove(osb->osb_ctxt);
-
        /* Orphan scan should be stopped as early as possible */
        ocfs2_orphan_scan_stop(osb);
 
@@ -1918,7 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
                ocfs2_dlm_shutdown(osb, hangup_needed);
 
        ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
-       debugfs_remove(osb->osb_debug_root);
+       debugfs_remove_recursive(osb->osb_debug_root);
 
        if (hangup_needed)
                ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str));
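
Since the dentry returned by debugfs_create_file() is no longer stored, teardown relies on removing the parent directory tree in one call. A sketch of the resulting pattern (hypothetical names):

	struct dentry *root = debugfs_create_dir("example", NULL);

	/* return value intentionally ignored; debugfs copes with errors */
	debugfs_create_file("fs_state", S_IFREG | S_IRUSR, root, data, &fops);

	/* ... on unmount ... */
	debugfs_remove_recursive(root);	/* also removes "fs_state" */
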
index 90c830e..d850797 100644 (file)
@@ -1490,18 +1490,6 @@ static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
        return loc->xl_ops->xlo_check_space(loc, xi);
 }
 
-static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
-{
-       loc->xl_ops->xlo_add_entry(loc, name_hash);
-       loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
-       /*
-        * We can't leave the new entry's xe_name_offset at zero or
-        * add_namevalue() will go nuts.  We set it to the size of our
-        * storage so that it can never be less than any other entry.
-        */
-       loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
-}
-
 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
                                   struct ocfs2_xattr_info *xi)
 {
@@ -2133,29 +2121,31 @@ static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
        if (rc)
                goto out;
 
-       if (loc->xl_entry) {
-               if (ocfs2_xa_can_reuse_entry(loc, xi)) {
-                       orig_value_size = loc->xl_entry->xe_value_size;
-                       rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
-                       if (rc)
-                               goto out;
-                       goto alloc_value;
-               }
+       if (!loc->xl_entry) {
+               rc = -EINVAL;
+               goto out;
+       }
 
-               if (!ocfs2_xattr_is_local(loc->xl_entry)) {
-                       orig_clusters = ocfs2_xa_value_clusters(loc);
-                       rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
-                       if (rc) {
-                               mlog_errno(rc);
-                               ocfs2_xa_cleanup_value_truncate(loc,
-                                                               "overwriting",
-                                                               orig_clusters);
-                               goto out;
-                       }
+       if (ocfs2_xa_can_reuse_entry(loc, xi)) {
+               orig_value_size = loc->xl_entry->xe_value_size;
+               rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
+               if (rc)
+                       goto out;
+               goto alloc_value;
+       }
+
+       if (!ocfs2_xattr_is_local(loc->xl_entry)) {
+               orig_clusters = ocfs2_xa_value_clusters(loc);
+               rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
+               if (rc) {
+                       mlog_errno(rc);
+                       ocfs2_xa_cleanup_value_truncate(loc,
+                                                       "overwriting",
+                                                       orig_clusters);
+                       goto out;
                }
-               ocfs2_xa_wipe_namevalue(loc);
-       } else
-               ocfs2_xa_add_entry(loc, name_hash);
+       }
+       ocfs2_xa_wipe_namevalue(loc);
 
        /*
         * If we get here, we have a blank entry.  Fill it.  We grow our
index a59abe3..b62f5c0 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -776,7 +776,7 @@ static int do_dentry_open(struct file *f,
                f->f_mode |= FMODE_ATOMIC_POS;
 
        f->f_op = fops_get(inode->i_fop);
-       if (unlikely(WARN_ON(!f->f_op))) {
+       if (WARN_ON(!f->f_op)) {
                error = -ENODEV;
                goto cleanup_all;
        }
@@ -818,6 +818,14 @@ static int do_dentry_open(struct file *f,
                if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
                        return -EINVAL;
        }
+
+       /*
+        * XXX: Huge page cache doesn't support writing yet. Drop all page
+        * cache for this file before processing writes.
+        */
+       if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
+               truncate_pagecache(inode, 0);
+
        return 0;
 
 cleanup_all:
index f583448..e2ed8e0 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/ioport.h>
 #include <linux/memory.h>
 #include <linux/sched/task.h>
+#include <linux/security.h>
 #include <asm/sections.h>
 #include "internal.h"
 
@@ -545,9 +546,14 @@ out:
 
 static int open_kcore(struct inode *inode, struct file *filp)
 {
+       int ret = security_locked_down(LOCKDOWN_KCORE);
+
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
 
+       if (ret)
+               return ret;
+
        filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!filp->private_data)
                return -ENOMEM;
index 465ea01..8c1f1bb 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
 #include <linux/percpu.h>
-#include <linux/quicklist.h>
 #include <linux/seq_file.h>
 #include <linux/swap.h>
 #include <linux/vmstat.h>
@@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                   global_zone_page_state(NR_KERNEL_STACK_KB));
        show_val_kb(m, "PageTables:     ",
                    global_zone_page_state(NR_PAGETABLE));
-#ifdef CONFIG_QUICKLIST
-       show_val_kb(m, "Quicklists:     ", quicklist_total_size());
-#endif
 
        show_val_kb(m, "NFS_Unstable:   ",
                    global_node_page_state(NR_UNSTABLE_NFS));
@@ -136,6 +132,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                    global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
        show_val_kb(m, "ShmemPmdMapped: ",
                    global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+       show_val_kb(m, "FileHugePages:  ",
+                   global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+       show_val_kb(m, "FilePmdMapped:  ",
+                   global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
 #endif
 
 #ifdef CONFIG_CMA
index 544d1ee..7c952ee 100644 (file)
@@ -42,10 +42,12 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
                return -EINVAL;
 
        while (count > 0) {
-               if (pfn_valid(pfn))
-                       ppage = pfn_to_page(pfn);
-               else
-                       ppage = NULL;
+               /*
+                * TODO: ZONE_DEVICE support requires identifying
+                * memmaps that were actually initialized.
+                */
+               ppage = pfn_to_online_page(pfn);
+
                if (!ppage || PageSlab(ppage) || page_has_type(ppage))
                        pcount = 0;
                else
@@ -216,10 +218,11 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
                return -EINVAL;
 
        while (count > 0) {
-               if (pfn_valid(pfn))
-                       ppage = pfn_to_page(pfn);
-               else
-                       ppage = NULL;
+               /*
+                * TODO: ZONE_DEVICE support requires identifying
+                * memmaps that were actually initialized.
+                */
+               ppage = pfn_to_online_page(pfn);
 
                if (put_user(stable_page_flags(ppage), out)) {
                        ret = -EFAULT;
@@ -261,10 +264,11 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
                return -EINVAL;
 
        while (count > 0) {
-               if (pfn_valid(pfn))
-                       ppage = pfn_to_page(pfn);
-               else
-                       ppage = NULL;
+               /*
+                * TODO: ZONE_DEVICE support requires identifying
+                * memmaps that were actually initialized.
+                */
+               ppage = pfn_to_online_page(pfn);
 
                if (ppage)
                        ino = page_cgroup_ino(ppage);
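
pfn_valid() only guarantees that a memmap entry exists; for ZONE_DEVICE and offline sections that entry may never have been initialized. pfn_to_online_page() folds both checks into one lookup, as the three hunks above adopt. In sketch form:

	/* Illustrative sketch only. */
	static struct page *example_lookup(unsigned long pfn)
	{
		/*
		 * Returns NULL unless the pfn belongs to an online section,
		 * i.e. its struct page was actually initialized.
		 */
		return pfn_to_online_page(pfn);
	}
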
index bf43d1d..9442631 100644 (file)
@@ -417,6 +417,7 @@ struct mem_size_stats {
        unsigned long lazyfree;
        unsigned long anonymous_thp;
        unsigned long shmem_thp;
+       unsigned long file_thp;
        unsigned long swap;
        unsigned long shared_hugetlb;
        unsigned long private_hugetlb;
@@ -461,7 +462,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
                bool compound, bool young, bool dirty, bool locked)
 {
-       int i, nr = compound ? 1 << compound_order(page) : 1;
+       int i, nr = compound ? compound_nr(page) : 1;
        unsigned long size = nr * PAGE_SIZE;
 
        /*
@@ -588,7 +589,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        else if (is_zone_device_page(page))
                /* pass */;
        else
-               VM_BUG_ON_PAGE(1, page);
+               mss->file_thp += HPAGE_PMD_SIZE;
        smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
 }
 #else
@@ -809,6 +810,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
        SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
        SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
        SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+       SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
        SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
        seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
                                  mss->private_hugetlb >> 10, 7);
index e16fb8f..273ee82 100644 (file)
@@ -88,7 +88,7 @@ static inline void mangle(struct seq_file *m, const char *s)
 static void show_type(struct seq_file *m, struct super_block *sb)
 {
        mangle(m, sb->s_type->name);
-       if (sb->s_subtype && sb->s_subtype[0]) {
+       if (sb->s_subtype) {
                seq_putc(m, '.');
                mangle(m, sb->s_subtype);
        }
index 2f6a453..d26d5ea 100644 (file)
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/compat.h>
-
 #include <linux/uaccess.h>
 
+#include <asm/unaligned.h>
+
+/*
+ * Note the "unsafe_put_user() semantics: we goto a
+ * label for errors.
+ */
+#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {  \
+       char __user *dst = (_dst);                              \
+       const char *src = (_src);                               \
+       size_t len = (_len);                                    \
+       unsafe_put_user(0, dst+len, label);                     \
+       unsafe_copy_to_user(dst, src, len, label);              \
+} while (0)
+
+
 int iterate_dir(struct file *file, struct dir_context *ctx)
 {
        struct inode *inode = file_inode(file);
@@ -64,6 +78,40 @@ out:
 }
 EXPORT_SYMBOL(iterate_dir);
 
+/*
+ * POSIX says that a dirent name cannot contain a NUL byte or a '/'.
+ *
+ * It's not 100% clear what we should really do in this case.
+ * The filesystem is clearly corrupted, but returning a hard
+ * error means that you now don't see any of the other names
+ * either, so that isn't a perfect alternative.
+ *
+ * And if you return an error, what error do you use? Several
+ * filesystems seem to have decided on EUCLEAN being the error
+ * code for EFSCORRUPTED, and that may be the error to use. Or
+ * just EIO, which is perhaps more obvious to users.
+ *
+ * In order to see the other file names in the directory, the
+ * caller might want to make this a "soft" error: skip the
+ * entry, and return the error at the end instead.
+ *
+ * Note that this should likely do a "memchr(name, 0, len)"
+ * check too, since that would be filesystem corruption as
+ * well. However, that case can't actually confuse user space,
+ * which has to do a strlen() on the name anyway to find the
+ * filename length, and the above "soft error" worry means
+ * that it's probably better left alone until we have that
+ * issue clarified.
+ */
+static int verify_dirent_name(const char *name, int len)
+{
+       if (!len)
+               return -EIO;
+       if (memchr(name, '/', len))
+               return -EIO;
+       return 0;
+}
+
 /*
  * Traditional linux readdir() handling..
  *
@@ -173,6 +221,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
        int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
                sizeof(long));
 
+       buf->error = verify_dirent_name(name, namlen);
+       if (unlikely(buf->error))
+               return buf->error;
        buf->error = -EINVAL;   /* only used if we fail.. */
        if (reclen > buf->count)
                return -EINVAL;
@@ -182,28 +233,31 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
                return -EOVERFLOW;
        }
        dirent = buf->previous;
-       if (dirent) {
-               if (signal_pending(current))
-                       return -EINTR;
-               if (__put_user(offset, &dirent->d_off))
-                       goto efault;
-       }
-       dirent = buf->current_dir;
-       if (__put_user(d_ino, &dirent->d_ino))
-               goto efault;
-       if (__put_user(reclen, &dirent->d_reclen))
-               goto efault;
-       if (copy_to_user(dirent->d_name, name, namlen))
-               goto efault;
-       if (__put_user(0, dirent->d_name + namlen))
-               goto efault;
-       if (__put_user(d_type, (char __user *) dirent + reclen - 1))
+       if (dirent && signal_pending(current))
+               return -EINTR;
+
+       /*
+        * Note! This range-checks 'previous' (which may be NULL).
+        * The real range was checked in getdents
+        */
+       if (!user_access_begin(dirent, sizeof(*dirent)))
                goto efault;
+       if (dirent)
+               unsafe_put_user(offset, &dirent->d_off, efault_end);
+       dirent = buf->current_dir;
+       unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
+       unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
+       unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
+       unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+       user_access_end();
+
        buf->previous = dirent;
        dirent = (void __user *)dirent + reclen;
        buf->current_dir = dirent;
        buf->count -= reclen;
        return 0;
+efault_end:
+       user_access_end();
 efault:
        buf->error = -EFAULT;
        return -EFAULT;
@@ -259,34 +313,38 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
        int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
                sizeof(u64));
 
+       buf->error = verify_dirent_name(name, namlen);
+       if (unlikely(buf->error))
+               return buf->error;
        buf->error = -EINVAL;   /* only used if we fail.. */
        if (reclen > buf->count)
                return -EINVAL;
        dirent = buf->previous;
-       if (dirent) {
-               if (signal_pending(current))
-                       return -EINTR;
-               if (__put_user(offset, &dirent->d_off))
-                       goto efault;
-       }
-       dirent = buf->current_dir;
-       if (__put_user(ino, &dirent->d_ino))
-               goto efault;
-       if (__put_user(0, &dirent->d_off))
-               goto efault;
-       if (__put_user(reclen, &dirent->d_reclen))
-               goto efault;
-       if (__put_user(d_type, &dirent->d_type))
-               goto efault;
-       if (copy_to_user(dirent->d_name, name, namlen))
-               goto efault;
-       if (__put_user(0, dirent->d_name + namlen))
+       if (dirent && signal_pending(current))
+               return -EINTR;
+
+       /*
+        * Note! This range-checks 'previous' (which may be NULL).
+        * The real range was checked in getdents
+        */
+       if (!user_access_begin(dirent, sizeof(*dirent)))
                goto efault;
+       if (dirent)
+               unsafe_put_user(offset, &dirent->d_off, efault_end);
+       dirent = buf->current_dir;
+       unsafe_put_user(ino, &dirent->d_ino, efault_end);
+       unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
+       unsafe_put_user(d_type, &dirent->d_type, efault_end);
+       unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+       user_access_end();
+
        buf->previous = dirent;
        dirent = (void __user *)dirent + reclen;
        buf->current_dir = dirent;
        buf->count -= reclen;
        return 0;
+efault_end:
+       user_access_end();
 efault:
        buf->error = -EFAULT;
        return -EFAULT;
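
For reference, this is the batched copy pattern both filldir variants now share: one user_access_begin() range check opens an unsafe region, the stores inside it are individually unchecked, and any fault lands on a label that must close the region. A minimal self-contained sketch (hypothetical record type, not kernel code):

	struct example_rec {
		u64 ino;
		s64 off;
	};

	static int example_fill(struct example_rec __user *p, u64 ino, s64 off)
	{
		if (!user_access_begin(p, sizeof(*p)))
			return -EFAULT;
		unsafe_put_user(ino, &p->ino, efault_end);
		unsafe_put_user(off, &p->off, efault_end);
		user_access_end();
		return 0;

	efault_end:
		user_access_end();
		return -EFAULT;
	}
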
index 9c02d96..4075e41 100644 (file)
@@ -239,10 +239,8 @@ static int balance_leaf_when_delete_left(struct tree_balance *tb)
 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
 {
        struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
-       int item_pos = PATH_LAST_POSITION(tb->tb_path);
        struct buffer_info bi;
        int n;
-       struct item_head *ih;
 
        RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
               "vs- 12000: level: wrong FR %z", tb->FR[0]);
@@ -251,7 +249,6 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
        RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
               "PAP-12010: tree can not be empty");
 
-       ih = item_head(tbS0, item_pos);
        buffer_info_init_tbS0(tb, &bi);
 
        /* Delete or truncate the item */
@@ -298,7 +295,6 @@ static unsigned int balance_leaf_insert_left(struct tree_balance *tb,
        if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
                /* part of new item falls into L[0] */
                int new_item_len, shift;
-               int version;
 
                ret = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
 
@@ -317,8 +313,6 @@ static unsigned int balance_leaf_insert_left(struct tree_balance *tb,
                leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body,
                             min_t(int, tb->zeroes_num, ih_item_len(ih)));
 
-               version = ih_version(ih);
-
                /*
                 * Calculate key component, item length and body to
                 * insert into S[0]
@@ -632,7 +626,6 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
        struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
        int n = B_NR_ITEMS(tbS0);
        struct buffer_info bi;
-       int ret;
 
        /* new item or part of it doesn't fall into R[0] */
        if (n - tb->rnum[0] >= tb->item_pos) {
@@ -646,13 +639,11 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
        if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {
                loff_t old_key_comp, old_len, r_zeroes_number;
                const char *r_body;
-               int version, shift;
+               int shift;
                loff_t offset;
 
                leaf_shift_right(tb, tb->rnum[0] - 1, -1);
 
-               version = ih_version(ih);
-
                /* Remember key component and item length */
                old_key_comp = le_ih_k_offset(ih);
                old_len = ih_item_len(ih);
@@ -698,7 +689,7 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
                /* whole new item falls into R[0] */
 
                /* Shift rnum[0]-1 items to R[0] */
-               ret = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
+               leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
 
                /* Insert new item into R[0] */
                buffer_info_init_right(tb, &bi);
@@ -950,14 +941,12 @@ static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
        if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) {
                int old_key_comp, old_len, r_zeroes_number;
                const char *r_body;
-               int version;
 
                /* Move snum[i]-1 items from S[0] to S_new[i] */
                leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, -1,
                                tb->S_new[i]);
 
                /* Remember key component and item length */
-               version = ih_version(ih);
                old_key_comp = le_ih_k_offset(ih);
                old_len = ih_item_len(ih);
 
index 6b0ddb2..1170922 100644 (file)
@@ -376,7 +376,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
                       int to, int to_bytes, short *snum012, int flow)
 {
        int i;
-       int cur_free;
        int units;
        struct virtual_node *vn = tb->tb_vn;
        int total_node_size, max_node_size, current_item_size;
@@ -438,7 +437,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
        /* leaf level */
        needed_nodes = 1;
        total_node_size = 0;
-       cur_free = max_node_size;
 
        /* start from 'from'-th item */
        start_item = from;
@@ -1734,14 +1732,12 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
         * and Fh is its father.
         */
        struct buffer_head *Sh, *Fh;
-       int maxsize, ret;
+       int ret;
        int lfree, rfree /* free space in L and R */ ;
 
        Sh = PATH_H_PBUFFER(tb->tb_path, h);
        Fh = PATH_H_PPARENT(tb->tb_path, h);
 
-       maxsize = MAX_CHILD_SIZE(Sh);
-
        /*
         * using tb->insert_size[h], which is negative in this case,
         * create_virtual_node calculates:
index 4517a13..4b3e3e7 100644 (file)
@@ -891,7 +891,6 @@ static int flush_older_commits(struct super_block *s,
        struct list_head *entry;
        unsigned int trans_id = jl->j_trans_id;
        unsigned int other_trans_id;
-       unsigned int first_trans_id;
 
 find_first:
        /*
@@ -914,8 +913,6 @@ find_first:
                return 0;
        }
 
-       first_trans_id = first_jl->j_trans_id;
-
        entry = &first_jl->j_list;
        while (1) {
                other_jl = JOURNAL_LIST_ENTRY(entry);
@@ -1351,7 +1348,7 @@ static int flush_journal_list(struct super_block *s,
                              struct reiserfs_journal_list *jl, int flushall)
 {
        struct reiserfs_journal_list *pjl;
-       struct reiserfs_journal_cnode *cn, *last;
+       struct reiserfs_journal_cnode *cn;
        int count;
        int was_jwait = 0;
        int was_dirty = 0;
@@ -1509,7 +1506,6 @@ static int flush_journal_list(struct super_block *s,
                                         b_blocknr, __func__);
                }
 free_cnode:
-               last = cn;
                cn = cn->next;
                if (saved_bh) {
                        /*
@@ -1792,7 +1788,6 @@ static int flush_used_journal_lists(struct super_block *s,
 {
        unsigned long len = 0;
        unsigned long cur_len;
-       int ret;
        int i;
        int limit = 256;
        struct reiserfs_journal_list *tjl;
@@ -1829,9 +1824,9 @@ static int flush_used_journal_lists(struct super_block *s,
         * transactions, but only bother if we've actually spanned
         * across multiple lists
         */
-       if (flush_jl != jl) {
-               ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
-       }
+       if (flush_jl != jl)
+               kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
+
        flush_journal_list(s, flush_jl, 1);
        put_journal_list(s, flush_jl);
        put_journal_list(s, jl);
@@ -1911,7 +1906,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
                              struct super_block *sb, int error)
 {
        struct reiserfs_transaction_handle myth;
-       int flushed = 0;
        struct reiserfs_journal *journal = SB_JOURNAL(sb);
 
        /*
@@ -1933,7 +1927,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
                                                     1);
                        journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb));
                        do_journal_end(&myth, FLUSH_ALL);
-                       flushed = 1;
                }
        }
 
@@ -3444,9 +3437,8 @@ static int remove_from_transaction(struct super_block *sb,
        if (cn == journal->j_last) {
                journal->j_last = cn->prev;
        }
-       if (bh)
-               remove_journal_hash(sb, journal->j_hash_table, NULL,
-                                   bh->b_blocknr, 0);
+       remove_journal_hash(sb, journal->j_hash_table, NULL,
+                           bh->b_blocknr, 0);
        clear_buffer_journaled(bh);     /* don't log this one */
 
        if (!already_cleaned) {
@@ -3988,7 +3980,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
        struct buffer_head *c_bh;       /* commit bh */
        struct buffer_head *d_bh;       /* desc bh */
        int cur_write_start = 0;        /* start index of current log write */
-       int old_start;
        int i;
        int flush;
        int wait_on_commit;
@@ -4245,7 +4236,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
        journal->j_num_work_lists++;
 
        /* reset journal values for the next transaction */
-       old_start = journal->j_start;
        journal->j_start =
            (journal->j_start + journal->j_len +
             2) % SB_ONDISK_JOURNAL_SIZE(sb);
index f5cebd7..7f86856 100644 (file)
@@ -1322,7 +1322,7 @@ void leaf_paste_entries(struct buffer_info *bi,
        char *item;
        struct reiserfs_de_head *deh;
        char *insert_point;
-       int i, old_entry_num;
+       int i;
        struct buffer_head *bh = bi->bi_bh;
 
        if (new_entry_count == 0)
@@ -1362,7 +1362,6 @@ void leaf_paste_entries(struct buffer_info *bi,
                put_deh_location(&deh[i],
                                 deh_location(&deh[i]) + paste_size);
 
-       old_entry_num = ih_entry_count(ih);
        put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count);
 
        /* prepare space for pasted records */
index 415d66c..34baf5c 100644 (file)
@@ -183,13 +183,12 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s)
        int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2;
        int old_max = sb_oid_maxsize(disk_sb);
        struct reiserfs_super_block_v1 *disk_sb_v1;
-       __le32 *objectid_map, *new_objectid_map;
+       __le32 *objectid_map;
        int i;
 
        disk_sb_v1 =
            (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
        objectid_map = (__le32 *) (disk_sb_v1 + 1);
-       new_objectid_map = (__le32 *) (disk_sb + 1);
 
        if (cur_size > new_size) {
                /*
index 9fed1c0..500f200 100644 (file)
@@ -746,9 +746,6 @@ static void check_leaf_block_head(struct buffer_head *bh)
 
 static void check_internal_block_head(struct buffer_head *bh)
 {
-       struct block_head *blkh;
-
-       blkh = B_BLK_HEAD(bh);
        if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT))
                reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh);
 
index 0037aea..da9ebe3 100644 (file)
@@ -593,7 +593,6 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,
        struct buffer_head *bh;
        struct path_element *last_element;
        int node_level, retval;
-       int right_neighbor_of_leaf_node;
        int fs_gen;
        struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
        b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
@@ -614,8 +613,6 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,
 
        pathrelse(search_path);
 
-       right_neighbor_of_leaf_node = 0;
-
        /*
         * With each iteration of this loop we search through the items in the
         * current node, and calculate the next current node(next path element)
@@ -701,7 +698,6 @@ io_error:
                         */
                        block_number = SB_ROOT_BLOCK(sb);
                        expected_level = -1;
-                       right_neighbor_of_leaf_node = 0;
 
                        /* repeat search from the root */
                        continue;
index eea7af6..2616424 100644 (file)
@@ -318,19 +318,10 @@ COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *,
 static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
 {
        struct compat_statfs64 buf;
-       if (sizeof(ubuf->f_bsize) == 4) {
-               if ((kbuf->f_type | kbuf->f_bsize | kbuf->f_namelen |
-                    kbuf->f_frsize | kbuf->f_flags) & 0xffffffff00000000ULL)
-                       return -EOVERFLOW;
-               /* f_files and f_ffree may be -1; it's okay
-                * to stuff that into 32 bits */
-               if (kbuf->f_files != 0xffffffffffffffffULL
-                && (kbuf->f_files & 0xffffffff00000000ULL))
-                       return -EOVERFLOW;
-               if (kbuf->f_ffree != 0xffffffffffffffffULL
-                && (kbuf->f_ffree & 0xffffffff00000000ULL))
-                       return -EOVERFLOW;
-       }
+
+       if ((kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
+               return -EOVERFLOW;
+
        memset(&buf, 0, sizeof(struct compat_statfs64));
        buf.f_type = kbuf->f_type;
        buf.f_bsize = kbuf->f_bsize;
index 8020974..cfadab2 100644 (file)
@@ -1300,6 +1300,7 @@ int get_tree_bdev(struct fs_context *fc,
        mutex_lock(&bdev->bd_fsfreeze_mutex);
        if (bdev->bd_fsfreeze_count > 0) {
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               blkdev_put(bdev, mode);
                warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
                return -EBUSY;
        }
@@ -1308,8 +1309,10 @@ int get_tree_bdev(struct fs_context *fc,
        fc->sget_key = bdev;
        s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
-       if (IS_ERR(s))
+       if (IS_ERR(s)) {
+               blkdev_put(bdev, mode);
                return PTR_ERR(s);
+       }
 
        if (s->s_root) {
                /* Don't summarily change the RO/RW state. */
@@ -1555,11 +1558,6 @@ int vfs_get_tree(struct fs_context *fc)
        sb = fc->root->d_sb;
        WARN_ON(!sb->s_bdi);
 
-       if (fc->subtype && !sb->s_subtype) {
-               sb->s_subtype = fc->subtype;
-               fc->subtype = NULL;
-       }
-
        /*
         * Write barrier is for super_cache_count(). We place it before setting
         * SB_BORN as the data dependency between the two functions is the
index eeeae04..0caa151 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/namei.h>
 #include <linux/tracefs.h>
 #include <linux/fsnotify.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/parser.h>
 #include <linux/magic.h>
@@ -390,6 +391,9 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
        struct dentry *dentry;
        struct inode *inode;
 
+       if (security_locked_down(LOCKDOWN_TRACEFS))
+               return NULL;
+
        if (!(mode & S_IFMT))
                mode |= S_IFREG;
        BUG_ON(!S_ISREG(mode));
index fe6d804..f9fd186 100644 (file)
@@ -1272,21 +1272,23 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
 }
 
 static __always_inline int validate_range(struct mm_struct *mm,
-                                         __u64 start, __u64 len)
+                                         __u64 *start, __u64 len)
 {
        __u64 task_size = mm->task_size;
 
-       if (start & ~PAGE_MASK)
+       *start = untagged_addr(*start);
+
+       if (*start & ~PAGE_MASK)
                return -EINVAL;
        if (len & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return -EINVAL;
-       if (start < mmap_min_addr)
+       if (*start < mmap_min_addr)
                return -EINVAL;
-       if (start >= task_size)
+       if (*start >= task_size)
                return -EINVAL;
-       if (len > task_size - start)
+       if (len > task_size - *start)
                return -EINVAL;
        return 0;
 }
@@ -1336,7 +1338,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                goto out;
        }
 
-       ret = validate_range(mm, uffdio_register.range.start,
+       ret = validate_range(mm, &uffdio_register.range.start,
                             uffdio_register.range.len);
        if (ret)
                goto out;
@@ -1525,7 +1527,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
                goto out;
 
-       ret = validate_range(mm, uffdio_unregister.start,
+       ret = validate_range(mm, &uffdio_unregister.start,
                             uffdio_unregister.len);
        if (ret)
                goto out;
@@ -1676,7 +1678,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
                goto out;
 
-       ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
+       ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
        if (ret)
                goto out;
 
@@ -1716,7 +1718,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_copy)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
+       ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
        if (ret)
                goto out;
        /*
@@ -1772,7 +1774,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_zeropage)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
+       ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
                             uffdio_zeropage.range.len);
        if (ret)
                goto out;
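
validate_range() now takes the start address by pointer so it can strip any architecture tag bits (untagged_addr()) before the bounds checks, and the caller keeps using the canonical value afterwards. A sketch of a converted call site (hypothetical wrapper):

	static int example_validate(struct userfaultfd_ctx *ctx,
				    struct uffdio_range *range)
	{
		/* untags range->start in place, then bounds-checks it */
		int ret = validate_range(ctx->mm, &range->start, range->len);

		if (ret)
			return ret;
		/* range->start is now canonical for all later users */
		return 0;
	}
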
index 5de296b..14fbdf2 100644 (file)
@@ -28,12 +28,11 @@ xfs_get_aghdr_buf(
        struct xfs_mount        *mp,
        xfs_daddr_t             blkno,
        size_t                  numblks,
-       int                     flags,
        const struct xfs_buf_ops *ops)
 {
        struct xfs_buf          *bp;
 
-       bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
+       bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0);
        if (!bp)
                return NULL;
 
@@ -345,7 +344,7 @@ xfs_ag_init_hdr(
 {
        struct xfs_buf          *bp;
 
-       bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops);
+       bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, ops);
        if (!bp)
                return -ENOMEM;
 
index 58fa85c..d6ed5d2 100644 (file)
@@ -81,9 +81,10 @@ typedef struct xfs_alloc_arg {
 /*
  * Defines for datatype
  */
-#define XFS_ALLOC_INITIAL_USER_DATA    (1 << 0)/* special case start of file */
-#define XFS_ALLOC_USERDATA_ZERO                (1 << 1)/* zero extent on allocation */
-#define XFS_ALLOC_NOBUSY               (1 << 2)/* Busy extents not allowed */
+#define XFS_ALLOC_USERDATA             (1 << 0)/* allocation is for user data*/
+#define XFS_ALLOC_INITIAL_USER_DATA    (1 << 1)/* special case start of file */
+#define XFS_ALLOC_USERDATA_ZERO                (1 << 2)/* zero extent on allocation */
+#define XFS_ALLOC_NOBUSY               (1 << 3)/* Busy extents not allowed */
 
 static inline bool
 xfs_alloc_is_userdata(int datatype)
index b9f0196..f0089e8 100644 (file)
@@ -826,32 +826,17 @@ xfs_attr_shortform_to_leaf(
        sf = (xfs_attr_shortform_t *)tmpbuffer;
 
        xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
-       xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK);
+       xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK);
 
        bp = NULL;
        error = xfs_da_grow_inode(args, &blkno);
-       if (error) {
-               /*
-                * If we hit an IO error middle of the transaction inside
-                * grow_inode(), we may have inconsistent data. Bail out.
-                */
-               if (error == -EIO)
-                       goto out;
-               xfs_idata_realloc(dp, size, XFS_ATTR_FORK);     /* try to put */
-               memcpy(ifp->if_u1.if_data, tmpbuffer, size);    /* it back */
+       if (error)
                goto out;
-       }
 
        ASSERT(blkno == 0);
        error = xfs_attr3_leaf_create(args, blkno, &bp);
-       if (error) {
-               /* xfs_attr3_leaf_create may not have instantiated a block */
-               if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0))
-                       goto out;
-               xfs_idata_realloc(dp, size, XFS_ATTR_FORK);     /* try to put */
-               memcpy(ifp->if_u1.if_data, tmpbuffer, size);    /* it back */
+       if (error)
                goto out;
-       }
 
        memset((char *)&nargs, 0, sizeof(nargs));
        nargs.dp = dp;
index 054b4ce..02469d5 100644 (file)
@@ -792,6 +792,7 @@ out_root_realloc:
  */
 void
 xfs_bmap_local_to_extents_empty(
+       struct xfs_trans        *tp,
        struct xfs_inode        *ip,
        int                     whichfork)
 {
@@ -808,6 +809,7 @@ xfs_bmap_local_to_extents_empty(
        ifp->if_u1.if_root = NULL;
        ifp->if_height = 0;
        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 }
 
 
@@ -840,7 +842,7 @@ xfs_bmap_local_to_extents(
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
 
        if (!ifp->if_bytes) {
-               xfs_bmap_local_to_extents_empty(ip, whichfork);
+               xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
                flags = XFS_ILOG_CORE;
                goto done;
        }
@@ -887,7 +889,7 @@ xfs_bmap_local_to_extents(
 
        /* account for the change in fork size */
        xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
-       xfs_bmap_local_to_extents_empty(ip, whichfork);
+       xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
        flags |= XFS_ILOG_CORE;
 
        ifp->if_u1.if_root = NULL;
@@ -4042,8 +4044,12 @@ xfs_bmapi_allocate(
         */
        if (!(bma->flags & XFS_BMAPI_METADATA)) {
                bma->datatype = XFS_ALLOC_NOBUSY;
-               if (whichfork == XFS_DATA_FORK && bma->offset == 0)
-                       bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+               if (whichfork == XFS_DATA_FORK) {
+                       if (bma->offset == 0)
+                               bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+                       else
+                               bma->datatype |= XFS_ALLOC_USERDATA;
+               }
                if (bma->flags & XFS_BMAPI_ZERO)
                        bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
        }
@@ -5621,6 +5627,11 @@ xfs_bmse_merge(
        if (error)
                return error;
 
+       /* change to extent format if required after extent removal */
+       error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
+       if (error)
+               return error;
+
 done:
        xfs_iext_remove(ip, icur, 0);
        xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
index 5bb446d..e2798c6 100644 (file)
@@ -182,7 +182,8 @@ void        xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
                xfs_filblks_t len);
 int    xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 int    xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
-void   xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
+void   xfs_bmap_local_to_extents_empty(struct xfs_trans *tp,
+               struct xfs_inode *ip, int whichfork);
 void   __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
                xfs_filblks_t len, const struct xfs_owner_info *oinfo,
                bool skip_discard);
index 9595ced..49e4bc3 100644 (file)
@@ -1096,7 +1096,7 @@ xfs_dir2_sf_to_block(
        memcpy(sfp, oldsfp, ifp->if_bytes);
 
        xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
-       xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);
+       xfs_bmap_local_to_extents_empty(tp, dp, XFS_DATA_FORK);
        dp->i_d.di_size = 0;
 
        /*
index 39dd2b9..e9371a8 100644 (file)
@@ -366,11 +366,11 @@ struct xfs_bulkstat {
        uint64_t        bs_blocks;      /* number of blocks             */
        uint64_t        bs_xflags;      /* extended flags               */
 
-       uint64_t        bs_atime;       /* access time, seconds         */
-       uint64_t        bs_mtime;       /* modify time, seconds         */
+       int64_t         bs_atime;       /* access time, seconds         */
+       int64_t         bs_mtime;       /* modify time, seconds         */
 
-       uint64_t        bs_ctime;       /* inode change time, seconds   */
-       uint64_t        bs_btime;       /* creation time, seconds       */
+       int64_t         bs_ctime;       /* inode change time, seconds   */
+       int64_t         bs_btime;       /* creation time, seconds       */
 
        uint32_t        bs_gen;         /* generation count             */
        uint32_t        bs_uid;         /* user id                      */
index a08dd8f..ac6cdca 100644 (file)
@@ -928,7 +928,7 @@ xfs_log_sb(
 
        xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
-       xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
+       xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
 }
 
 /*
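
The "- 1" is not cosmetic: xfs_trans_log_buf() takes inclusive first/last byte offsets, so logging a structure of size N covers the range [0, N - 1]. In sketch form:

	/* log the whole on-disk superblock: inclusive byte range */
	uint first = 0;
	uint last  = sizeof(struct xfs_dsb) - 1;

	xfs_trans_log_buf(tp, bp, first, last);
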
index a43d181..5533e48 100644 (file)
@@ -97,7 +97,6 @@ xchk_allocbt_rec(
        xfs_agnumber_t          agno = bs->cur->bc_private.a.agno;
        xfs_agblock_t           bno;
        xfs_extlen_t            len;
-       int                     error = 0;
 
        bno = be32_to_cpu(rec->alloc.ar_startblock);
        len = be32_to_cpu(rec->alloc.ar_blockcount);
@@ -109,7 +108,7 @@ xchk_allocbt_rec(
 
        xchk_allocbt_xref(bs->sc, bno, len);
 
-       return error;
+       return 0;
 }
 
 /* Scrub the freespace btrees for some AG. */
index 93b3793..0cab11a 100644 (file)
@@ -341,7 +341,6 @@ xchk_refcountbt_rec(
        xfs_extlen_t            len;
        xfs_nlink_t             refcount;
        bool                    has_cowflag;
-       int                     error = 0;
 
        bno = be32_to_cpu(rec->refc.rc_startblock);
        len = be32_to_cpu(rec->refc.rc_blockcount);
@@ -366,7 +365,7 @@ xchk_refcountbt_rec(
 
        xchk_refcountbt_xref(bs->sc, bno, len, refcount);
 
-       return error;
+       return 0;
 }
 
 /* Make sure we have as many refc blocks as the rmap says. */
index 0910cb7..4f44370 100644 (file)
@@ -864,6 +864,7 @@ xfs_alloc_file_space(
        xfs_filblks_t           allocatesize_fsb;
        xfs_extlen_t            extsz, temp;
        xfs_fileoff_t           startoffset_fsb;
+       xfs_fileoff_t           endoffset_fsb;
        int                     nimaps;
        int                     quota_flag;
        int                     rt;
@@ -891,7 +892,8 @@ xfs_alloc_file_space(
        imapp = &imaps[0];
        nimaps = 1;
        startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
-       allocatesize_fsb = XFS_B_TO_FSB(mp, count);
+       endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
+       allocatesize_fsb = endoffset_fsb - startoffset_fsb;
 
        /*
         * Allocate file space until done or until there is an error
index 120ef99..0abba17 100644 (file)
@@ -345,6 +345,15 @@ xfs_buf_allocate_memory(
        unsigned short          page_count, i;
        xfs_off_t               start, end;
        int                     error;
+       xfs_km_flags_t          kmflag_mask = 0;
+
+       /*
+        * Ensure a zeroed buffer for non-read cases.
+        */
+       if (!(flags & XBF_READ)) {
+               kmflag_mask |= KM_ZERO;
+               gfp_mask |= __GFP_ZERO;
+       }
 
        /*
         * for buffers that are contained within a single page, just allocate
@@ -354,7 +363,8 @@ xfs_buf_allocate_memory(
        size = BBTOB(bp->b_length);
        if (size < PAGE_SIZE) {
                int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
-               bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS);
+               bp->b_addr = kmem_alloc_io(size, align_mask,
+                                          KM_NOFS | kmflag_mask);
                if (!bp->b_addr) {
                        /* low memory - use alloc_page loop instead */
                        goto use_alloc_page;
@@ -2097,7 +2107,7 @@ xfs_verify_magic(
        int                     idx;
 
        idx = xfs_sb_version_hascrc(&mp->m_sb);
-       if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])))
+       if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
                return false;
        return dmagic == bp->b_ops->magic[idx];
 }
@@ -2115,7 +2125,7 @@ xfs_verify_magic16(
        int                     idx;
 
        idx = xfs_sb_version_hascrc(&mp->m_sb);
-       if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])))
+       if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
                return false;
        return dmagic == bp->b_ops->magic16[idx];
 }
index d952d59..1ffb179 100644 (file)
@@ -370,21 +370,23 @@ static int
 xfs_dio_write_end_io(
        struct kiocb            *iocb,
        ssize_t                 size,
+       int                     error,
        unsigned                flags)
 {
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  offset = iocb->ki_pos;
        unsigned int            nofs_flag;
-       int                     error = 0;
 
        trace_xfs_end_io_direct_write(ip, offset, size);
 
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return -EIO;
 
-       if (size <= 0)
-               return size;
+       if (error)
+               return error;
+       if (!size)
+               return 0;
 
        /*
         * Capture amount written on completion as we can't reliably account
@@ -441,6 +443,10 @@ out:
        return error;
 }
 
+static const struct iomap_dio_ops xfs_dio_write_ops = {
+       .end_io         = xfs_dio_write_end_io,
+};
+
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -541,7 +547,7 @@ xfs_file_dio_aio_write(
        }
 
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-       ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+       ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
 
        /*
         * If unaligned, this is the only IO in-flight. If it has not yet
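
iomap_dio_rw() now takes an ops structure rather than a bare completion callback, and the completion receives the error explicitly instead of encoding it in a negative size. A minimal sketch of a converted caller (hypothetical names):

	static int example_dio_end_io(struct kiocb *iocb, ssize_t size,
				      int error, unsigned int flags)
	{
		if (error)
			return error;	/* no more "size <= 0" probing */
		/* account 'size' bytes of completed direct I/O here */
		return 0;
	}

	static const struct iomap_dio_ops example_dio_ops = {
		.end_io	= example_dio_end_io,
	};

	/* ret = iomap_dio_rw(iocb, from, &example_iomap_ops, &example_dio_ops); */
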
index a2beee9..641d07f 100644 (file)
@@ -1443,7 +1443,7 @@ xlog_alloc_log(
                prev_iclog = iclog;
 
                iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
-                                               KM_MAYFAIL);
+                                               KM_MAYFAIL | KM_ZERO);
                if (!iclog->ic_data)
                        goto out_free_iclog;
 #ifdef DEBUG
index 5083190..c1a514f 100644 (file)
@@ -127,7 +127,7 @@ xlog_alloc_buffer(
        if (nbblks > 1 && log->l_sectBBsize > 1)
                nbblks += log->l_sectBBsize;
        nbblks = round_up(nbblks, log->l_sectBBsize);
-       return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL);
+       return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO);
 }
 
 /*
index ddd0bf7..f1bc88f 100644 (file)
@@ -63,19 +63,6 @@ static const struct sysfs_ops xfs_sysfs_ops = {
        .store = xfs_sysfs_object_store,
 };
 
-/*
- * xfs_mount kobject. The mp kobject also serves as the per-mount parent object
- * that is identified by the fsname under sysfs.
- */
-
-static inline struct xfs_mount *
-to_mp(struct kobject *kobject)
-{
-       struct xfs_kobj *kobj = to_kobj(kobject);
-
-       return container_of(kobj, struct xfs_mount, m_kobj);
-}
-
 static struct attribute *xfs_mp_attrs[] = {
        NULL,
 };
index 4ae65e1..ffba794 100644 (file)
@@ -312,7 +312,6 @@ header-test-                        += linux/mfd/as3711.h
 header-test-                   += linux/mfd/as3722.h
 header-test-                   += linux/mfd/da903x.h
 header-test-                   += linux/mfd/da9055/pdata.h
-header-test-                   += linux/mfd/da9063/pdata.h
 header-test-                   += linux/mfd/db8500-prcmu.h
 header-test-                   += linux/mfd/dbx500-prcmu.h
 header-test-                   += linux/mfd/dln2.h
index f936033..4780517 100644 (file)
@@ -232,8 +232,8 @@ struct acpi_processor {
        struct acpi_processor_limit limit;
        struct thermal_cooling_device *cdev;
        struct device *dev; /* Processor device. */
-       struct dev_pm_qos_request perflib_req;
-       struct dev_pm_qos_request thermal_req;
+       struct freq_qos_request perflib_req;
+       struct freq_qos_request thermal_req;
 };
 
 struct acpi_processor_errata {
@@ -302,8 +302,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
 #ifdef CONFIG_CPU_FREQ
 extern bool acpi_processor_cpufreq_init;
 void acpi_processor_ignore_ppc_init(void);
-void acpi_processor_ppc_init(int cpu);
-void acpi_processor_ppc_exit(int cpu);
+void acpi_processor_ppc_init(struct cpufreq_policy *policy);
+void acpi_processor_ppc_exit(struct cpufreq_policy *policy);
 void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag);
 extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit);
 #else
@@ -311,11 +311,11 @@ static inline void acpi_processor_ignore_ppc_init(void)
 {
        return;
 }
-static inline void acpi_processor_ppc_init(int cpu)
+static inline void acpi_processor_ppc_init(struct cpufreq_policy *policy)
 {
        return;
 }
-static inline void acpi_processor_ppc_exit(int cpu)
+static inline void acpi_processor_ppc_exit(struct cpufreq_policy *policy)
 {
        return;
 }
@@ -431,14 +431,14 @@ static inline int acpi_processor_hotplug(struct acpi_processor *pr)
 int acpi_processor_get_limit_info(struct acpi_processor *pr);
 extern const struct thermal_cooling_device_ops processor_cooling_ops;
 #if defined(CONFIG_ACPI_CPU_FREQ_PSS) & defined(CONFIG_CPU_FREQ)
-void acpi_thermal_cpufreq_init(int cpu);
-void acpi_thermal_cpufreq_exit(int cpu);
+void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy);
+void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy);
 #else
-static inline void acpi_thermal_cpufreq_init(int cpu)
+static inline void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
 {
        return;
 }
-static inline void acpi_thermal_cpufreq_exit(int cpu)
+static inline void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
 {
        return;
 }
index 7357a3c..384b5c8 100644 (file)
@@ -10,6 +10,7 @@
 #define BUGFLAG_WARNING                (1 << 0)
 #define BUGFLAG_ONCE           (1 << 1)
 #define BUGFLAG_DONE           (1 << 2)
+#define BUGFLAG_NO_CUT_HERE    (1 << 3)        /* CUT_HERE already sent */
 #define BUGFLAG_TAINT(taint)   ((taint) << 8)
 #define BUG_GET_TAINT(bug)     ((bug)->flags >> 8)
 #endif
@@ -61,18 +62,6 @@ struct bug_entry {
 #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
 #endif
 
-#ifdef __WARN_FLAGS
-#define __WARN_TAINT(taint)            __WARN_FLAGS(BUGFLAG_TAINT(taint))
-#define __WARN_ONCE_TAINT(taint)       __WARN_FLAGS(BUGFLAG_ONCE|BUGFLAG_TAINT(taint))
-
-#define WARN_ON_ONCE(condition) ({                             \
-       int __ret_warn_on = !!(condition);                      \
-       if (unlikely(__ret_warn_on))                            \
-               __WARN_ONCE_TAINT(TAINT_WARN);                  \
-       unlikely(__ret_warn_on);                                \
-})
-#endif
-
 /*
  * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
  * significant kernel issues that need prompt attention if they should ever
@@ -89,27 +78,27 @@ struct bug_entry {
  *
  * Use the versions with printk format strings to provide better diagnostics.
  */
-#ifndef __WARN_TAINT
-extern __printf(3, 4)
-void warn_slowpath_fmt(const char *file, const int line,
-                      const char *fmt, ...);
+#ifndef __WARN_FLAGS
 extern __printf(4, 5)
-void warn_slowpath_fmt_taint(const char *file, const int line, unsigned taint,
-                            const char *fmt, ...);
-extern void warn_slowpath_null(const char *file, const int line);
-#define WANT_WARN_ON_SLOWPATH
-#define __WARN()               warn_slowpath_null(__FILE__, __LINE__)
-#define __WARN_printf(arg...)  warn_slowpath_fmt(__FILE__, __LINE__, arg)
-#define __WARN_printf_taint(taint, arg...)                             \
-       warn_slowpath_fmt_taint(__FILE__, __LINE__, taint, arg)
+void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
+                      const char *fmt, ...);
+#define __WARN()               __WARN_printf(TAINT_WARN, NULL)
+#define __WARN_printf(taint, arg...)                                   \
+       warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
 #else
 extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
-#define __WARN() do { \
-       printk(KERN_WARNING CUT_HERE); __WARN_TAINT(TAINT_WARN); \
-} while (0)
-#define __WARN_printf(arg...)  __WARN_printf_taint(TAINT_WARN, arg)
-#define __WARN_printf_taint(taint, arg...)                             \
-       do { __warn_printk(arg); __WARN_TAINT(taint); } while (0)
+#define __WARN()               __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
+#define __WARN_printf(taint, arg...) do {                              \
+               __warn_printk(arg);                                     \
+               __WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
+       } while (0)
+#define WARN_ON_ONCE(condition) ({                             \
+       int __ret_warn_on = !!(condition);                      \
+       if (unlikely(__ret_warn_on))                            \
+               __WARN_FLAGS(BUGFLAG_ONCE |                     \
+                            BUGFLAG_TAINT(TAINT_WARN));        \
+       unlikely(__ret_warn_on);                                \
+})
 #endif
 
 /* used internally by panic.c */
@@ -132,7 +121,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #define WARN(condition, format...) ({                                  \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
-               __WARN_printf(format);                                  \
+               __WARN_printf(TAINT_WARN, format);                      \
        unlikely(__ret_warn_on);                                        \
 })
 #endif
@@ -140,7 +129,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #define WARN_TAINT(condition, taint, format...) ({                     \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
-               __WARN_printf_taint(taint, format);                     \
+               __WARN_printf(taint, format);                           \
        unlikely(__ret_warn_on);                                        \
 })
 
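Editor's note: the rework above routes WARN(), WARN_TAINT() and friends
through a single __WARN_printf(taint, format...). As a hedged illustration
of the GNU statement-expression idiom these macros rely on, here is a
minimal userspace sketch; the fprintf() stand-in replaces the kernel's
taint bookkeeping and file/line reporting and is not the kernel API.

  #include <stdio.h>

  /* Simplified WARN(): evaluate the condition once, print when it is
   * true, and yield the truth value so callers can write
   * if (WARN(cond, ...)) { ... }, just like the kernel macro.
   */
  #define WARN(condition, fmt, ...) ({                                  \
          int __ret_warn_on = !!(condition);                            \
          if (__ret_warn_on)                                            \
                  fprintf(stderr, "WARNING: " fmt "\n", ##__VA_ARGS__); \
          __ret_warn_on;                                                \
  })

  int main(void)
  {
          int x = 5;

          if (WARN(x > 3, "x unexpectedly large: %d", x))
                  return 1;
          return 0;
  }
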
index 8476175..73f7421 100644 (file)
@@ -49,7 +49,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
  * @mm: the mm_struct of the current context
  * @gfp: GFP flags to use for the allocation
  *
- * Allocates a page and runs the pgtable_page_ctor().
+ * Allocates a page and runs the pgtable_pte_page_ctor().
  *
  * This function is intended for architectures that need
  * anything beyond simple page allocation or must have custom GFP flags.
@@ -63,7 +63,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
        pte = alloc_page(gfp);
        if (!pte)
                return NULL;
-       if (!pgtable_page_ctor(pte)) {
+       if (!pgtable_pte_page_ctor(pte)) {
                __free_page(pte);
                return NULL;
        }
@@ -76,7 +76,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
  * pte_alloc_one - allocate a page for PTE-level user page table
  * @mm: the mm_struct of the current context
  *
- * Allocates a page and runs the pgtable_page_ctor().
+ * Allocates a page and runs the pgtable_pte_page_ctor().
  *
  * Return: `struct page` initialized as page table or %NULL on error
  */
@@ -98,15 +98,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
  */
 static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
 {
-       pgtable_page_dtor(pte_page);
+       pgtable_pte_page_dtor(pte_page);
        __free_page(pte_page);
 }
 
-#else /* CONFIG_MMU */
-
-/* This is enough for a nommu architecture */
-#define check_pgt_cache()          do { } while (0)
-
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
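
Editor's note: __pte_alloc_one() above shows the allocate/construct/undo
shape that the pgtable_pte_page_ctor() rename preserves. A small sketch of
the same pattern in plain C, assuming nothing kernel-specific (obj_ctor()
is an illustrative stand-in for any fallible constructor):

  #include <stdlib.h>

  struct obj { int ready; };

  /* Stand-in for a fallible constructor such as pgtable_pte_page_ctor():
   * returns nonzero on success, zero on failure.
   */
  static int obj_ctor(struct obj *o)
  {
          o->ready = 1;
          return 1;
  }

  static struct obj *obj_alloc(void)
  {
          struct obj *o = malloc(sizeof(*o));

          if (!o)
                  return NULL;
          if (!obj_ctor(o)) {     /* construct; undo the allocation on failure */
                  free(o);
                  return NULL;
          }
          return o;
  }

  int main(void)
  {
          free(obj_alloc());
          return 0;
  }
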
index 75d9d68..8186918 100644 (file)
@@ -1002,9 +1002,8 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
  * need this). If THP is not enabled, the pmd can't go away under the
  * code even if MADV_DONTNEED runs, but if THP is enabled we need to
  * run a pmd_trans_unstable before walking the ptes after
- * split_huge_page_pmd returns (because it may have run when the pmd
- * become null, but then a page fault can map in a THP and not a
- * regular page).
+ * split_huge_pmd returns (because it may have run when the pmd become
+ * null, but then a page fault can map in a THP and not a regular page).
  */
 static inline int pmd_trans_unstable(pmd_t *pmd)
 {
@@ -1126,7 +1125,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 static inline void init_espfix_bsp(void) { }
 #endif
 
-extern void __init pgd_cache_init(void);
+extern void __init pgtable_cache_init(void);
 
 #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
 static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
index cd28f63..dae6460 100644 (file)
                        __start_lsm_info = .;                           \
                        KEEP(*(.lsm_info.init))                         \
                        __end_lsm_info = .;
+#define EARLY_LSM_TABLE()      . = ALIGN(8);                           \
+                       __start_early_lsm_info = .;                     \
+                       KEEP(*(.early_lsm_info.init))                   \
+                       __end_early_lsm_info = .;
 #else
 #define LSM_TABLE()
+#define EARLY_LSM_TABLE()
 #endif
 
 #define ___OF_TABLE(cfg, name) _OF_TABLE_##cfg(name)
        ACPI_PROBE_TABLE(timer)                                         \
        THERMAL_TABLE(governor)                                         \
        EARLYCON_TABLE()                                                \
-       LSM_TABLE()
+       LSM_TABLE()                                                     \
+       EARLY_LSM_TABLE()
 
 #define INIT_TEXT                                                      \
        *(.init.text .init.text.*)                                      \
index 96071be..38ec7f5 100644 (file)
@@ -9,6 +9,7 @@
 #define _CRYPTO_PKCS7_H
 
 #include <linux/verification.h>
+#include <linux/hash_info.h>
 #include <crypto/public_key.h>
 
 struct key;
@@ -40,4 +41,7 @@ extern int pkcs7_verify(struct pkcs7_message *pkcs7,
 extern int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7,
                                      const void *data, size_t datalen);
 
+extern int pkcs7_get_digest(struct pkcs7_message *pkcs7, const u8 **buf,
+                           u32 *len, enum hash_algo *hash_algo);
+
 #endif /* _CRYPTO_PKCS7_H */
index 7d14c11..408b6f4 100644 (file)
@@ -285,12 +285,12 @@ struct drm_crtc_state {
        u32 target_vblank;
 
        /**
-        * @pageflip_flags:
+        * @async_flip:
         *
-        * DRM_MODE_PAGE_FLIP_* flags, as passed to the page flip ioctl.
-        * Zero in any other case.
+        * This is set when DRM_MODE_PAGE_FLIP_ASYNC is set in the legacy
+        * PAGE_FLIP IOCTL. It's not wired up for the atomic IOCTL itself yet.
         */
-       u32 pageflip_flags;
+       bool async_flip;
 
        /**
         * @vrr_enabled:
@@ -1108,7 +1108,7 @@ struct drm_crtc {
        /**
         * @self_refresh_data: Holds the state for the self refresh helpers
         *
-        * Initialized via drm_self_refresh_helper_register().
+        * Initialized via drm_self_refresh_helper_init().
         */
        struct drm_self_refresh_data *self_refresh_data;
 };
index 397a583..5b79d25 100644 (file)
@@ -12,9 +12,9 @@ struct drm_atomic_state;
 struct drm_crtc;
 
 void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state);
+void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+                                             unsigned int commit_time_ms);
 
-int drm_self_refresh_helper_init(struct drm_crtc *crtc,
-                                unsigned int entry_delay_ms);
-
+int drm_self_refresh_helper_init(struct drm_crtc *crtc);
 void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc);
 #endif
index 978cc23..8b4e516 100644 (file)
@@ -643,6 +643,12 @@ bool acpi_gtdt_c3stop(int type);
 int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count);
 #endif
 
+#ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{
+}
+#endif
+
 #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER
 static inline u64 acpi_arch_get_root_pointer(void)
 {
index 0b58974..c7d6b2e 100644 (file)
@@ -46,6 +46,12 @@ enum backlight_notification {
        BACKLIGHT_UNREGISTERED,
 };
 
+enum backlight_scale {
+       BACKLIGHT_SCALE_UNKNOWN = 0,
+       BACKLIGHT_SCALE_LINEAR,
+       BACKLIGHT_SCALE_NON_LINEAR,
+};
+
 struct backlight_device;
 struct fb_info;
 
@@ -80,6 +86,8 @@ struct backlight_properties {
        enum backlight_type type;
        /* Flags used to signal drivers of state changes */
        unsigned int state;
+       /* Type of the brightness scale (linear, non-linear, ...) */
+       enum backlight_scale scale;
 
 #define BL_CORE_SUSPENDED      (1 << 0)        /* backlight is suspended */
 #define BL_CORE_FBBLANK                (1 << 1)        /* backlight is under an fb blank event */
index 90528f1..29fc933 100644 (file)
@@ -326,10 +326,11 @@ static inline int bitmap_equal(const unsigned long *src1,
 }
 
 /**
- * bitmap_or_equal - Check whether the or of two bitnaps is equal to a third
+ * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third
  * @src1:      Pointer to bitmap 1
  * @src2:      Pointer to bitmap 2 will be or'ed with bitmap 1
  * @src3:      Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
+ * @nbits:     number of bits in each of these bitmaps
  *
  * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
  */
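
Editor's note: a quick userspace restatement of the documented check, over
single-word bitmaps only (the kernel routine handles arbitrary @nbits and
masks the final partial word):

  #include <stdbool.h>
  #include <stdio.h>

  /* One-word bitmap_or_equal(): true if (a | b) == c. */
  static bool bitmap_or_equal_word(unsigned long a, unsigned long b,
                                   unsigned long c)
  {
          return (a | b) == c;
  }

  int main(void)
  {
          printf("%d\n", bitmap_or_equal_word(0x5UL, 0x2UL, 0x7UL)); /* 1 */
          printf("%d\n", bitmap_or_equal_word(0x5UL, 0x2UL, 0x3UL)); /* 0 */
          return 0;
  }
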
index cf074bc..c94a9ff 100644 (file)
@@ -4,6 +4,13 @@
 #include <asm/types.h>
 #include <linux/bits.h>
 
+/* Set bits in the first 'n' bytes when loaded from memory */
+#ifdef __LITTLE_ENDIAN
+#  define aligned_byte_mask(n) ((1UL << 8*(n))-1)
+#else
+#  define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n)))
+#endif
+
 #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
 #define BITS_TO_LONGS(nr)      DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 
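Editor's note: on a little-endian machine the new aligned_byte_mask(n)
selects the first n bytes of a word as loaded from memory. A hedged check
of the little-endian form, assuming a 64-bit unsigned long:

  #include <stdio.h>

  /* Little-endian form from the hunk above: the low n bytes set.
   * Valid for 0 < n < sizeof(long); n == 8 would shift by 64 (UB).
   */
  #define aligned_byte_mask(n) ((1UL << 8*(n))-1)

  int main(void)
  {
          printf("%#lx\n", aligned_byte_mask(1)); /* 0xff */
          printf("%#lx\n", aligned_byte_mask(3)); /* 0xffffff */
          return 0;
  }
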
index d9db32f..f3ea78b 100644 (file)
@@ -1524,10 +1524,14 @@ struct blk_integrity_iter {
 };
 
 typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef void (integrity_prepare_fn) (struct request *);
+typedef void (integrity_complete_fn) (struct request *, unsigned int);
 
 struct blk_integrity_profile {
        integrity_processing_fn         *generate_fn;
        integrity_processing_fn         *verify_fn;
+       integrity_prepare_fn            *prepare_fn;
+       integrity_complete_fn           *complete_fn;
        const char                      *name;
 };
 
index 82156da..b9dbda1 100644 (file)
@@ -293,6 +293,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
 struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
 u64 ceph_client_gid(struct ceph_client *client);
 extern void ceph_destroy_client(struct ceph_client *client);
+extern void ceph_reset_client_addr(struct ceph_client *client);
 extern int __ceph_open_session(struct ceph_client *client,
                               unsigned long started);
 extern int ceph_open_session(struct ceph_client *client);
index 23895d1..c4458dc 100644 (file)
@@ -337,6 +337,7 @@ extern void ceph_msgr_flush(void);
 extern void ceph_messenger_init(struct ceph_messenger *msgr,
                                struct ceph_entity_addr *myaddr);
 extern void ceph_messenger_fini(struct ceph_messenger *msgr);
+extern void ceph_messenger_reset_nonce(struct ceph_messenger *msgr);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
                        const struct ceph_connection_operations *ops,
index b4d134d..dbb8a69 100644 (file)
@@ -109,6 +109,7 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
 
 extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
 extern void ceph_monc_stop(struct ceph_mon_client *monc);
+extern void ceph_monc_reopen_session(struct ceph_mon_client *monc);
 
 enum {
        CEPH_SUB_MONMAP = 0,
index ad7fe5d..eaffbdd 100644 (file)
@@ -381,6 +381,7 @@ extern void ceph_osdc_cleanup(void);
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
                          struct ceph_client *client);
 extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
+extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
 
 extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
                                   struct ceph_msg *msg);
@@ -388,6 +389,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
                                 struct ceph_msg *msg);
 void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
 void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
+void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
 
 #define osd_req_op_data(oreq, whch, typ, fld)                          \
 ({                                                                     \
index 9569e7c..4b898cd 100644 (file)
@@ -129,11 +129,8 @@ static inline bool compaction_failed(enum compact_result result)
        return false;
 }
 
-/*
- * Compaction  has backed off for some reason. It might be throttling or
- * lock contention. Retrying is still worthwhile.
- */
-static inline bool compaction_withdrawn(enum compact_result result)
+/* Compaction needs reclaim to be performed first, so it can continue. */
+static inline bool compaction_needs_reclaim(enum compact_result result)
 {
        /*
         * Compaction backed off due to watermark checks for order-0
@@ -142,6 +139,16 @@ static inline bool compaction_withdrawn(enum compact_result result)
        if (result == COMPACT_SKIPPED)
                return true;
 
+       return false;
+}
+
+/*
+ * Compaction has backed off for some reason after doing some work or none
+ * at all. It might be throttling or lock contention. Retrying might still
+ * be worthwhile, but with a higher priority if allowed.
+ */
+static inline bool compaction_withdrawn(enum compact_result result)
+{
        /*
         * If compaction is deferred for high-order allocations, it is
         * because sync compaction recently failed. If this is the case
@@ -207,6 +214,11 @@ static inline bool compaction_failed(enum compact_result result)
        return false;
 }
 
+static inline bool compaction_needs_reclaim(enum compact_result result)
+{
+       return false;
+}
+
 static inline bool compaction_withdrawn(enum compact_result result)
 {
        return true;
index 6b318ef..cdf0165 100644 (file)
@@ -40,6 +40,7 @@
 # define __GCC4_has_attribute___noclone__             1
 # define __GCC4_has_attribute___nonstring__           0
 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+# define __GCC4_has_attribute___fallthrough__         0
 #endif
 
 /*
 # define __noclone
 #endif
 
+/*
+ * Add the pseudo keyword 'fallthrough' so case statement blocks
+ * must end with any of these keywords:
+ *   break;
+ *   fallthrough;
+ *   goto <label>;
+ *   return [expression];
+ *
+ *  gcc: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#Statement-Attributes
+ */
+#if __has_attribute(__fallthrough__)
+# define fallthrough                    __attribute__((__fallthrough__))
+#else
+# define fallthrough                    do {} while (0)  /* fallthrough */
+#endif
+
 /*
  * Note the missing underscores.
  *
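
Editor's note: a minimal userspace demo of the fallthrough pseudo-keyword
defined above. This assumes a compiler providing __has_attribute and
__attribute__((__fallthrough__)) (gcc >= 7 or recent clang); otherwise it
degrades to the same empty do/while the header uses.

  #include <stdio.h>

  #ifdef __has_attribute
  # if __has_attribute(__fallthrough__)
  #  define fallthrough __attribute__((__fallthrough__))
  # endif
  #endif
  #ifndef fallthrough
  # define fallthrough do {} while (0)  /* fallthrough */
  #endif

  int main(void)
  {
          switch (1) {
          case 1:
                  puts("one");
                  fallthrough;  /* deliberate: silences -Wimplicit-fallthrough */
          case 2:
                  puts("two");
                  break;
          default:
                  break;
          }
          return 0;
  }
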
index 88dc0c6..d0633eb 100644 (file)
@@ -201,12 +201,14 @@ enum cpuhp_smt_control {
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
 extern void cpu_smt_check_topology(void);
+extern bool cpu_smt_possible(void);
 extern int cpuhp_smt_enable(void);
 extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
 #else
 # define cpu_smt_control               (CPU_SMT_NOT_IMPLEMENTED)
 static inline void cpu_smt_disable(bool force) { }
 static inline void cpu_smt_check_topology(void) { }
+static inline bool cpu_smt_possible(void) { return false; }
 static inline int cpuhp_smt_enable(void) { return 0; }
 static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
 #endif
index c57e88e..92d5fdc 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/completion.h>
 #include <linux/kobject.h>
 #include <linux/notifier.h>
+#include <linux/pm_qos.h>
 #include <linux/spinlock.h>
 #include <linux/sysfs.h>
 
@@ -76,8 +77,10 @@ struct cpufreq_policy {
        struct work_struct      update; /* if update_policy() needs to be
                                         * called, but you're in IRQ context */
 
-       struct dev_pm_qos_request *min_freq_req;
-       struct dev_pm_qos_request *max_freq_req;
+       struct freq_constraints constraints;
+       struct freq_qos_request *min_freq_req;
+       struct freq_qos_request *max_freq_req;
+
        struct cpufreq_frequency_table  *freq_table;
        enum cpufreq_table_sorting freq_table_sorted;
 
index b5a5a1e..78a73eb 100644 (file)
@@ -200,8 +200,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
        for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_wrap(cpu, mask, start)    \
        for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
-#define for_each_cpu_and(cpu, mask, and)       \
-       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
+#define for_each_cpu_and(cpu, mask1, mask2)    \
+       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
 #else
 /**
  * cpumask_first - get the first cpu in a cpumask
@@ -290,20 +290,20 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
- * @mask: the first cpumask pointer
- * @and: the second cpumask pointer
+ * @mask1: the first cpumask pointer
+ * @mask2: the second cpumask pointer
  *
  * This saves a temporary CPU mask in many places.  It is equivalent to:
  *     struct cpumask tmp;
- *     cpumask_and(&tmp, &mask, &and);
+ *     cpumask_and(&tmp, &mask1, &mask2);
  *     for_each_cpu(cpu, &tmp)
  *             ...
  *
  * After the loop, cpu is >= nr_cpu_ids.
  */
-#define for_each_cpu_and(cpu, mask, and)                               \
+#define for_each_cpu_and(cpu, mask1, mask2)                            \
        for ((cpu) = -1;                                                \
-               (cpu) = cpumask_next_and((cpu), (mask), (and)),         \
+               (cpu) = cpumask_next_and((cpu), (mask1), (mask2)),      \
                (cpu) < nr_cpu_ids;)
 #endif /* SMP */
 
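Editor's note: the kerneldoc above is the point of the rename: the macro
walks the intersection without building a temporary cpumask. A single-word
userspace analogue of that iteration (for_each_bit_and() here is an
illustrative name, not a kernel macro):

  #include <stdio.h>

  /* Visit every bit set in both masks, with no temporary mask variable. */
  #define for_each_bit_and(bit, mask1, mask2)                     \
          for ((bit) = 0; (bit) < 64; (bit)++)                    \
                  if ((((mask1) & (mask2)) >> (bit)) & 1)

  int main(void)
  {
          unsigned long m1 = 0x2dUL, m2 = 0x3cUL;
          int bit;

          for_each_bit_and(bit, m1, m2)
                  printf("bit %d\n", bit);  /* prints 2, 3 and 5 */
          return 0;
  }
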
index 79435cf..897e799 100644 (file)
@@ -31,6 +31,8 @@
 #define SJA1105_META_SMAC                      0x222222222222ull
 #define SJA1105_META_DMAC                      0x0180C200000Eull
 
+#define SJA1105_HWTS_RX_EN                     0
+
 /* Global tagger data: each struct sja1105_port has a reference to
  * the structure defined in struct sja1105_private.
  */
@@ -42,7 +44,7 @@ struct sja1105_tagger_data {
         * from taggers running on multiple ports on SMP systems
         */
        spinlock_t meta_lock;
-       bool hwts_rx_en;
+       unsigned long state;
 };
 
 struct sja1105_skb_cb {
index 6c80944..4cf02ec 100644 (file)
@@ -204,6 +204,12 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val,
        do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
 #define dynamic_dev_dbg(dev, fmt, ...)                                 \
        do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0)
+#define dynamic_hex_dump(prefix_str, prefix_type, rowsize,             \
+                        groupsize, buf, len, ascii)                    \
+       do { if (0)                                                     \
+               print_hex_dump(KERN_DEBUG, prefix_str, prefix_type,     \
+                               rowsize, groupsize, buf, len, ascii);   \
+       } while (0)
 #endif
 
 #endif
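
Editor's note: the dynamic_hex_dump() stub added above uses the
do { if (0) ... } while (0) idiom: when dynamic debug is compiled out, the
arguments are still seen by the compiler (so format and type mistakes are
diagnosed) but the call is dead code the optimizer drops. A tiny userspace
illustration of the same trick:

  #include <stdio.h>

  /* Disabled-debug stub: type-checks its arguments, emits no code. */
  #define debug_print(fmt, ...)                                   \
          do { if (0) printf(fmt, ##__VA_ARGS__); } while (0)

  int main(void)
  {
          int count = 3;

          debug_print("count=%d\n", count);  /* compiles, prints nothing */
          return 0;
  }
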
index bd38370..d87acf6 100644 (file)
@@ -1579,9 +1579,22 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
 efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
                                struct efi_boot_memmap *map);
 
+efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
+                                unsigned long size, unsigned long align,
+                                unsigned long *addr, unsigned long min);
+
+static inline
 efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
                           unsigned long size, unsigned long align,
-                          unsigned long *addr);
+                          unsigned long *addr)
+{
+       /*
+        * Don't allocate at 0x0. It will confuse code that
+        * checks pointers against NULL. Skip the first 8
+        * bytes so we start at a nice even number.
+        */
+       return efi_low_alloc_above(sys_table_arg, size, align, addr, 0x8);
+}
 
 efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
                            unsigned long size, unsigned long align,
@@ -1592,7 +1605,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
                                 unsigned long image_size,
                                 unsigned long alloc_size,
                                 unsigned long preferred_addr,
-                                unsigned long alignment);
+                                unsigned long alignment,
+                                unsigned long min_addr);
 
 efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
                                  efi_loaded_image_t *image,
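
Editor's note: the new efi_low_alloc() wrapper passes min = 0x8 so a
successful allocation can never return address zero, which callers would
misread as NULL. A hypothetical sketch of that lower-bound filtering; the
real efi_low_alloc_above() walks the firmware memory map, which is elided
here:

  #include <stdio.h>

  /* Hypothetical picker: first candidate address not below 'min'. */
  static unsigned long pick_low_alloc(const unsigned long *cand, int n,
                                      unsigned long min)
  {
          for (int i = 0; i < n; i++)
                  if (cand[i] >= min)
                          return cand[i];
          return 0;       /* nothing usable */
  }

  int main(void)
  {
          unsigned long cand[] = { 0x0, 0x8, 0x1000 };

          /* 0x0 is skipped: it would look like a NULL pointer. */
          printf("%#lx\n", pick_low_alloc(cand, 3, 0x8)); /* 0x8 */
          return 0;
  }
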
index 95f55b7..941d075 100644 (file)
@@ -18,8 +18,6 @@ extern struct module __this_module;
 #define THIS_MODULE ((struct module *)0)
 #endif
 
-#define NS_SEPARATOR "."
-
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
@@ -48,14 +46,14 @@ extern struct module __this_module;
  * absolute relocations that require runtime processing on relocatable
  * kernels.
  */
-#define __KSYMTAB_ENTRY_NS(sym, sec, ns)                               \
+#define __KSYMTAB_ENTRY_NS(sym, sec)                                   \
        __ADDRESSABLE(sym)                                              \
        asm("   .section \"___ksymtab" sec "+" #sym "\", \"a\"  \n"     \
            "   .balign 4                                       \n"     \
-           "__ksymtab_" #sym NS_SEPARATOR #ns ":               \n"     \
+           "__ksymtab_" #sym ":                                \n"     \
            "   .long   " #sym "- .                             \n"     \
            "   .long   __kstrtab_" #sym "- .                   \n"     \
-           "   .long   __kstrtab_ns_" #sym "- .                \n"     \
+           "   .long   __kstrtabns_" #sym "- .                 \n"     \
            "   .previous                                       \n")
 
 #define __KSYMTAB_ENTRY(sym, sec)                                      \
@@ -74,16 +72,14 @@ struct kernel_symbol {
        int namespace_offset;
 };
 #else
-#define __KSYMTAB_ENTRY_NS(sym, sec, ns)                               \
-       static const struct kernel_symbol __ksymtab_##sym##__##ns       \
-       asm("__ksymtab_" #sym NS_SEPARATOR #ns)                         \
+#define __KSYMTAB_ENTRY_NS(sym, sec)                                   \
+       static const struct kernel_symbol __ksymtab_##sym               \
        __attribute__((section("___ksymtab" sec "+" #sym), used))       \
        __aligned(sizeof(void *))                                       \
-       = { (unsigned long)&sym, __kstrtab_##sym, __kstrtab_ns_##sym }
+       = { (unsigned long)&sym, __kstrtab_##sym, __kstrtabns_##sym }
 
 #define __KSYMTAB_ENTRY(sym, sec)                                      \
        static const struct kernel_symbol __ksymtab_##sym               \
-       asm("__ksymtab_" #sym)                                          \
        __attribute__((section("___ksymtab" sec "+" #sym), used))       \
        __aligned(sizeof(void *))                                       \
        = { (unsigned long)&sym, __kstrtab_##sym, NULL }
@@ -112,10 +108,10 @@ struct kernel_symbol {
 /* For every exported symbol, place a struct in the __ksymtab section */
 #define ___EXPORT_SYMBOL_NS(sym, sec, ns)                              \
        ___export_symbol_common(sym, sec);                              \
-       static const char __kstrtab_ns_##sym[]                          \
+       static const char __kstrtabns_##sym[]                           \
        __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
        = #ns;                                                          \
-       __KSYMTAB_ENTRY_NS(sym, sec, ns)
+       __KSYMTAB_ENTRY_NS(sym, sec)
 
 #define ___EXPORT_SYMBOL(sym, sec)                                     \
        ___export_symbol_common(sym, sec);                              \
index 2ce5764..0367a75 100644 (file)
@@ -1099,7 +1099,6 @@ static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 
 #endif /* CONFIG_BPF_JIT */
 
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
 void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
 
 #define BPF_ANC                BIT(15)
index 866268c..e0d909d 100644 (file)
@@ -429,6 +429,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
  * @i_pages: Cached pages.
  * @gfp_mask: Memory allocation flags to use for allocating pages.
  * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
  * @i_mmap: Tree of private and shared mappings.
  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
  * @nrpages: Number of page entries, protected by the i_pages lock.
@@ -446,6 +447,10 @@ struct address_space {
        struct xarray           i_pages;
        gfp_t                   gfp_mask;
        atomic_t                i_mmap_writable;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       /* number of thp, only for non-shmem files */
+       atomic_t                nr_thps;
+#endif
        struct rb_root_cached   i_mmap;
        struct rw_semaphore     i_mmap_rwsem;
        unsigned long           nrpages;
@@ -1163,6 +1168,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
 extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
 extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
 extern int lease_modify(struct file_lock *, int, struct list_head *);
+
+struct notifier_block;
+extern int lease_register_notifier(struct notifier_block *);
+extern void lease_unregister_notifier(struct notifier_block *);
+
 struct files_struct;
 extern void show_fd_locks(struct seq_file *f,
                         struct file *filp, struct files_struct *files);
@@ -2798,6 +2808,33 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
        return errseq_sample(&mapping->wb_err);
 }
 
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       return atomic_read(&mapping->nr_thps);
+#else
+       return 0;
+#endif
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_inc(&mapping->nr_thps);
+#else
+       WARN_ON_ONCE(1);
+#endif
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_dec(&mapping->nr_thps);
+#else
+       WARN_ON_ONCE(1);
+#endif
+}
+
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
                           int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
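
Editor's note: the filemap_nr_thps*() helpers above are a config-gated
atomic counter: real atomics under CONFIG_READ_ONLY_THP_FOR_FS, stubs
otherwise. A hedged C11 analogue of the pattern (FEATURE_THP is an
illustrative stand-in for the Kconfig symbol):

  #include <stdatomic.h>
  #include <stdio.h>

  #define FEATURE_THP 1   /* stand-in for CONFIG_READ_ONLY_THP_FOR_FS */

  struct mapping {
  #if FEATURE_THP
          atomic_int nr_thps;     /* storage exists only when enabled */
  #endif
          int other_state;
  };

  static int mapping_nr_thps(struct mapping *m)
  {
  #if FEATURE_THP
          return atomic_load(&m->nr_thps);
  #else
          return 0;               /* feature off: constant, no storage */
  #endif
  }

  static void mapping_nr_thps_inc(struct mapping *m)
  {
  #if FEATURE_THP
          atomic_fetch_add(&m->nr_thps, 1);
  #endif
  }

  int main(void)
  {
          struct mapping m = { 0 };

          mapping_nr_thps_inc(&m);
          printf("%d\n", mapping_nr_thps(&m));  /* 1 with FEATURE_THP=1 */
          return 0;
  }
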
index 0424df7..e5c14e2 100644 (file)
@@ -95,7 +95,6 @@ struct fs_context {
        const struct cred       *cred;          /* The mounter's credentials */
        struct fc_log           *log;           /* Logging buffer */
        const char              *source;        /* The source name (eg. dev path) */
-       const char              *subtype;       /* The subtype to set on the superblock */
        void                    *security;      /* Linux S&M options */
        void                    *s_fs_info;     /* Proposed s_fs_info */
        unsigned int            sb_flags;       /* Proposed superblock flags (SB_*) */
index 2de3b2d..1915bdb 100644 (file)
@@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
 /* free mark */
 extern void fsnotify_free_mark(struct fsnotify_mark *mark);
+/* Wait until all marks queued for destruction are destroyed */
+extern void fsnotify_wait_marks_destroyed(void);
 /* run all the marks in a group, and clear all of the marks attached to given object type */
 extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
 /* run all the marks in a group, and clear all of the vfsmount marks */
index f338816..61f2f6f 100644 (file)
@@ -325,6 +325,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
        return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
 }
 
+/**
+ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
+ * @gfp_flags: gfp_flags to test
+ *
+ * Test whether @gfp_flags indicates that the allocation is from the
+ * %current context and allowed to sleep.
+ *
+ * An allocation being allowed to block doesn't mean it owns the %current
+ * context.  When direct reclaim path tries to allocate memory, the
+ * allocation context is nested inside whatever %current was doing at the
+ * time of the original allocation.  The nested allocation may be allowed
+ * to block but modifying anything %current owns can corrupt the outer
+ * context's expectations.
+ *
+ * %true result from this function indicates that the allocation context
+ * can sleep and use anything that's associated with %current.
+ */
+static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
+{
+       return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
+               __GFP_DIRECT_RECLAIM;
+}
+
 #ifdef CONFIG_HIGHMEM
 #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
 #else
@@ -510,18 +533,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                        struct vm_area_struct *vma, unsigned long addr,
-                       int node);
+                       int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+       alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
                alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+       alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
        alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)                    \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)         \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+       alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
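
Editor's note: gfpflags_normal_context() above is a two-flag predicate:
the allocation must be blockable (__GFP_DIRECT_RECLAIM set) and must not
be a reclaim-nested emergency allocation (__GFP_MEMALLOC clear). A
userspace sketch of the masked comparison; the bit values here are
illustrative, not the kernel's GFP encoding:

  #include <stdbool.h>
  #include <stdio.h>

  #define GFP_DIRECT_RECLAIM (1u << 0)    /* illustrative values */
  #define GFP_MEMALLOC       (1u << 1)

  /* True only when DIRECT_RECLAIM is set and MEMALLOC is clear. */
  static bool normal_context(unsigned int flags)
  {
          return (flags & (GFP_DIRECT_RECLAIM | GFP_MEMALLOC)) ==
                  GFP_DIRECT_RECLAIM;
  }

  int main(void)
  {
          printf("%d\n", normal_context(GFP_DIRECT_RECLAIM));            /* 1 */
          printf("%d\n", normal_context(GFP_DIRECT_RECLAIM |
                                        GFP_MEMALLOC));                  /* 0 */
          printf("%d\n", normal_context(0));                             /* 0 */
          return 0;
  }
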
index f8245d6..5dd9c98 100644 (file)
@@ -201,6 +201,14 @@ struct gpio_irq_chip {
         */
        bool threaded;
 
+       /**
+        * @init_hw: optional routine to initialize hardware before
+        * an IRQ chip will be added. This is quite useful when
+        * a particular driver wants to clear IRQ related registers
+        * in order to avoid undesired events.
+        */
+       int (*init_hw)(struct gpio_chip *chip);
+
        /**
         * @init_valid_mask: optional routine to initialize @valid_mask, to be
         * used if not all GPIO lines are valid interrupts. Sometimes some
index 45ede62..93d5cf0 100644 (file)
@@ -108,7 +108,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 
        if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
                return true;
-
+       /*
+        * For dax vmas, try to always use hugepage mappings. If the kernel
+        * does not support hugepages, fsdax mappings will fall back to
+        * PAGE_SIZE mappings, and device-dax namespaces, which try to
+        * guarantee a given mapping size, will fail to enable.
+        */
        if (vma_is_dax(vma))
                return true;
 
@@ -267,6 +272,15 @@ static inline bool thp_migration_supported(void)
        return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+static inline struct list_head *page_deferred_list(struct page *page)
+{
+       /*
+        * Global or memcg deferred list in the second tail pages is
+        * occupied by compound_head.
+        */
+       return &page[2].deferred_list;
+}
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
index edfca42..53fc34f 100644 (file)
@@ -454,7 +454,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 static inline struct hstate *page_hstate(struct page *page)
 {
        VM_BUG_ON_PAGE(!PageHuge(page), page);
-       return size_to_hstate(PAGE_SIZE << compound_order(page));
+       return size_to_hstate(page_size(page));
 }
 
 static inline unsigned hstate_index_to_shift(unsigned index)
index 04c36b7..7257916 100644 (file)
@@ -235,7 +235,7 @@ enum hwmon_power_attributes {
 #define HWMON_P_LABEL                  BIT(hwmon_power_label)
 #define HWMON_P_ALARM                  BIT(hwmon_power_alarm)
 #define HWMON_P_CAP_ALARM              BIT(hwmon_power_cap_alarm)
-#define HWMON_P_MIN_ALARM              BIT(hwmon_power_max_alarm)
+#define HWMON_P_MIN_ALARM              BIT(hwmon_power_min_alarm)
 #define HWMON_P_MAX_ALARM              BIT(hwmon_power_max_alarm)
 #define HWMON_P_LCRIT_ALARM            BIT(hwmon_power_lcrit_alarm)
 #define HWMON_P_CRIT_ALARM             BIT(hwmon_power_crit_alarm)
index 2afe6fd..b4a0170 100644 (file)
@@ -245,7 +245,10 @@ struct vmbus_channel_offer {
                } pipe;
        } u;
        /*
-        * The sub_channel_index is defined in win8.
+        * The sub_channel_index is defined in Win8: a value of zero means a
+        * primary channel and a value of non-zero means a sub-channel.
+        *
+        * Before Win8, the field is reserved, meaning it's always zero.
         */
        u16 sub_channel_index;
        u16 reserved3;
@@ -423,6 +426,9 @@ enum vmbus_channel_message_type {
        CHANNELMSG_COUNT
 };
 
+/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
+#define INVALID_RELID  U32_MAX
+
 struct vmbus_channel_message_header {
        enum vmbus_channel_message_type msgtype;
        u32 padding;
@@ -934,6 +940,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
                  VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
 }
 
+static inline bool is_sub_channel(const struct vmbus_channel *c)
+{
+       return c->offermsg.offer.sub_channel_index != 0;
+}
+
 static inline void set_channel_affinity_state(struct vmbus_channel *c,
                                              enum hv_numa_policy policy)
 {
@@ -1149,6 +1160,9 @@ struct hv_driver {
        int (*remove)(struct hv_device *);
        void (*shutdown)(struct hv_device *);
 
+       int (*suspend)(struct hv_device *);
+       int (*resume)(struct hv_device *);
+
 };
 
 /* Base device object */
index c0a78c0..1361637 100644 (file)
@@ -473,7 +473,7 @@ extern struct i2c_client *
 devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address);
 
 extern struct i2c_client *
-i2c_new_secondary_device(struct i2c_client *client,
+i2c_new_ancillary_device(struct i2c_client *client,
                                const char *name,
                                u16 default_addr);
 
index 2e55e4c..a367ead 100644 (file)
@@ -29,7 +29,6 @@ struct macvlan_dev {
        netdev_features_t       set_features;
        enum macvlan_mode       mode;
        u16                     flags;
-       int                     nest_level;
        unsigned int            macaddr_count;
 #ifdef CONFIG_NET_POLL_CONTROLLER
        struct netpoll          *netpoll;
index 06faa06..ec7e4bd 100644 (file)
@@ -223,6 +223,7 @@ struct team {
                atomic_t count_pending;
                struct delayed_work dw;
        } mcast_rejoin;
+       struct lock_class_key team_lock_key;
        long mode_priv[TEAM_MODE_PRIV_LONGS];
 };
 
index 244278d..b05e855 100644 (file)
@@ -182,7 +182,6 @@ struct vlan_dev_priv {
 #ifdef CONFIG_NET_POLL_CONTROLLER
        struct netpoll                          *netpoll;
 #endif
-       unsigned int                            nest_level;
 };
 
 static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
@@ -221,11 +220,6 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
 
 extern bool vlan_uses_dev(const struct net_device *dev);
 
-static inline int vlan_get_encap_level(struct net_device *dev)
-{
-       BUG_ON(!is_vlan_dev(dev));
-       return vlan_dev_priv(dev)->nest_level;
-}
 #else
 static inline struct net_device *
 __vlan_find_dev_deep_rcu(struct net_device *real_dev,
@@ -295,11 +289,6 @@ static inline bool vlan_uses_dev(const struct net_device *dev)
 {
        return false;
 }
-static inline int vlan_get_encap_level(struct net_device *dev)
-{
-       BUG();
-       return 0;
-}
 #endif
 
 /**
index a20ad39..1c37f17 100644 (file)
@@ -131,4 +131,13 @@ static inline int ima_inode_removexattr(struct dentry *dentry,
        return 0;
 }
 #endif /* CONFIG_IMA_APPRAISE */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+extern bool ima_appraise_signature(enum kernel_read_file_id func);
+#else
+static inline bool ima_appraise_signature(enum kernel_read_file_id func)
+{
+       return false;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
 #endif /* _LINUX_IMA_H */
index 8554761..aaa8a07 100644 (file)
                                                                              \
 /* Callbacks for augmented rbtree insert and remove */                       \
                                                                              \
-static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node)       \
-{                                                                            \
-       ITTYPE max = ITLAST(node), subtree_last;                              \
-       if (node->ITRB.rb_left) {                                             \
-               subtree_last = rb_entry(node->ITRB.rb_left,                   \
-                                       ITSTRUCT, ITRB)->ITSUBTREE;           \
-               if (max < subtree_last)                                       \
-                       max = subtree_last;                                   \
-       }                                                                     \
-       if (node->ITRB.rb_right) {                                            \
-               subtree_last = rb_entry(node->ITRB.rb_right,                  \
-                                       ITSTRUCT, ITRB)->ITSUBTREE;           \
-               if (max < subtree_last)                                       \
-                       max = subtree_last;                                   \
-       }                                                                     \
-       return max;                                                           \
-}                                                                            \
-                                                                             \
-RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB,           \
-                    ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last)    \
+RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment,                       \
+                        ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST)           \
                                                                              \
 /* Insert / remove interval nodes from the tree */                           \
                                                                              \
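
Editor's note: the hand-rolled _compute_subtree_last() deleted above is
exactly what RB_DECLARE_CALLBACKS_MAX now generates: the subtree maximum
is the node's own last endpoint, raised by either child's cached maximum.
A plain-C restatement on an explicit node type (illustrative, not the
rbtree API):

  #include <stdio.h>

  struct itnode {
          unsigned long last;           /* this node's interval end */
          unsigned long subtree_last;   /* cached max over the subtree */
          struct itnode *left, *right;
  };

  static unsigned long compute_subtree_last(const struct itnode *n)
  {
          unsigned long max = n->last;

          if (n->left && n->left->subtree_last > max)
                  max = n->left->subtree_last;
          if (n->right && n->right->subtree_last > max)
                  max = n->right->subtree_last;
          return max;
  }

  int main(void)
  {
          struct itnode l = { 30, 30, NULL, NULL };
          struct itnode r = { 10, 10, NULL, NULL };
          struct itnode root = { 20, 0, &l, &r };

          printf("%lu\n", compute_subtree_last(&root)); /* 30 */
          return 0;
  }
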
index bc499ce..7aa5d61 100644 (file)
@@ -188,10 +188,14 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
  */
 #define IOMAP_DIO_UNWRITTEN    (1 << 0)        /* covers unwritten extent(s) */
 #define IOMAP_DIO_COW          (1 << 1)        /* covers COW extent(s) */
-typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
-               unsigned flags);
+
+struct iomap_dio_ops {
+       int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
+                     unsigned flags);
+};
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-               const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+               const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP
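Editor's note: the hunk above trades a bare end_io function pointer for a
const ops structure, so further completion hooks can be added later
without touching every iomap_dio_rw() caller (the xfs conversion earlier
in this diff shows the caller side). A minimal sketch of the pattern with
illustrative names:

  #include <stdio.h>

  struct dio_ops {
          int (*end_io)(long size, int error);
  };

  static int my_end_io(long size, int error)
  {
          printf("end_io: size=%ld error=%d\n", size, error);
          return error;
  }

  static const struct dio_ops my_dio_ops = {
          .end_io = my_end_io,
  };

  static int dio_rw(long size, const struct dio_ops *dops)
  {
          /* A NULL ops table or callback means "no completion hook". */
          if (dops && dops->end_io)
                  return dops->end_io(size, 0);
          return 0;
  }

  int main(void)
  {
          return dio_rw(4096, &my_dio_ops);
  }
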
index df03825..603fbc4 100644 (file)
@@ -1410,8 +1410,6 @@ extern int           jbd2_journal_clear_err  (journal_t *);
 extern int        jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int        jbd2_journal_force_commit(journal_t *);
 extern int        jbd2_journal_force_commit_nested(journal_t *);
-extern int        jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
-extern int        jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
 extern int        jbd2_journal_inode_ranged_write(handle_t *handle,
                        struct jbd2_inode *inode, loff_t start_byte,
                        loff_t length);
index f0b8092..1776eb2 100644 (file)
@@ -125,7 +125,7 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
                             unsigned long cmdline_len);
 typedef int (kexec_cleanup_t)(void *loader_data);
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 typedef int (kexec_verify_sig_t)(const char *kernel_buf,
                                 unsigned long kernel_len);
 #endif
@@ -134,7 +134,7 @@ struct kexec_file_ops {
        kexec_probe_t *probe;
        kexec_load_t *load;
        kexec_cleanup_t *cleanup;
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
        kexec_verify_sig_t *verify_sig;
 #endif
 };
@@ -183,6 +183,8 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
                                   bool get_value);
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
 
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+                                        unsigned long buf_len);
 void * __weak arch_kexec_kernel_image_load(struct kimage *image);
 int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
                                            Elf_Shdr *section,
index fbf144a..b072aeb 100644 (file)
@@ -326,8 +326,10 @@ extern atomic_t                    kgdb_active;
        (raw_smp_processor_id() == atomic_read(&kgdb_active))
 extern bool dbg_is_early;
 extern void __init dbg_late_init(void);
+extern void kgdb_panic(const char *msg);
 #else /* ! CONFIG_KGDB */
 #define in_dbg_master() (0)
 #define dbg_late_init()
+static inline void kgdb_panic(const char *msg) {}
 #endif /* ! CONFIG_KGDB */
 #endif /* _KGDB_H_ */
index 082d1d2..bc45ea1 100644 (file)
@@ -15,6 +15,14 @@ extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
 extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
                                      unsigned long vm_flags);
+#ifdef CONFIG_SHMEM
+extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
+#else
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+                                          unsigned long addr)
+{
+}
+#endif
 
 #define khugepaged_enabled()                                          \
        (transparent_hugepage_flags &                                  \
@@ -73,6 +81,10 @@ static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 {
        return 0;
 }
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+                                          unsigned long addr)
+{
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_KHUGEPAGED_H */
index fcb46b3..719fc3e 100644 (file)
@@ -1090,6 +1090,7 @@ enum kvm_stat_kind {
 
 struct kvm_stat_data {
        int offset;
+       int mode;
        struct kvm *kvm;
 };
 
@@ -1097,6 +1098,7 @@ struct kvm_stats_debugfs_item {
        const char *name;
        int offset;
        enum kvm_stat_kind kind;
+       int mode;
 };
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
index b8df711..efb309d 100644 (file)
@@ -247,7 +247,7 @@ extern void led_set_brightness(struct led_classdev *led_cdev,
 /**
  * led_set_brightness_sync - set LED brightness synchronously
  * @led_cdev: the LED to set
- * @brightness: the brightness to set it to
+ * @value: the brightness to set it to
  *
  * Set an LED's brightness immediately. This function will block
  * the caller for the time required for accessing device registers,
@@ -301,8 +301,7 @@ extern void led_sysfs_enable(struct led_classdev *led_cdev);
 /**
  * led_compose_name - compose LED class device name
  * @dev: LED controller device object
- * @child: child fwnode_handle describing a LED or a group of synchronized LEDs;
- *        it must be provided only for fwnode based LEDs
+ * @init_data: the LED class device initialization data
  * @led_classdev_name: composed LED class device name
  *
  * Create LED class device name basing on the provided init_data argument.
index 3fced58..a376324 100644 (file)
  * @bpf_prog_free_security:
  *     Clean up the security information stored inside bpf prog.
  *
+ * @locked_down:
+ *     Determine whether a kernel feature that potentially enables arbitrary
+ *     code execution in kernel space should be permitted.
+ *
+ *     @what: kernel feature being accessed
  */
 union security_list_options {
        int (*binder_set_context_mgr)(struct task_struct *mgr);
@@ -1812,6 +1817,7 @@ union security_list_options {
        int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux);
        void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
 #endif /* CONFIG_BPF_SYSCALL */
+       int (*locked_down)(enum lockdown_reason what);
 };
 
 struct security_hook_heads {
@@ -2053,6 +2059,7 @@ struct security_hook_heads {
        struct hlist_head bpf_prog_alloc_security;
        struct hlist_head bpf_prog_free_security;
 #endif /* CONFIG_BPF_SYSCALL */
+       struct hlist_head locked_down;
 } __randomize_layout;
 
 /*
@@ -2111,12 +2118,18 @@ struct lsm_info {
 };
 
 extern struct lsm_info __start_lsm_info[], __end_lsm_info[];
+extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[];
 
 #define DEFINE_LSM(lsm)                                                        \
        static struct lsm_info __lsm_##lsm                              \
                __used __section(.lsm_info.init)                        \
                __aligned(sizeof(unsigned long))
 
+#define DEFINE_EARLY_LSM(lsm)                                          \
+       static struct lsm_info __early_lsm_##lsm                        \
+               __used __section(.early_lsm_info.init)                  \
+               __aligned(sizeof(unsigned long))
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 /*
  * Assuring the safety of deleting a security module is up to
index ad8f1a3..ae703ea 100644 (file)
@@ -128,9 +128,8 @@ struct mem_cgroup_per_node {
 
        struct mem_cgroup_reclaim_iter  iter[DEF_PRIORITY + 1];
 
-#ifdef CONFIG_MEMCG_KMEM
        struct memcg_shrinker_map __rcu *shrinker_map;
-#endif
+
        struct rb_node          tree_node;      /* RB tree node */
        unsigned long           usage_in_excess;/* Set to the value by which */
                                                /* the soft limit is exceeded*/
@@ -331,6 +330,10 @@ struct mem_cgroup {
        struct list_head event_list;
        spinlock_t event_list_lock;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       struct deferred_split deferred_split_queue;
+#endif
+
        struct mem_cgroup_per_node *nodeinfo[0];
        /* WARNING: nodeinfo must be the last member here */
 };
@@ -353,6 +356,19 @@ static inline bool mem_cgroup_disabled(void)
        return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+                                                 bool in_low_reclaim)
+{
+       if (mem_cgroup_disabled())
+               return 0;
+
+       if (in_low_reclaim)
+               return READ_ONCE(memcg->memory.emin);
+
+       return max(READ_ONCE(memcg->memory.emin),
+                  READ_ONCE(memcg->memory.elow));
+}
+
 enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
                                                struct mem_cgroup *memcg);
 
@@ -534,6 +550,8 @@ void mem_cgroup_handle_over_high(void);
 
 unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
 
+unsigned long mem_cgroup_size(struct mem_cgroup *memcg);
+
 void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
                                struct task_struct *p);
 
@@ -826,6 +844,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 {
 }
 
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+                                                 bool in_low_reclaim)
+{
+       return 0;
+}
+
 static inline enum mem_cgroup_protection mem_cgroup_protected(
        struct mem_cgroup *root, struct mem_cgroup *memcg)
 {
@@ -965,6 +989,11 @@ static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
        return 0;
 }
 
+static inline unsigned long mem_cgroup_size(struct mem_cgroup *memcg)
+{
+       return 0;
+}
+
 static inline void
 mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p)
 {
@@ -1261,6 +1290,9 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
 static inline void mem_cgroup_track_foreign_dirty(struct page *page,
                                                  struct bdi_writeback *wb)
 {
+       if (mem_cgroup_disabled())
+               return;
+
        if (unlikely(&page->mem_cgroup->css != wb->memcg_css))
                mem_cgroup_track_foreign_dirty_slowpath(page, wb);
 }
@@ -1311,6 +1343,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
        } while ((memcg = parent_mem_cgroup(memcg)));
        return false;
 }
+
+extern int memcg_expand_shrinker_maps(int new_id);
+
+extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+                                  int nid, int shrinker_id);
 #else
 #define mem_cgroup_sockets_enabled 0
 static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
@@ -1319,6 +1356,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
        return false;
 }
+
+static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+                                         int nid, int shrinker_id)
+{
+}
 #endif
 
 struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
@@ -1390,10 +1432,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
        return memcg ? memcg->kmemcg_id : -1;
 }
 
-extern int memcg_expand_shrinker_maps(int new_id);
-
-extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
-                                  int nid, int shrinker_id);
 #else
 
 static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1435,8 +1473,6 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
-static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
-                                         int nid, int shrinker_id) { }
 #endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
index 02e633f..0ebb105 100644 (file)
@@ -25,7 +25,6 @@
 
 struct memory_block {
        unsigned long start_section_nr;
-       unsigned long end_section_nr;
        unsigned long state;            /* serialized by the dev->lock */
        int section_count;              /* serialized by mem_sysfs_mutex */
        int online_type;                /* for passing data to online routine */
@@ -80,9 +79,9 @@ struct mem_section;
 #define IPC_CALLBACK_PRI        10
 
 #ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
-static inline int memory_dev_init(void)
+static inline void memory_dev_init(void)
 {
-       return 0;
+       return;
 }
 static inline int register_memory_notifier(struct notifier_block *nb)
 {
@@ -113,7 +112,7 @@ extern int register_memory_isolate_notifier(struct notifier_block *nb);
 extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
-extern int memory_dev_init(void);
+extern void memory_dev_init(void);
 extern int memory_notify(unsigned long val, void *v);
 extern int memory_isolate_notify(unsigned long val, void *v);
 extern struct memory_block *find_memory_block(struct mem_section *);
index bac395f..5228c62 100644 (file)
@@ -139,8 +139,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
                unsigned long addr);
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
-                                               unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
index fb2a0bd..6fefb09 100644 (file)
@@ -17,6 +17,7 @@ struct device;
  */
 struct vmem_altmap {
        const unsigned long base_pfn;
+       const unsigned long end_pfn;
        const unsigned long reserve;
        unsigned long free;
        unsigned long align;
@@ -111,7 +112,6 @@ struct dev_pagemap {
        struct completion done;
        enum memory_type type;
        unsigned int flags;
-       u64 pci_p2pdma_bus_offset;
        const struct dev_pagemap_ops *ops;
 };
 
diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
deleted file mode 100644 (file)
index 085edbf..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Platform configuration options for DA9063
- *
- * Copyright 2012 Dialog Semiconductor Ltd.
- *
- * Author: Michal Hajduk, Dialog Semiconductor
- * Author: Krystian Garbaciak, Dialog Semiconductor
- */
-
-#ifndef __MFD_DA9063_PDATA_H__
-#define __MFD_DA9063_PDATA_H__
-
-/*
- * RGB LED configuration
- */
-/* LED IDs for flags in struct led_info. */
-enum {
-       DA9063_GPIO11_LED,
-       DA9063_GPIO14_LED,
-       DA9063_GPIO15_LED,
-
-       DA9063_LED_NUM
-};
-#define DA9063_LED_ID_MASK             0x3
-
-/* LED polarity for flags in struct led_info. */
-#define DA9063_LED_HIGH_LEVEL_ACTIVE   0x0
-#define DA9063_LED_LOW_LEVEL_ACTIVE    0x4
-
-
-/*
- * General PMIC configuration
- */
-/* HWMON ADC channels configuration */
-#define DA9063_FLG_FORCE_IN0_MANUAL_MODE       0x0010
-#define DA9063_FLG_FORCE_IN0_AUTO_MODE         0x0020
-#define DA9063_FLG_FORCE_IN1_MANUAL_MODE       0x0040
-#define DA9063_FLG_FORCE_IN1_AUTO_MODE         0x0080
-#define DA9063_FLG_FORCE_IN2_MANUAL_MODE       0x0100
-#define DA9063_FLG_FORCE_IN2_AUTO_MODE         0x0200
-#define DA9063_FLG_FORCE_IN3_MANUAL_MODE       0x0400
-#define DA9063_FLG_FORCE_IN3_AUTO_MODE         0x0800
-
-/* Disable register caching. */
-#define DA9063_FLG_NO_CACHE                    0x0008
-
-struct da9063;
-
-/* DA9063 platform data */
-struct da9063_pdata {
-       int                             (*init)(struct da9063 *da9063);
-       int                             irq_base;
-       bool                            key_power;
-       unsigned                        flags;
-       struct da9063_regulators_pdata  *regulators_pdata;
-       struct led_platform_data        *leds_pdata;
-};
-
-#endif /* __MFD_DA9063_PDATA_H__ */
diff --git a/include/linux/mfd/intel_soc_pmic_mrfld.h b/include/linux/mfd/intel_soc_pmic_mrfld.h
new file mode 100644 (file)
index 0000000..4daecd6
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for Intel Merrifield Basin Cove PMIC
+ *
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __INTEL_SOC_PMIC_MRFLD_H__
+#define __INTEL_SOC_PMIC_MRFLD_H__
+
+#include <linux/bits.h>
+
+#define BCOVE_ID               0x00
+
+#define BCOVE_ID_MINREV0       GENMASK(2, 0)
+#define BCOVE_ID_MAJREV0       GENMASK(5, 3)
+#define BCOVE_ID_VENDID0       GENMASK(7, 6)
+
+#define BCOVE_MINOR(x)         (unsigned int)(((x) & BCOVE_ID_MINREV0) >> 0)
+#define BCOVE_MAJOR(x)         (unsigned int)(((x) & BCOVE_ID_MAJREV0) >> 3)
+#define BCOVE_VENDOR(x)                (unsigned int)(((x) & BCOVE_ID_VENDID0) >> 6)
+
+#define BCOVE_IRQLVL1          0x01
+
+#define BCOVE_PBIRQ            0x02
+#define BCOVE_TMUIRQ           0x03
+#define BCOVE_THRMIRQ          0x04
+#define BCOVE_BCUIRQ           0x05
+#define BCOVE_ADCIRQ           0x06
+#define BCOVE_CHGRIRQ0         0x07
+#define BCOVE_CHGRIRQ1         0x08
+#define BCOVE_GPIOIRQ          0x09
+#define BCOVE_CRITIRQ          0x0B
+
+#define BCOVE_MIRQLVL1         0x0C
+
+#define BCOVE_MPBIRQ           0x0D
+#define BCOVE_MTMUIRQ          0x0E
+#define BCOVE_MTHRMIRQ         0x0F
+#define BCOVE_MBCUIRQ          0x10
+#define BCOVE_MADCIRQ          0x11
+#define BCOVE_MCHGRIRQ0                0x12
+#define BCOVE_MCHGRIRQ1                0x13
+#define BCOVE_MGPIOIRQ         0x14
+#define BCOVE_MCRITIRQ         0x16
+
+#define BCOVE_SCHGRIRQ0                0x4E
+#define BCOVE_SCHGRIRQ1                0x4F
+
+/* Level 1 IRQs */
+#define BCOVE_LVL1_PWRBTN      BIT(0)  /* power button */
+#define BCOVE_LVL1_TMU         BIT(1)  /* time management unit */
+#define BCOVE_LVL1_THRM                BIT(2)  /* thermal */
+#define BCOVE_LVL1_BCU         BIT(3)  /* burst control unit */
+#define BCOVE_LVL1_ADC         BIT(4)  /* ADC */
+#define BCOVE_LVL1_CHGR                BIT(5)  /* charger */
+#define BCOVE_LVL1_GPIO                BIT(6)  /* GPIO */
+#define BCOVE_LVL1_CRIT                BIT(7)  /* critical event */
+
+/* Level 2 IRQs: power button */
+#define BCOVE_PBIRQ_PBTN       BIT(0)
+#define BCOVE_PBIRQ_UBTN       BIT(1)
+
+/* Level 2 IRQs: ADC */
+#define BCOVE_ADCIRQ_BATTEMP   BIT(2)
+#define BCOVE_ADCIRQ_SYSTEMP   BIT(3)
+#define BCOVE_ADCIRQ_BATTID    BIT(4)
+#define BCOVE_ADCIRQ_VIBATT    BIT(5)
+#define BCOVE_ADCIRQ_CCTICK    BIT(7)
+
+/* Level 2 IRQs: charger */
+#define BCOVE_CHGRIRQ_BAT0ALRT BIT(4)
+#define BCOVE_CHGRIRQ_BAT1ALRT BIT(5)
+#define BCOVE_CHGRIRQ_BATCRIT  BIT(6)
+
+#define BCOVE_CHGRIRQ_VBUSDET  BIT(0)
+#define BCOVE_CHGRIRQ_DCDET    BIT(1)
+#define BCOVE_CHGRIRQ_BATTDET  BIT(2)
+#define BCOVE_CHGRIRQ_USBIDDET BIT(3)
+
+#endif /* __INTEL_SOC_PMIC_MRFLD_H__ */
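
The BCOVE_MINOR/MAJOR/VENDOR accessors are plain mask-and-shift extractions of
the GENMASK()-defined fields above. A stand-alone illustration of the same
arithmetic with the masks expanded by hand; the register value 0x7b is made up
for the example:

    #include <stdio.h>

    #define ID_MINREV0 0x07u                /* GENMASK(2, 0) */
    #define ID_MAJREV0 0x38u                /* GENMASK(5, 3) */
    #define ID_VENDID0 0xc0u                /* GENMASK(7, 6) */

    int main(void)
    {
            unsigned int id = 0x7b;         /* sample BCOVE_ID readout */

            printf("vendor=%u major=%u minor=%u\n",
                   (id & ID_VENDID0) >> 6,
                   (id & ID_MAJREV0) >> 3,
                   (id & ID_MINREV0) >> 0); /* vendor=1 major=7 minor=3 */
            return 0;
    }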
index 25a95e7..fc88d31 100644 (file)
@@ -7,6 +7,14 @@
 #ifndef __MFD_MT6397_CORE_H__
 #define __MFD_MT6397_CORE_H__
 
+#include <linux/mutex.h>
+
+enum chip_id {
+       MT6323_CHIP_ID = 0x23,
+       MT6391_CHIP_ID = 0x91,
+       MT6397_CHIP_ID = 0x97,
+};
+
 enum mt6397_irq_numbers {
        MT6397_IRQ_SPKL_AB = 0,
        MT6397_IRQ_SPKR_AB,
@@ -54,6 +62,9 @@ struct mt6397_chip {
        u16 irq_masks_cache[2];
        u16 int_con[2];
        u16 int_status[2];
+       u16 chip_id;
 };
 
+int mt6397_irq_init(struct mt6397_chip *chip);
+
 #endif /* __MFD_MT6397_CORE_H__ */
index ad24554..75f880c 100644 (file)
@@ -31,7 +31,7 @@
 #define PHY_ID_KSZ886X         0x00221430
 #define PHY_ID_KSZ8863         0x00221435
 
-#define PHY_ID_KSZ8795         0x00221550
+#define PHY_ID_KSZ87XX         0x00221550
 
 #define        PHY_ID_KSZ9477          0x00221631
 
index 5cd824c..4ce8901 100644 (file)
@@ -455,6 +455,15 @@ static inline void mii_lpa_mod_linkmode_lpa_t(unsigned long *lp_advertising,
                         lp_advertising, lpa & LPA_LPACK);
 }
 
+static inline void mii_ctrl1000_mod_linkmode_adv_t(unsigned long *advertising,
+                                                  u32 ctrl1000)
+{
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, advertising,
+                        ctrl1000 & ADVERTISE_1000HALF);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, advertising,
+                        ctrl1000 & ADVERTISE_1000FULL);
+}
+
 /**
  * linkmode_adv_to_lcl_adv_t
  * @advertising:pointer to linkmode advertising
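
mii_ctrl1000_mod_linkmode_adv_t() follows the pattern of the lpa helpers just
above it: test the ADVERTISE_1000HALF/FULL bits in a MII_CTRL1000 snapshot and
set or clear the matching linkmode bits. A hedged sketch of a PHY driver read
path; read_gig_advertisement is an invented name, while phy_read() and
MII_CTRL1000 are the usual linux/phy.h and linux/mii.h symbols:

    /* Sketch: fold the local 1000BASE-T advertisement into the linkmode set. */
    static int read_gig_advertisement(struct phy_device *phydev)
    {
            int ctrl1000 = phy_read(phydev, MII_CTRL1000);

            if (ctrl1000 < 0)
                    return ctrl1000;        /* MDIO read failed */

            mii_ctrl1000_mod_linkmode_adv_t(phydev->advertising, ctrl1000);
            return 0;
    }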
index a487b68..0836fe2 100644 (file)
@@ -282,7 +282,6 @@ enum {
        MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
        MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
        MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
-       MLX5_CMD_OP_SYNC_STEERING                 = 0xb00,
        MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
        MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
        MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
@@ -296,6 +295,7 @@ enum {
        MLX5_CMD_OP_DESTROY_UCTX                  = 0xa06,
        MLX5_CMD_OP_CREATE_UMEM                   = 0xa08,
        MLX5_CMD_OP_DESTROY_UMEM                  = 0xa0a,
+       MLX5_CMD_OP_SYNC_STEERING                 = 0xb00,
        MLX5_CMD_OP_MAX
 };
 
@@ -487,7 +487,7 @@ union mlx5_ifc_gre_key_bits {
 
 struct mlx5_ifc_fte_match_set_misc_bits {
        u8         gre_c_present[0x1];
-       u8         reserved_auto1[0x1];
+       u8         reserved_at_1[0x1];
        u8         gre_k_present[0x1];
        u8         gre_s_present[0x1];
        u8         source_vhca_port[0x4];
@@ -1545,9 +1545,8 @@ struct mlx5_ifc_extended_dest_format_bits {
 };
 
 union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits {
-       struct mlx5_ifc_dest_format_struct_bits dest_format_struct;
+       struct mlx5_ifc_extended_dest_format_bits extended_dest_format;
        struct mlx5_ifc_flow_counter_list_bits flow_counter_list;
-       u8         reserved_at_0[0x40];
 };
 
 struct mlx5_ifc_fte_match_param_bits {
@@ -5054,50 +5053,50 @@ struct mlx5_ifc_query_hca_cap_in_bits {
 
 struct mlx5_ifc_other_hca_cap_bits {
        u8         roce[0x1];
-       u8         reserved_0[0x27f];
+       u8         reserved_at_1[0x27f];
 };
 
 struct mlx5_ifc_query_other_hca_cap_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct     mlx5_ifc_other_hca_cap_bits other_capability;
 };
 
 struct mlx5_ifc_query_other_hca_cap_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         function_id[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_modify_other_hca_cap_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_other_hca_cap_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         function_id[0x10];
        u8         field_select[0x20];
 
index 7cf955f..a2adf95 100644 (file)
@@ -695,11 +695,6 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
 
 extern void kvfree(const void *addr);
 
-static inline atomic_t *compound_mapcount_ptr(struct page *page)
-{
-       return &page[1].compound_mapcount;
-}
-
 static inline int compound_mapcount(struct page *page)
 {
        VM_BUG_ON_PAGE(!PageCompound(page), page);
@@ -805,6 +800,24 @@ static inline void set_compound_order(struct page *page, unsigned int order)
        page[1].compound_order = order;
 }
 
+/* Returns the number of pages in this potentially compound page. */
+static inline unsigned long compound_nr(struct page *page)
+{
+       return 1UL << compound_order(page);
+}
+
+/* Returns the number of bytes in this potentially compound page. */
+static inline unsigned long page_size(struct page *page)
+{
+       return PAGE_SIZE << compound_order(page);
+}
+
+/* Returns the number of bits needed for the number of bytes in a page */
+static inline unsigned int page_shift(struct page *page)
+{
+       return PAGE_SHIFT + compound_order(page);
+}
+
 void free_compound_page(struct page *page);
 
 #ifdef CONFIG_MMU
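
compound_nr(), page_size() and page_shift() are all thin wrappers over
compound_order(), so the three values are related by simple shifts. The
arithmetic stand-alone, assuming 4 KiB base pages (PAGE_SHIFT = 12) and an
order-9 compound page such as an x86-64 THP:

    #include <stdio.h>

    #define PAGE_SHIFT 12UL
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
            unsigned int order = 9;                         /* 2 MiB THP */

            printf("pages = %lu\n", 1UL << order);          /* 512       */
            printf("bytes = %lu\n", PAGE_SIZE << order);    /* 2097152   */
            printf("shift = %lu\n", PAGE_SHIFT + order);    /* 21        */
            return 0;
    }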
@@ -1057,8 +1070,9 @@ static inline void put_user_page(struct page *page)
        put_page(page);
 }
 
-void put_user_pages_dirty(struct page **pages, unsigned long npages);
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+                              bool make_dirty);
+
 void put_user_pages(struct page **pages, unsigned long npages);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -1405,7 +1419,11 @@ extern void pagefault_out_of_memory(void);
 
 extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
 
+#ifdef CONFIG_MMU
 extern bool can_do_mlock(void);
+#else
+static inline bool can_do_mlock(void) { return false; }
+#endif
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
 
@@ -1926,7 +1944,7 @@ static inline void pgtable_init(void)
        pgtable_cache_init();
 }
 
-static inline bool pgtable_page_ctor(struct page *page)
+static inline bool pgtable_pte_page_ctor(struct page *page)
 {
        if (!ptlock_init(page))
                return false;
@@ -1935,7 +1953,7 @@ static inline bool pgtable_page_ctor(struct page *page)
        return true;
 }
 
-static inline void pgtable_page_dtor(struct page *page)
+static inline void pgtable_pte_page_dtor(struct page *page)
 {
        ptlock_free(page);
        __ClearPageTable(page);
@@ -2305,6 +2323,8 @@ extern int install_special_mapping(struct mm_struct *mm,
                                   unsigned long addr, unsigned long len,
                                   unsigned long flags, struct page **pages);
 
+unsigned long randomize_stack_top(unsigned long stack_top);
+
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2568,6 +2588,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_COW       0x4000  /* internal GUP flag */
 #define FOLL_ANON      0x8000  /* don't do file mappings */
 #define FOLL_LONGTERM  0x10000 /* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
 
 /*
  * NOTE on FOLL_LONGTERM:
@@ -2845,5 +2866,12 @@ void __init setup_nr_node_ids(void);
 static inline void setup_nr_node_ids(void) {}
 #endif
 
+extern int memcmp_pages(struct page *page1, struct page *page2);
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+       return !memcmp_pages(page1, page2);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
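
pages_identical() is just "memcmp of the two pages' contents is zero", with the
actual comparison exported as memcmp_pages() so architectures can override it.
A userspace analogue of the idiom, with buffers standing in for pages:

    #include <stdio.h>
    #include <string.h>

    /* Userspace stand-in for pages_identical(): equality as !memcmp. */
    static int buffers_identical(const void *a, const void *b, size_t len)
    {
            return !memcmp(a, b, len);
    }

    int main(void)
    {
            static char p1[4096], p2[4096];

            printf("%d\n", buffers_identical(p1, p2, sizeof(p1)));  /* 1 */
            p2[100] = 1;
            printf("%d\n", buffers_identical(p1, p2, sizeof(p1)));  /* 0 */
            return 0;
    }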
index 0b739f3..270aa8f 100644 (file)
@@ -138,6 +138,7 @@ struct page {
                struct {        /* Second tail page of compound page */
                        unsigned long _compound_pad_1;  /* compound_head */
                        unsigned long _compound_pad_2;
+                       /* For both global and memcg */
                        struct list_head deferred_list;
                };
                struct {        /* Page table pages */
@@ -220,6 +221,11 @@ struct page {
 #endif
 } _struct_page_alignment;
 
+static inline atomic_t *compound_mapcount_ptr(struct page *page)
+{
+       return &page[1].compound_mapcount;
+}
+
 /*
  * Used for sizing the vmemmap region on some architectures
  */
@@ -382,6 +388,16 @@ struct mm_struct {
                unsigned long highest_vm_end;   /* highest vma end address */
                pgd_t * pgd;
 
+#ifdef CONFIG_MEMBARRIER
+               /**
+                * @membarrier_state: Flags controlling membarrier behavior.
+                *
+                * This field is close to @pgd to hopefully fit in the same
+                * cache-line, which needs to be touched by switch_mm().
+                */
+               atomic_t membarrier_state;
+#endif
+
                /**
                 * @mm_users: The number of users including userspace.
                 *
@@ -451,9 +467,7 @@ struct mm_struct {
                unsigned long flags; /* Must use atomic bitops to access */
 
                struct core_state *core_state; /* coredumping support */
-#ifdef CONFIG_MEMBARRIER
-               atomic_t membarrier_state;
-#endif
+
 #ifdef CONFIG_AIO
                spinlock_t                      ioctx_lock;
                struct kioctx_table __rcu       *ioctx_table;
index d7016dc..c1bc673 100644 (file)
@@ -36,6 +36,10 @@ struct vmacache {
        struct vm_area_struct *vmas[VMACACHE_SIZE];
 };
 
+/*
+ * When updating this, please also update struct resident_page_types[] in
+ * kernel/fork.c
+ */
 enum {
        MM_FILEPAGES,   /* Resident file mapping pages */
        MM_ANONPAGES,   /* Resident anonymous pages */
index 3f38c30..bda2028 100644 (file)
@@ -235,6 +235,8 @@ enum node_stat_item {
        NR_SHMEM,               /* shmem pages (included tmpfs/GEM pages) */
        NR_SHMEM_THPS,
        NR_SHMEM_PMDMAPPED,
+       NR_FILE_THPS,
+       NR_FILE_PMDMAPPED,
        NR_ANON_THPS,
        NR_UNSTABLE_NFS,        /* NFS unstable pages */
        NR_VMSCAN_WRITE,
@@ -677,6 +679,14 @@ struct zonelist {
 extern struct page *mem_map;
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct deferred_split {
+       spinlock_t split_queue_lock;
+       struct list_head split_queue;
+       unsigned long split_queue_len;
+};
+#endif
+
 /*
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
 * its memory layout. On UMA machines there is a single pglist_data which
@@ -756,9 +766,7 @@ typedef struct pglist_data {
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       spinlock_t split_queue_lock;
-       struct list_head split_queue;
-       unsigned long split_queue_len;
+       struct deferred_split deferred_split_queue;
 #endif
 
        /* Fields commonly accessed by the page reclaim scanner */
index b1a6735..6d20895 100644 (file)
@@ -26,9 +26,6 @@
 #include <linux/percpu.h>
 #include <asm/module.h>
 
-/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
-#define MODULE_SIG_STRING "~Module signature appended~\n"
-
 /* Not Yet Implemented */
 #define MODULE_SUPPORTED_DEVICE(name)
 
diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
new file mode 100644 (file)
index 0000000..7eb4b00
--- /dev/null
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Module signature handling.
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_MODULE_SIGNATURE_H
+#define _LINUX_MODULE_SIGNATURE_H
+
+#include <linux/types.h>
+
+/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
+#define MODULE_SIG_STRING "~Module signature appended~\n"
+
+enum pkey_id_type {
+       PKEY_ID_PGP,            /* OpenPGP generated key ID */
+       PKEY_ID_X509,           /* X.509 arbitrary subjectKeyIdentifier */
+       PKEY_ID_PKCS7,          /* Signature in PKCS#7 message */
+};
+
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ *     - Signer's name
+ *     - Key identifier
+ *     - Signature data
+ *     - Information block
+ */
+struct module_signature {
+       u8      algo;           /* Public-key crypto algorithm [0] */
+       u8      hash;           /* Digest algorithm [0] */
+       u8      id_type;        /* Key identifier type [PKEY_ID_PKCS7] */
+       u8      signer_len;     /* Length of signer's name [0] */
+       u8      key_id_len;     /* Length of key identifier [0] */
+       u8      __pad[3];
+       __be32  sig_len;        /* Length of signature data */
+};
+
+int mod_check_sig(const struct module_signature *ms, size_t file_len,
+                 const char *name);
+
+#endif /* _LINUX_MODULE_SIGNATURE_H */
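
mod_check_sig() validates the module_signature block that sits at the very end
of a signed module, after the signature data itself. A stand-alone sketch of
the bounds checks such a validator has to make; it mirrors the struct layout
above, assumes a little-endian host for the be32 decode, and is illustrative
rather than the kernel implementation:

    #include <stdint.h>
    #include <string.h>

    struct module_signature_disk {
            uint8_t  algo, hash, id_type;
            uint8_t  signer_len, key_id_len;
            uint8_t  pad[3];
            uint32_t sig_len;               /* big-endian on disk */
    };

    static int check_sig(const uint8_t *file, size_t file_len)
    {
            struct module_signature_disk ms;
            uint32_t sig_len;

            if (file_len < sizeof(ms))
                    return -1;
            memcpy(&ms, file + file_len - sizeof(ms), sizeof(ms));

            sig_len = __builtin_bswap32(ms.sig_len);    /* be32 -> host */
            if (ms.id_type != 2)                        /* PKEY_ID_PKCS7 */
                    return -1;
            if (ms.signer_len || ms.key_id_len)         /* unused for PKCS#7 */
                    return -1;
            if (file_len < sizeof(ms) + sig_len)
                    return -1;                          /* overruns the file */
            return 0;
    }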
index 9eda1c3..c20f190 100644 (file)
@@ -925,6 +925,7 @@ struct dev_ifalias {
 struct devlink;
 struct tlsdev_ops;
 
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1421,7 +1422,6 @@ struct net_device_ops {
        void                    (*ndo_dfwd_del_station)(struct net_device *pdev,
                                                        void *priv);
 
-       int                     (*ndo_get_lock_subclass)(struct net_device *dev);
        int                     (*ndo_set_tx_maxrate)(struct net_device *dev,
                                                      int queue_index,
                                                      u32 maxrate);
@@ -1649,6 +1649,8 @@ enum netdev_priv_flags {
  *     @perm_addr:             Permanent hw address
  *     @addr_assign_type:      Hw address assignment type
  *     @addr_len:              Hardware address length
+ *     @upper_level:           Maximum depth level of upper devices.
+ *     @lower_level:           Maximum depth level of lower devices.
  *     @neigh_priv_len:        Used in neigh_alloc()
  *     @dev_id:                Used to differentiate devices that share
  *                             the same link layer address
@@ -1758,9 +1760,13 @@ enum netdev_priv_flags {
  *     @phydev:        Physical device may attach itself
  *                     for hardware timestamping
  *     @sfp_bus:       attached &struct sfp_bus structure.
- *
- *     @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- *     @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+ *     @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock
+ *                             spinlock
+ *     @qdisc_running_key:     lockdep class annotating Qdisc->running seqcount
+ *     @qdisc_xmit_lock_key:   lockdep class annotating
+ *                             netdev_queue->_xmit_lock spinlock
+ *     @addr_list_lock_key:    lockdep class annotating
+ *                             net_device->addr_list_lock spinlock
  *
  *     @proto_down:    protocol port state information can be sent to the
  *                     switch driver and used to set the phys state of the
@@ -1875,6 +1881,8 @@ struct net_device {
        unsigned char           perm_addr[MAX_ADDR_LEN];
        unsigned char           addr_assign_type;
        unsigned char           addr_len;
+       unsigned char           upper_level;
+       unsigned char           lower_level;
        unsigned short          neigh_priv_len;
        unsigned short          dev_id;
        unsigned short          dev_port;
@@ -2045,8 +2053,10 @@ struct net_device {
 #endif
        struct phy_device       *phydev;
        struct sfp_bus          *sfp_bus;
-       struct lock_class_key   *qdisc_tx_busylock;
-       struct lock_class_key   *qdisc_running_key;
+       struct lock_class_key   qdisc_tx_busylock_key;
+       struct lock_class_key   qdisc_running_key;
+       struct lock_class_key   qdisc_xmit_lock_key;
+       struct lock_class_key   addr_list_lock_key;
        bool                    proto_down;
        unsigned                wol_enabled:1;
 };
@@ -2124,23 +2134,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
                f(dev, &dev->_tx[i], arg);
 }
 
-#define netdev_lockdep_set_classes(dev)                                \
-{                                                              \
-       static struct lock_class_key qdisc_tx_busylock_key;     \
-       static struct lock_class_key qdisc_running_key;         \
-       static struct lock_class_key qdisc_xmit_lock_key;       \
-       static struct lock_class_key dev_addr_list_lock_key;    \
-       unsigned int i;                                         \
-                                                               \
-       (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;      \
-       (dev)->qdisc_running_key = &qdisc_running_key;          \
-       lockdep_set_class(&(dev)->addr_list_lock,               \
-                         &dev_addr_list_lock_key);             \
-       for (i = 0; i < (dev)->num_tx_queues; i++)              \
-               lockdep_set_class(&(dev)->_tx[i]._xmit_lock,    \
-                                 &qdisc_xmit_lock_key);        \
-}
-
 u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
                     struct net_device *sb_dev);
 struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
@@ -3139,6 +3132,7 @@ static inline void netif_stop_queue(struct net_device *dev)
 }
 
 void netif_tx_stop_all_queues(struct net_device *dev);
+void netdev_update_lockdep_key(struct net_device *dev);
 
 static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
 {
@@ -4056,16 +4050,6 @@ static inline void netif_addr_lock(struct net_device *dev)
        spin_lock(&dev->addr_list_lock);
 }
 
-static inline void netif_addr_lock_nested(struct net_device *dev)
-{
-       int subclass = SINGLE_DEPTH_NESTING;
-
-       if (dev->netdev_ops->ndo_get_lock_subclass)
-               subclass = dev->netdev_ops->ndo_get_lock_subclass(dev);
-
-       spin_lock_nested(&dev->addr_list_lock, subclass);
-}
-
 static inline void netif_addr_lock_bh(struct net_device *dev)
 {
        spin_lock_bh(&dev->addr_list_lock);
@@ -4329,6 +4313,16 @@ int netdev_master_upper_dev_link(struct net_device *dev,
                                 struct netlink_ext_ack *extack);
 void netdev_upper_dev_unlink(struct net_device *dev,
                             struct net_device *upper_dev);
+int netdev_adjacent_change_prepare(struct net_device *old_dev,
+                                  struct net_device *new_dev,
+                                  struct net_device *dev,
+                                  struct netlink_ext_ack *extack);
+void netdev_adjacent_change_commit(struct net_device *old_dev,
+                                  struct net_device *new_dev,
+                                  struct net_device *dev);
+void netdev_adjacent_change_abort(struct net_device *old_dev,
+                                 struct net_device *new_dev,
+                                 struct net_device *dev);
 void netdev_adjacent_rename_links(struct net_device *dev, char *oldname);
 void *netdev_lower_dev_get_private(struct net_device *dev,
                                   struct net_device *lower_dev);
@@ -4340,7 +4334,6 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
 extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
 void netdev_rss_key_fill(void *buffer, size_t len);
 
-int dev_get_nest_level(struct net_device *dev);
 int skb_checksum_help(struct sk_buff *skb);
 int skb_crc32c_csum_help(struct sk_buff *skb);
 int skb_csum_hwoffload_help(struct sk_buff *skb,
index 0a11712..570a60c 100644 (file)
@@ -490,6 +490,9 @@ extern const struct file_operations nfs_dir_operations;
 extern const struct dentry_operations nfs_dentry_operations;
 
 extern void nfs_force_lookup_revalidate(struct inode *dir);
+extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
+                       struct nfs_fh *fh, struct nfs_fattr *fattr,
+                       struct nfs4_label *label);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
                        struct nfs_fattr *fattr, struct nfs4_label *label);
 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
index f91cb88..1bf83c8 100644 (file)
@@ -622,12 +622,28 @@ static inline int PageTransCompound(struct page *page)
  *
  * Unlike PageTransCompound, this is safe to be called only while
  * split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount < 0 false
+ * MMU notifier; otherwise the page->_mapcount check may yield false
  * positives.
+ *
+ * We have to treat page cache THP differently since every subpage of it
+ * would get _mapcount inc'ed once it is PMD mapped.  But it may be PTE
+ * mapped in the current process, so compare the subpage's _mapcount with
+ * compound_mapcount to filter out the PTE-mapped case.
  */
 static inline int PageTransCompoundMap(struct page *page)
 {
-       return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0;
+       struct page *head;
+
+       if (!PageTransCompound(page))
+               return 0;
+
+       if (PageAnon(page))
+               return atomic_read(&page->_mapcount) < 0;
+
+       head = compound_head(page);
+       /* File THP is PMD mapped and not PTE mapped */
+       return atomic_read(&page->_mapcount) ==
+              atomic_read(compound_mapcount_ptr(head));
 }
 
 /*
index 0959295..cfce186 100644 (file)
@@ -18,6 +18,7 @@ struct page_ext_operations {
 
 enum page_ext_flags {
        PAGE_EXT_OWNER,
+       PAGE_EXT_OWNER_ALLOCATED,
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
        PAGE_EXT_YOUNG,
        PAGE_EXT_IDLE,
@@ -35,6 +36,7 @@ struct page_ext {
        unsigned long flags;
 };
 
+extern unsigned long page_ext_size;
 extern void pgdat_page_ext_init(struct pglist_data *pgdat);
 
 #ifdef CONFIG_SPARSEMEM
@@ -51,6 +53,13 @@ static inline void page_ext_init(void)
 
 struct page_ext *lookup_page_ext(const struct page *page);
 
+static inline struct page_ext *page_ext_next(struct page_ext *curr)
+{
+       void *next = curr;
+       next += page_ext_size;
+       return next;
+}
+
 #else /* !CONFIG_PAGE_EXTENSION */
 struct page_ext;
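
page_ext_next() exists because a page_ext entry is no longer a fixed-size
struct: clients append their own data, so the stride is the runtime variable
page_ext_size rather than sizeof(struct page_ext). A stand-alone model of the
stride arithmetic; the 24 bytes of extra data are a made-up figure:

    #include <stdio.h>
    #include <stdlib.h>

    struct page_ext { unsigned long flags; };

    /* Runtime stride: the base struct plus whatever clients appended. */
    static size_t page_ext_size = sizeof(struct page_ext) + 24;

    static struct page_ext *page_ext_next(struct page_ext *curr)
    {
            return (struct page_ext *)((char *)curr + page_ext_size);
    }

    int main(void)
    {
            char *base = calloc(4, page_ext_size);
            struct page_ext *e = page_ext_next((struct page_ext *)base);

            printf("stride = %zu\n", (size_t)((char *)e - base)); /* 32 on LP64 */
            free(base);
            return 0;
    }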
 
index c755245..37a4d9e 100644 (file)
@@ -333,6 +333,16 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
                        mapping_gfp_mask(mapping));
 }
 
+static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+{
+       if (PageHuge(page))
+               return page;
+
+       VM_BUG_ON_PAGE(PageTail(page), page);
+
+       return page + (offset & (compound_nr(page) - 1));
+}
+
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
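
find_subpage() turns a page-cache offset into the right subpage of a compound
page by masking with compound_nr() - 1 (hugetlb pages are indexed by the head
page, hence the early return). The index math stand-alone, for an order-9
compound page and an arbitrary offset:

    #include <stdio.h>

    int main(void)
    {
            unsigned long compound_nr = 1UL << 9;   /* 512 subpages */
            unsigned long offset = 1234;            /* page-cache index */

            /* subpage = head + (offset & (compound_nr - 1)) */
            printf("subpage = %lu\n", offset & (compound_nr - 1)); /* 210 */
            return 0;
    }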
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
deleted file mode 100644 (file)
index 6706414..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *     aspm.h
- *
- *     PCI Express ASPM defines and function prototypes
- *
- *     Copyright (C) 2007 Intel Corp.
- *             Zhang Yanmin (yanmin.zhang@intel.com)
- *             Shaohua Li (shaohua.li@intel.com)
- *
- *     For more information, please consult the following manuals (look at
- *     http://www.pcisig.com/ for how to get them):
- *
- *     PCI Express Specification
- */
-
-#ifndef LINUX_ASPM_H
-#define LINUX_ASPM_H
-
-#include <linux/pci.h>
-
-#define PCIE_LINK_STATE_L0S    1
-#define PCIE_LINK_STATE_L1     2
-#define PCIE_LINK_STATE_CLKPM  4
-
-#ifdef CONFIG_PCIEASPM
-int pci_disable_link_state(struct pci_dev *pdev, int state);
-int pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-void pcie_no_aspm(void);
-#else
-static inline int pci_disable_link_state(struct pci_dev *pdev, int state)
-{ return 0; }
-static inline void pcie_no_aspm(void) { }
-#endif
-
-#endif /* LINUX_ASPM_H */
index bca9bc3..8318a97 100644 (file)
@@ -30,8 +30,10 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
                                         unsigned int *nents, u32 length);
 void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
-int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-                     enum dma_data_direction dir);
+int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs);
+void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs);
 int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
                            bool *use_p2pdma);
 ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
@@ -81,11 +83,17 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
 static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 {
 }
-static inline int pci_p2pdma_map_sg(struct device *dev,
-               struct scatterlist *sg, int nents, enum dma_data_direction dir)
+static inline int pci_p2pdma_map_sg_attrs(struct device *dev,
+               struct scatterlist *sg, int nents, enum dma_data_direction dir,
+               unsigned long attrs)
 {
        return 0;
 }
+static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev,
+               struct scatterlist *sg, int nents, enum dma_data_direction dir,
+               unsigned long attrs)
+{
+}
 static inline int pci_p2pdma_enable_store(const char *page,
                struct pci_dev **p2p_dev, bool *use_p2pdma)
 {
@@ -111,4 +119,16 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client)
        return pci_p2pmem_find_many(&client, 1);
 }
 
+static inline int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg,
+                                   int nents, enum dma_data_direction dir)
+{
+       return pci_p2pdma_map_sg_attrs(dev, sg, nents, dir, 0);
+}
+
+static inline void pci_p2pdma_unmap_sg(struct device *dev,
+               struct scatterlist *sg, int nents, enum dma_data_direction dir)
+{
+       pci_p2pdma_unmap_sg_attrs(dev, sg, nents, dir, 0);
+}
+
 #endif /* _LINUX_PCI_P2P_H */
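
With this change the attrs-taking functions are the real entry points and
pci_p2pdma_map_sg()/pci_p2pdma_unmap_sg() become wrappers passing attrs = 0,
which also finally gives P2P DMA a symmetric unmap. A hedged sketch of a driver
round trip; p2p_do_transfer is an invented name and error handling is minimal:

    /* Sketch: map a P2P scatterlist, run the transfer, unmap it again. */
    static int p2p_do_transfer(struct device *dev, struct scatterlist *sg,
                               int nents)
    {
            int mapped = pci_p2pdma_map_sg(dev, sg, nents, DMA_TO_DEVICE);

            if (!mapped)
                    return -EIO;            /* 0 means the mapping failed */

            /* ... hand the mapped addresses to the DMA engine here ... */

            pci_p2pdma_unmap_sg(dev, sg, nents, DMA_TO_DEVICE);
            return 0;
    }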
index 82e4cd1..f9088c8 100644 (file)
@@ -6,12 +6,18 @@
  *     Copyright 1994, Drew Eckhardt
  *     Copyright 1997--1999 Martin Mares <mj@ucw.cz>
  *
+ *     PCI Express ASPM defines and function prototypes
+ *     Copyright (c) 2007 Intel Corp.
+ *             Zhang Yanmin (yanmin.zhang@intel.com)
+ *             Shaohua Li (shaohua.li@intel.com)
+ *
  *     For more information, please consult the following manuals (look at
  *     http://www.pcisig.com/ for how to get them):
  *
  *     PCI BIOS Specification
  *     PCI Local Bus Specification
  *     PCI to PCI Bridge Specification
+ *     PCI Express Specification
  *     PCI System Design Guide
  */
 #ifndef LINUX_PCI_H
@@ -145,11 +151,6 @@ static inline const char *pci_power_name(pci_power_t state)
        return pci_power_names[1 + (__force int) state];
 }
 
-#define PCI_PM_D2_DELAY                200
-#define PCI_PM_D3_WAIT         10
-#define PCI_PM_D3COLD_WAIT     100
-#define PCI_PM_BUS_WAIT                50
-
 /**
  * typedef pci_channel_state_t
  *
@@ -418,7 +419,6 @@ struct pci_dev {
        unsigned int    broken_intx_masking:1;  /* INTx masking can't be used */
        unsigned int    io_window_1k:1;         /* Intel bridge 1K I/O windows */
        unsigned int    irq_managed:1;
-       unsigned int    has_secondary_link:1;
        unsigned int    non_compliant_bars:1;   /* Broken BARs; ignore them */
        unsigned int    is_probed:1;            /* Device probing in progress */
        unsigned int    link_active_reporting:1;/* Device capable of reporting link active */
@@ -649,9 +649,6 @@ static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev)
        return dev->bus->self;
 }
 
-struct device *pci_get_host_bridge_device(struct pci_dev *dev);
-void pci_put_host_bridge_device(struct device *dev);
-
 #ifdef CONFIG_PCI_MSI
 static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
 {
@@ -925,6 +922,11 @@ enum {
        PCI_SCAN_ALL_PCIE_DEVS  = 0x00000040,   /* Scan all, not just dev 0 */
 };
 
+#define PCI_IRQ_LEGACY         (1 << 0) /* Allow legacy interrupts */
+#define PCI_IRQ_MSI            (1 << 1) /* Allow MSI interrupts */
+#define PCI_IRQ_MSIX           (1 << 2) /* Allow MSI-X interrupts */
+#define PCI_IRQ_AFFINITY       (1 << 3) /* Auto-assign affinity */
+
 /* These external functions are only available when PCI support is enabled */
 #ifdef CONFIG_PCI
 
@@ -969,7 +971,7 @@ resource_size_t pcibios_align_resource(void *, const struct resource *,
                                resource_size_t,
                                resource_size_t);
 
-/* Weak but can be overriden by arch */
+/* Weak but can be overridden by arch */
 void pci_fixup_cardbus(struct pci_bus *);
 
 /* Generic PCI functions used internally */
@@ -995,7 +997,6 @@ struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
                                int busnr);
-void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
                                 const char *name,
                                 struct hotplug_slot *hotplug);
@@ -1241,19 +1242,12 @@ int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 bool pci_dev_run_wake(struct pci_dev *dev);
-bool pci_check_pme_status(struct pci_dev *dev);
-void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
 bool pcie_relaxed_ordering_enabled(struct pci_dev *dev);
 void pci_wakeup_bus(struct pci_bus *bus);
 void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state);
 
-/* PCI Virtual Channel */
-int pci_save_vc_state(struct pci_dev *dev);
-void pci_restore_vc_state(struct pci_dev *dev);
-void pci_allocate_vc_save_buffers(struct pci_dev *dev);
-
 /* For use by arch with custom probe code */
 void set_pcie_port_type(struct pci_dev *pdev);
 void set_pcie_hotplug_bridge(struct pci_dev *pdev);
@@ -1297,8 +1291,6 @@ int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
-struct pci_bus *pci_bus_get(struct pci_bus *bus);
-void pci_bus_put(struct pci_bus *bus);
 void pci_add_resource(struct list_head *resources, struct resource *res);
 void pci_add_resource_offset(struct list_head *resources, struct resource *res,
                             resource_size_t offset);
@@ -1408,11 +1400,6 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
                      unsigned int command_bits, u32 flags);
 
-#define PCI_IRQ_LEGACY         (1 << 0) /* Allow legacy interrupts */
-#define PCI_IRQ_MSI            (1 << 1) /* Allow MSI interrupts */
-#define PCI_IRQ_MSIX           (1 << 2) /* Allow MSI-X interrupts */
-#define PCI_IRQ_AFFINITY       (1 << 3) /* Auto-assign affinity */
-
 /*
  * Virtual interrupts allow for more interrupts to be allocated
  * than the device has interrupts for. These are not programmed
@@ -1517,14 +1504,6 @@ static inline int pci_irq_get_node(struct pci_dev *pdev, int vec)
 }
 #endif
 
-static inline int
-pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
-                     unsigned int max_vecs, unsigned int flags)
-{
-       return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
-                                             NULL);
-}
-
 /**
  * pci_irqd_intx_xlate() - Translate PCI INTx value to an IRQ domain hwirq
  * @d: the INTx IRQ domain
@@ -1565,10 +1544,22 @@ extern bool pcie_ports_native;
 #define pcie_ports_native      false
 #endif
 
+#define PCIE_LINK_STATE_L0S    1
+#define PCIE_LINK_STATE_L1     2
+#define PCIE_LINK_STATE_CLKPM  4
+
 #ifdef CONFIG_PCIEASPM
+int pci_disable_link_state(struct pci_dev *pdev, int state);
+int pci_disable_link_state_locked(struct pci_dev *pdev, int state);
+void pcie_no_aspm(void);
 bool pcie_aspm_support_enabled(void);
 bool pcie_aspm_enabled(struct pci_dev *pdev);
 #else
+static inline int pci_disable_link_state(struct pci_dev *pdev, int state)
+{ return 0; }
+static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state)
+{ return 0; }
+static inline void pcie_no_aspm(void) { }
 static inline bool pcie_aspm_support_enabled(void) { return false; }
 static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
 #endif
@@ -1579,23 +1570,8 @@ bool pci_aer_available(void);
 static inline bool pci_aer_available(void) { return false; }
 #endif
 
-#ifdef CONFIG_PCIE_ECRC
-void pcie_set_ecrc_checking(struct pci_dev *dev);
-void pcie_ecrc_get_policy(char *str);
-#else
-static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
-static inline void pcie_ecrc_get_policy(char *str) { }
-#endif
-
 bool pci_ats_disabled(void);
 
-#ifdef CONFIG_PCIE_PTM
-int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
-#else
-static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
-{ return -EINVAL; }
-#endif
-
 void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
@@ -1749,11 +1725,6 @@ static inline void pci_release_regions(struct pci_dev *dev) { }
 
 static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
 
-static inline void pci_block_cfg_access(struct pci_dev *dev) { }
-static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev)
-{ return 0; }
-static inline void pci_unblock_cfg_access(struct pci_dev *dev) { }
-
 static inline struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
 { return NULL; }
 static inline struct pci_dev *pci_get_slot(struct pci_bus *bus,
@@ -1782,17 +1753,36 @@ static inline const struct pci_device_id *pci_match_id(const struct pci_device_i
                                                         struct pci_dev *dev)
 { return NULL; }
 static inline bool pci_ats_disabled(void) { return true; }
+
+static inline int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
+{
+       return -EINVAL;
+}
+
+static inline int
+pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+                              unsigned int max_vecs, unsigned int flags,
+                              struct irq_affinity *aff_desc)
+{
+       return -ENOSPC;
+}
 #endif /* CONFIG_PCI */
 
+static inline int
+pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+                     unsigned int max_vecs, unsigned int flags)
+{
+       return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
+                                             NULL);
+}
+
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
-void pci_ats_init(struct pci_dev *dev);
 int pci_enable_ats(struct pci_dev *dev, int ps);
 void pci_disable_ats(struct pci_dev *dev);
 int pci_ats_queue_depth(struct pci_dev *dev);
 int pci_ats_page_aligned(struct pci_dev *dev);
 #else
-static inline void pci_ats_init(struct pci_dev *d) { }
 static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
 static inline void pci_disable_ats(struct pci_dev *d) { }
 static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
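
Moving pci_alloc_irq_vectors() out of the CONFIG_PCI block (with stubbed
pci_irq_vector() and pci_alloc_irq_vectors_affinity() for !CONFIG_PCI builds)
lets drivers call it unconditionally. Typical usage, as a sketch with an
invented setup_irqs() helper:

    /* Sketch: up to 8 vectors, falling back from MSI-X to MSI to INTx. */
    static int setup_irqs(struct pci_dev *pdev)
    {
            int nvec = pci_alloc_irq_vectors(pdev, 1, 8,
                            PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);

            if (nvec < 0)
                    return nvec;    /* -ENOSPC from the stub without PCI */

            return pci_irq_vector(pdev, 0); /* Linux IRQ of vector 0 */
    }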
@@ -1803,7 +1793,7 @@ static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; }
 
 #include <asm/pci.h>
 
-/* These two functions provide almost identical functionality. Depennding
+/* These two functions provide almost identical functionality. Depending
  * on the architecture, one will be implemented as a wrapper around the
  * other (in drivers/pci/mmap.c).
  *
@@ -1872,25 +1862,9 @@ static inline const char *pci_name(const struct pci_dev *pdev)
        return dev_name(&pdev->dev);
 }
 
-
-/*
- * Some archs don't want to expose struct resource to userland as-is
- * in sysfs and /proc
- */
-#ifdef HAVE_ARCH_PCI_RESOURCE_TO_USER
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
                          const struct resource *rsrc,
                          resource_size_t *start, resource_size_t *end);
-#else
-static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
-               const struct resource *rsrc, resource_size_t *start,
-               resource_size_t *end)
-{
-       *start = rsrc->start;
-       *end = rsrc->end;
-}
-#endif /* HAVE_ARCH_PCI_RESOURCE_TO_USER */
-
 
 /*
  * The world is not perfect and supplies us with broken PCI devices.
@@ -2032,10 +2006,6 @@ extern unsigned long pci_cardbus_mem_size;
 extern u8 pci_dfl_cache_line_size;
 extern u8 pci_cache_line_size;
 
-extern unsigned long pci_hotplug_io_size;
-extern unsigned long pci_hotplug_mem_size;
-extern unsigned long pci_hotplug_bus_size;
-
 /* Architecture-specific versions may override these (weak) */
 void pcibios_disable_device(struct pci_dev *dev);
 void pcibios_set_master(struct pci_dev *dev);
@@ -2305,10 +2275,6 @@ int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
 #ifdef CONFIG_OF
 struct device_node;
 struct irq_domain;
-void pci_set_of_node(struct pci_dev *dev);
-void pci_release_of_node(struct pci_dev *dev);
-void pci_set_bus_of_node(struct pci_bus *bus);
-void pci_release_bus_of_node(struct pci_bus *bus);
 struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
 int pci_parse_request_of_pci_ranges(struct device *dev,
                                    struct list_head *resources,
@@ -2318,10 +2284,6 @@ int pci_parse_request_of_pci_ranges(struct device *dev,
 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
 
 #else  /* CONFIG_OF */
-static inline void pci_set_of_node(struct pci_dev *dev) { }
-static inline void pci_release_of_node(struct pci_dev *dev) { }
-static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
-static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
 static inline struct irq_domain *
 pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
 static inline int pci_parse_request_of_pci_ranges(struct device *dev,
@@ -2435,4 +2397,7 @@ void pci_uevent_ers(struct pci_dev *pdev, enum  pci_ers_result err_type);
 #define pci_notice_ratelimited(pdev, fmt, arg...) \
        dev_notice_ratelimited(&(pdev)->dev, fmt, ##arg)
 
+#define pci_info_ratelimited(pdev, fmt, arg...) \
+       dev_info_ratelimited(&(pdev)->dev, fmt, ##arg)
+
 #endif /* LINUX_PCI_H */
index f694eb2..b482e42 100644 (file)
@@ -86,114 +86,14 @@ void pci_hp_deregister(struct hotplug_slot *slot);
 #define pci_hp_initialize(slot, bus, nr, name) \
        __pci_hp_initialize(slot, bus, nr, name, THIS_MODULE, KBUILD_MODNAME)
 
-/* PCI Setting Record (Type 0) */
-struct hpp_type0 {
-       u32 revision;
-       u8  cache_line_size;
-       u8  latency_timer;
-       u8  enable_serr;
-       u8  enable_perr;
-};
-
-/* PCI-X Setting Record (Type 1) */
-struct hpp_type1 {
-       u32 revision;
-       u8  max_mem_read;
-       u8  avg_max_split;
-       u16 tot_max_split;
-};
-
-/* PCI Express Setting Record (Type 2) */
-struct hpp_type2 {
-       u32 revision;
-       u32 unc_err_mask_and;
-       u32 unc_err_mask_or;
-       u32 unc_err_sever_and;
-       u32 unc_err_sever_or;
-       u32 cor_err_mask_and;
-       u32 cor_err_mask_or;
-       u32 adv_err_cap_and;
-       u32 adv_err_cap_or;
-       u16 pci_exp_devctl_and;
-       u16 pci_exp_devctl_or;
-       u16 pci_exp_lnkctl_and;
-       u16 pci_exp_lnkctl_or;
-       u32 sec_unc_err_sever_and;
-       u32 sec_unc_err_sever_or;
-       u32 sec_unc_err_mask_and;
-       u32 sec_unc_err_mask_or;
-};
-
-/*
- * _HPX PCI Express Setting Record (Type 3)
- */
-struct hpx_type3 {
-       u16 device_type;
-       u16 function_type;
-       u16 config_space_location;
-       u16 pci_exp_cap_id;
-       u16 pci_exp_cap_ver;
-       u16 pci_exp_vendor_id;
-       u16 dvsec_id;
-       u16 dvsec_rev;
-       u16 match_offset;
-       u32 match_mask_and;
-       u32 match_value;
-       u16 reg_offset;
-       u32 reg_mask_and;
-       u32 reg_mask_or;
-};
-
-struct hotplug_program_ops {
-       void (*program_type0)(struct pci_dev *dev, struct hpp_type0 *hpp);
-       void (*program_type1)(struct pci_dev *dev, struct hpp_type1 *hpp);
-       void (*program_type2)(struct pci_dev *dev, struct hpp_type2 *hpp);
-       void (*program_type3)(struct pci_dev *dev, struct hpx_type3 *hpp);
-};
-
-enum hpx_type3_dev_type {
-       HPX_TYPE_ENDPOINT       = BIT(0),
-       HPX_TYPE_LEG_END        = BIT(1),
-       HPX_TYPE_RC_END         = BIT(2),
-       HPX_TYPE_RC_EC          = BIT(3),
-       HPX_TYPE_ROOT_PORT      = BIT(4),
-       HPX_TYPE_UPSTREAM       = BIT(5),
-       HPX_TYPE_DOWNSTREAM     = BIT(6),
-       HPX_TYPE_PCI_BRIDGE     = BIT(7),
-       HPX_TYPE_PCIE_BRIDGE    = BIT(8),
-};
-
-enum hpx_type3_fn_type {
-       HPX_FN_NORMAL           = BIT(0),
-       HPX_FN_SRIOV_PHYS       = BIT(1),
-       HPX_FN_SRIOV_VIRT       = BIT(2),
-};
-
-enum hpx_type3_cfg_loc {
-       HPX_CFG_PCICFG          = 0,
-       HPX_CFG_PCIE_CAP        = 1,
-       HPX_CFG_PCIE_CAP_EXT    = 2,
-       HPX_CFG_VEND_CAP        = 3,
-       HPX_CFG_DVSEC           = 4,
-       HPX_CFG_MAX,
-};
-
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
-int pci_acpi_program_hp_params(struct pci_dev *dev,
-                              const struct hotplug_program_ops *hp_ops);
 bool pciehp_is_native(struct pci_dev *bridge);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *bridge);
 bool shpchp_is_native(struct pci_dev *bridge);
 int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
 int acpi_pci_detect_ejectable(acpi_handle handle);
 #else
-static inline int pci_acpi_program_hp_params(struct pci_dev *dev,
-                                   const struct hotplug_program_ops *hp_ops)
-{
-       return -ENODEV;
-}
-
 static inline int acpi_get_hp_hw_control_from_firmware(struct pci_dev *bridge)
 {
        return 0;
index de1b75e..21a5724 100644 (file)
 #define PCI_VENDOR_ID_MYRICOM          0x14c1
 
 #define PCI_VENDOR_ID_MEDIATEK         0x14c3
+#define PCI_DEVICE_ID_MEDIATEK_7629    0x7629
 
 #define PCI_VENDOR_ID_TITAN            0x14D2
 #define PCI_DEVICE_ID_TITAN_010L       0x8001
 
 #define PCI_VENDOR_ID_ASMEDIA          0x1b21
 
+#define PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS    0x1c36
+
 #define PCI_VENDOR_ID_CIRCUITCO                0x1cc8
 #define PCI_SUBSYSTEM_ID_CIRCUITCO_MINNOWBOARD 0x0001
 
index 61448c1..68ccc5b 100644 (file)
@@ -292,7 +292,7 @@ struct pmu {
         *  -EBUSY      -- @event is for this PMU but PMU temporarily unavailable
         *  -EINVAL     -- @event is for this PMU but @event is not valid
         *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
-        *  -EACCESS    -- @event is for this PMU, @event is valid, but no privilidges
+        *  -EACCES     -- @event is for this PMU, @event is valid, but no privileges
         *
         *  0           -- @event is for this PMU and valid
         *
index a7ecbe0..9a0e981 100644 (file)
@@ -678,6 +678,7 @@ static inline bool phy_is_started(struct phy_device *phydev)
        return phydev->state >= PHY_UP;
 }
 
+void phy_resolve_aneg_pause(struct phy_device *phydev);
 void phy_resolve_aneg_linkmode(struct phy_device *phydev);
 
 /**
@@ -1076,6 +1077,7 @@ int genphy_config_eee_advert(struct phy_device *phydev);
 int __genphy_config_aneg(struct phy_device *phydev, bool changed);
 int genphy_aneg_done(struct phy_device *phydev);
 int genphy_update_link(struct phy_device *phydev);
+int genphy_read_lpa(struct phy_device *phydev);
 int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device *phydev);
index 7ccb875..9841568 100644 (file)
@@ -5513,6 +5513,18 @@ struct ec_params_fp_seed {
        uint8_t seed[FP_CONTEXT_TPM_BYTES];
 } __ec_align4;
 
+#define EC_CMD_FP_ENC_STATUS 0x0409
+
+/* FP TPM seed has been set or not */
+#define FP_ENC_STATUS_SEED_SET BIT(0)
+
+struct ec_response_fp_encryption_status {
+       /* Used bits in encryption engine status */
+       uint32_t valid_flags;
+       /* Encryption engine status */
+       uint32_t status;
+} __ec_align4;
+
 /*****************************************************************************/
 /* Touchpad MCU commands: range 0x0500-0x05FF */
 
diff --git a/include/linux/platform_data/db8500_thermal.h b/include/linux/platform_data/db8500_thermal.h
deleted file mode 100644 (file)
index 55e5575..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * db8500_thermal.h - DB8500 Thermal Management Implementation
- *
- * Copyright (C) 2012 ST-Ericsson
- * Copyright (C) 2012 Linaro Ltd.
- *
- * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
- */
-
-#ifndef _DB8500_THERMAL_H_
-#define _DB8500_THERMAL_H_
-
-#include <linux/thermal.h>
-
-#define COOLING_DEV_MAX 8
-
-struct db8500_trip_point {
-       unsigned long temp;
-       enum thermal_trip_type type;
-       char cdev_name[COOLING_DEV_MAX][THERMAL_NAME_LENGTH];
-};
-
-struct db8500_thsens_platform_data {
-       struct db8500_trip_point trip_points[THERMAL_MAX_TRIPS];
-       int num_trips;
-};
-
-#endif /* _DB8500_THERMAL_H_ */
index 6eaa53c..30e676b 100644 (file)
@@ -51,7 +51,10 @@ struct sdma_script_start_addrs {
        /* End of v2 array */
        s32 zcanfd_2_mcu_addr;
        s32 zqspi_2_mcu_addr;
+       s32 mcu_2_ecspi_addr;
        /* End of v3 array */
+       s32 mcu_2_zqspi_addr;
+       /* End of v4 array */
 };
 
 /**
diff --git a/include/linux/platform_data/eth-netx.h b/include/linux/platform_data/eth-netx.h
deleted file mode 100644 (file)
index a3a6322..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#ifndef __ETH_NETX_H
-#define __ETH_NETX_H
-
-struct netxeth_platform_data {
-       unsigned int xcno;      /* number of xmac/xpec engine this eth uses */
-};
-
-#endif
index 1b5cec0..f268840 100644 (file)
@@ -64,6 +64,8 @@ extern struct resource *platform_get_resource_byname(struct platform_device *,
                                                     unsigned int,
                                                     const char *);
 extern int platform_get_irq_byname(struct platform_device *, const char *);
+extern int platform_get_irq_byname_optional(struct platform_device *dev,
+                                           const char *name);
 extern int platform_add_devices(struct platform_device **, int);
 
 struct platform_device_info {
index 222c3e0..ebf5ef1 100644 (file)
@@ -34,8 +34,6 @@ enum pm_qos_flags_status {
 #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT    PM_QOS_LATENCY_ANY
 #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS
 #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0
-#define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE     0
-#define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE     (-1)
 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1)
 
 #define PM_QOS_FLAG_NO_POWER_OFF       (1 << 0)
@@ -54,8 +52,6 @@ struct pm_qos_flags_request {
 enum dev_pm_qos_req_type {
        DEV_PM_QOS_RESUME_LATENCY = 1,
        DEV_PM_QOS_LATENCY_TOLERANCE,
-       DEV_PM_QOS_MIN_FREQUENCY,
-       DEV_PM_QOS_MAX_FREQUENCY,
        DEV_PM_QOS_FLAGS,
 };
 
@@ -97,14 +93,10 @@ struct pm_qos_flags {
 struct dev_pm_qos {
        struct pm_qos_constraints resume_latency;
        struct pm_qos_constraints latency_tolerance;
-       struct pm_qos_constraints min_frequency;
-       struct pm_qos_constraints max_frequency;
        struct pm_qos_flags flags;
        struct dev_pm_qos_request *resume_latency_req;
        struct dev_pm_qos_request *latency_tolerance_req;
        struct dev_pm_qos_request *flags_req;
-       struct dev_pm_qos_request *min_frequency_req;
-       struct dev_pm_qos_request *max_frequency_req;
 };
 
 /* Action requested to pm_qos_update_target */
@@ -199,10 +191,6 @@ static inline s32 dev_pm_qos_read_value(struct device *dev,
        switch (type) {
        case DEV_PM_QOS_RESUME_LATENCY:
                return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
-       case DEV_PM_QOS_MIN_FREQUENCY:
-               return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
-       case DEV_PM_QOS_MAX_FREQUENCY:
-               return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
        default:
                WARN_ON(1);
                return 0;
@@ -267,4 +255,48 @@ static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev)
 }
 #endif
 
+#define FREQ_QOS_MIN_DEFAULT_VALUE     0
+#define FREQ_QOS_MAX_DEFAULT_VALUE     (-1)
+
+enum freq_qos_req_type {
+       FREQ_QOS_MIN = 1,
+       FREQ_QOS_MAX,
+};
+
+struct freq_constraints {
+       struct pm_qos_constraints min_freq;
+       struct blocking_notifier_head min_freq_notifiers;
+       struct pm_qos_constraints max_freq;
+       struct blocking_notifier_head max_freq_notifiers;
+};
+
+struct freq_qos_request {
+       enum freq_qos_req_type type;
+       struct plist_node pnode;
+       struct freq_constraints *qos;
+};
+
+static inline int freq_qos_request_active(struct freq_qos_request *req)
+{
+       return !IS_ERR_OR_NULL(req->qos);
+}
+
+void freq_constraints_init(struct freq_constraints *qos);
+
+s32 freq_qos_read_value(struct freq_constraints *qos,
+                       enum freq_qos_req_type type);
+
+int freq_qos_add_request(struct freq_constraints *qos,
+                        struct freq_qos_request *req,
+                        enum freq_qos_req_type type, s32 value);
+int freq_qos_update_request(struct freq_qos_request *req, s32 new_value);
+int freq_qos_remove_request(struct freq_qos_request *req);
+
+int freq_qos_add_notifier(struct freq_constraints *qos,
+                         enum freq_qos_req_type type,
+                         struct notifier_block *notifier);
+int freq_qos_remove_notifier(struct freq_constraints *qos,
+                            enum freq_qos_req_type type,
+                            struct notifier_block *notifier);
+
 #endif
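
The freq_qos_* interface above replaces the removed DEV_PM_QOS_MIN/MAX_FREQUENCY device requests with a standalone frequency-constraint aggregate. A minimal sketch of the request lifecycle, assuming kHz-valued constraints and invented names:

#include <linux/pm_qos.h>
#include <linux/printk.h>

static struct freq_constraints foo_qos;
static struct freq_qos_request foo_min_req;

static int foo_setup(void)
{
        int ret;

        freq_constraints_init(&foo_qos);

        /* Request a hypothetical 800000 kHz frequency floor. */
        ret = freq_qos_add_request(&foo_qos, &foo_min_req,
                                   FREQ_QOS_MIN, 800000);
        if (ret < 0)
                return ret;

        /* The effective floor is aggregated over all FREQ_QOS_MIN requests. */
        pr_info("min freq: %d\n",
                freq_qos_read_value(&foo_qos, FREQ_QOS_MIN));

        freq_qos_update_request(&foo_min_req, 1000000);
        return freq_qos_remove_request(&foo_min_req);
}
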
index cefd374..c09d67e 100644 (file)
@@ -488,13 +488,6 @@ extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 extern void print_hex_dump(const char *level, const char *prefix_str,
                           int prefix_type, int rowsize, int groupsize,
                           const void *buf, size_t len, bool ascii);
-#if defined(CONFIG_DYNAMIC_DEBUG)
-#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)        \
-       dynamic_hex_dump(prefix_str, prefix_type, 16, 1, buf, len, true)
-#else
-extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-                                const void *buf, size_t len);
-#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
 #else
 static inline void print_hex_dump(const char *level, const char *prefix_str,
                                  int prefix_type, int rowsize, int groupsize,
@@ -526,4 +519,19 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type,
 }
 #endif
 
+/**
+ * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
+ * @prefix_str: string to prefix each line with;
+ *  caller supplies trailing spaces for alignment if desired
+ * @prefix_type: controls whether prefix of an offset, address, or none
+ *  is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
+ * @buf: data blob to dump
+ * @len: number of bytes in @buf
+ *
+ * Calls print_hex_dump(), with log level of KERN_DEBUG,
+ * rowsize of 16, groupsize of 1, and ASCII output included.
+ */
+#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)        \
+       print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true)
+
 #endif
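
Since print_hex_dump_bytes() now always expands to print_hex_dump_debug(), a call such as the following illustrative one emits a KERN_DEBUG dump with 16-byte rows, 1-byte groups and ASCII, gated by dynamic debug where that is enabled:

#include <linux/kernel.h>
#include <linux/random.h>

static void foo_dump(void)
{
        u8 buf[32];

        get_random_bytes(buf, sizeof(buf));
        /* Expands to print_hex_dump_debug(..., 16, 1, buf, 32, true). */
        print_hex_dump_bytes("foo: ", DUMP_PREFIX_OFFSET,
                             buf, sizeof(buf));
}
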
index 24632a7..b2c9c46 100644 (file)
@@ -262,7 +262,7 @@ struct pwm_ops {
        int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm,
                       struct pwm_capture *result, unsigned long timeout);
        int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm,
-                    struct pwm_state *state);
+                    const struct pwm_state *state);
        void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
                          struct pwm_state *state);
        struct module *owner;
@@ -316,7 +316,7 @@ struct pwm_capture {
 /* PWM user APIs */
 struct pwm_device *pwm_request(int pwm_id, const char *label);
 void pwm_free(struct pwm_device *pwm);
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
+int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state);
 int pwm_adjust_config(struct pwm_device *pwm);
 
 /**
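
The added const tightens the contract: pwm_apply_state() and .apply() implementations may read but must not modify the caller's state. The usual read-modify-apply pattern is unaffected, sketched here with invented names:

#include <linux/pwm.h>

static int foo_set_duty(struct pwm_device *pwm, unsigned int duty_ns)
{
        struct pwm_state state;

        pwm_get_state(pwm, &state);
        state.duty_cycle = duty_ns;
        state.enabled = true;

        /* Passed as const; the PWM core leaves 'state' untouched. */
        return pwm_apply_state(pwm, &state);
}
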
diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h
deleted file mode 100644 (file)
index 034982c..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_QUICKLIST_H
-#define LINUX_QUICKLIST_H
-/*
- * Fast allocations and disposal of pages. Pages must be in the condition
- * as needed after allocation when they are freed. Per cpu lists of pages
- * are kept that only contain node local pages.
- *
- * (C) 2007, SGI. Christoph Lameter <cl@linux.com>
- */
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/percpu.h>
-
-#ifdef CONFIG_QUICKLIST
-
-struct quicklist {
-       void *page;
-       int nr_pages;
-};
-
-DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
-
-/*
- * The two key functions quicklist_alloc and quicklist_free are inline so
- * that they may be custom compiled for the platform.
- * Specifying a NULL ctor can remove constructor support. Specifying
- * a constant quicklist allows the determination of the exact address
- * in the per cpu area.
- *
- * The fast path in quicklist_alloc touches only a per cpu cacheline and
- * the first cacheline of the page itself. There is minimal overhead involved.
- */
-static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *))
-{
-       struct quicklist *q;
-       void **p = NULL;
-
-       q = &get_cpu_var(quicklist)[nr];
-       p = q->page;
-       if (likely(p)) {
-               q->page = p[0];
-               p[0] = NULL;
-               q->nr_pages--;
-       }
-       put_cpu_var(quicklist);
-       if (likely(p))
-               return p;
-
-       p = (void *)__get_free_page(flags | __GFP_ZERO);
-       if (ctor && p)
-               ctor(p);
-       return p;
-}
-
-static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p,
-       struct page *page)
-{
-       struct quicklist *q;
-
-       q = &get_cpu_var(quicklist)[nr];
-       *(void **)p = q->page;
-       q->page = p;
-       q->nr_pages++;
-       put_cpu_var(quicklist);
-}
-
-static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp)
-{
-       __quicklist_free(nr, dtor, pp, virt_to_page(pp));
-}
-
-static inline void quicklist_free_page(int nr, void (*dtor)(void *),
-                                                       struct page *page)
-{
-       __quicklist_free(nr, dtor, page_address(page), page);
-}
-
-void quicklist_trim(int nr, void (*dtor)(void *),
-       unsigned long min_pages, unsigned long max_free);
-
-unsigned long quicklist_total_size(void);
-
-#else
-
-static inline unsigned long quicklist_total_size(void)
-{
-       return 0;
-}
-
-#endif
-
-#endif /* LINUX_QUICKLIST_H */
-
index 179faab..fdd421b 100644 (file)
@@ -60,41 +60,87 @@ rb_insert_augmented_cached(struct rb_node *node,
        rb_insert_augmented(node, &root->rb_root, augment);
 }
 
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,      \
-                            rbtype, rbaugmented, rbcompute)            \
+/*
+ * Template for declaring augmented rbtree callbacks (generic case)
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
+ */
+
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                         \
+                            RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \
 static inline void                                                     \
-rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
+RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
 {                                                                      \
        while (rb != stop) {                                            \
-               rbstruct *node = rb_entry(rb, rbstruct, rbfield);       \
-               rbtype augmented = rbcompute(node);                     \
-               if (node->rbaugmented == augmented)                     \
+               RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);       \
+               if (RBCOMPUTE(node, true))                              \
                        break;                                          \
-               node->rbaugmented = augmented;                          \
-               rb = rb_parent(&node->rbfield);                         \
+               rb = rb_parent(&node->RBFIELD);                         \
        }                                                               \
 }                                                                      \
 static inline void                                                     \
-rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
+RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
 }                                                                      \
 static void                                                            \
-rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
+RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
-       old->rbaugmented = rbcompute(old);                              \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
+       RBCOMPUTE(old, false);                                          \
 }                                                                      \
-rbstatic const struct rb_augment_callbacks rbname = {                  \
-       .propagate = rbname ## _propagate,                              \
-       .copy = rbname ## _copy,                                        \
-       .rotate = rbname ## _rotate                                     \
+RBSTATIC const struct rb_augment_callbacks RBNAME = {                  \
+       .propagate = RBNAME ## _propagate,                              \
+       .copy = RBNAME ## _copy,                                        \
+       .rotate = RBNAME ## _rotate                                     \
 };
 
+/*
+ * Template for declaring augmented rbtree callbacks,
+ * computing the RBAUGMENTED scalar as max(RBCOMPUTE(node)) over all subtree nodes.
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that returns the per-node RBTYPE scalar
+ */
+
+#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,        \
+                                RBTYPE, RBAUGMENTED, RBCOMPUTE)              \
+static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit)         \
+{                                                                            \
+       RBSTRUCT *child;                                                      \
+       RBTYPE max = RBCOMPUTE(node);                                         \
+       if (node->RBFIELD.rb_left) {                                          \
+               child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD);   \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (node->RBFIELD.rb_right) {                                         \
+               child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD);  \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (exit && node->RBAUGMENTED == max)                                 \
+               return true;                                                  \
+       node->RBAUGMENTED = max;                                              \
+       return false;                                                         \
+}                                                                            \
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                                       \
+                    RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max)
+
 
 #define        RB_RED          0
 #define        RB_BLACK        1
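
For illustration, a hypothetical node type that caches the maximum of a per-node 'last' value in subtree_last would instantiate the new template as follows (mirroring the interval-tree usage):

#include <linux/rbtree_augmented.h>

struct foo_node {
        struct rb_node rb;
        unsigned long last;
        unsigned long subtree_last;     /* max of ->last over this subtree */
};

static inline unsigned long foo_compute_last(struct foo_node *node)
{
        return node->last;
}

/* Generates foo_augment_compute_max() plus the callbacks structure. */
RB_DECLARE_CALLBACKS_MAX(static, foo_augment, struct foo_node, rb,
                         unsigned long, subtree_last, foo_compute_last)
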
index 563290f..75c97e4 100644 (file)
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition; it is reset as soon as
+ * we know that the condition has succeeded and the task is awoken.
  */
 struct rcuwait {
        struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)                               \
 ({                                                                     \
-       /*                                                              \
-        * Complain if we are called after do_exit()/exit_notify(),     \
-        * as we cannot rely on the rcu critical region for the         \
-        * wakeup side.                                                 \
-        */                                                             \
-       WARN_ON(current->exit_state);                                   \
-                                                                       \
        rcu_assign_pointer((w)->task, current);                         \
        for (;;) {                                                      \
                /*                                                      \
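
For reference, the single-waiter pattern the trimmed comment describes, using an invented completion flag as the condition:

#include <linux/rcuwait.h>

static struct rcuwait foo_wait;
static bool foo_done;

static void foo_init(void)
{
        rcuwait_init(&foo_wait);
}

static void foo_waiter(void)
{
        /* Publishes current in foo_wait.task, sleeps until the flag is set. */
        rcuwait_wait_event(&foo_wait, READ_ONCE(foo_done));
}

static void foo_waker(void)
{
        WRITE_ONCE(foo_done, true);
        rcuwait_wake_up(&foo_wait);     /* RCU-safe wakeup of the one waiter */
}
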
index 70db597..67a1d86 100644 (file)
@@ -223,6 +223,7 @@ extern long schedule_timeout_uninterruptible(long timeout);
 extern long schedule_timeout_idle(long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
+asmlinkage void preempt_schedule_irq(void);
 
 extern int __must_check io_schedule_prepare(void);
 extern void io_schedule_finish(int token);
@@ -1130,7 +1131,10 @@ struct task_struct {
 
        struct tlbflush_unmap_batch     tlb_ubc;
 
-       struct rcu_head                 rcu;
+       union {
+               refcount_t              rcu_users;
+               struct rcu_head         rcu;
+       };
 
        /* Cache last used pipe for splice(): */
        struct pipe_inode_info          *splice_pipe;
@@ -1839,7 +1843,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
  * running or not.
  */
 #ifndef vcpu_is_preempted
-# define vcpu_is_preempted(cpu)        false
+static inline bool vcpu_is_preempted(int cpu)
+{
+       return false;
+}
 #endif
 
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
index 4a79440..e677001 100644 (file)
@@ -362,16 +362,16 @@ enum {
 
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 {
+       if (current->mm != mm)
+               return;
        if (likely(!(atomic_read(&mm->membarrier_state) &
                     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
                return;
        sync_core_before_usermode();
 }
 
-static inline void membarrier_execve(struct task_struct *t)
-{
-       atomic_set(&t->mm->membarrier_state, 0);
-}
+extern void membarrier_exec_mmap(struct mm_struct *mm);
+
 #else
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
 static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@@ -380,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 {
 }
 #endif
-static inline void membarrier_execve(struct task_struct *t)
+static inline void membarrier_exec_mmap(struct mm_struct *mm)
 {
 }
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
index 3d90ed8..4b1c3b6 100644 (file)
@@ -119,7 +119,7 @@ static inline void put_task_struct(struct task_struct *t)
                __put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
index ace6fdb..9df7547 100644 (file)
@@ -77,6 +77,55 @@ enum lsm_event {
        LSM_POLICY_CHANGE,
 };
 
+/*
+ * These are reasons that can be passed to the security_locked_down()
+ * LSM hook. Lockdown reasons that protect kernel integrity (ie, the
+ * ability for userland to modify kernel code) are placed before
+ * LOCKDOWN_INTEGRITY_MAX.  Lockdown reasons that protect kernel
+ * confidentiality (ie, the ability for userland to extract
+ * information from the running kernel that would otherwise be
+ * restricted) are placed before LOCKDOWN_CONFIDENTIALITY_MAX.
+ *
+ * LSM authors should note that the semantics of any given lockdown
+ * reason are not guaranteed to be stable - the same reason may block
+ * one set of features in one kernel release, and a slightly different
+ * set of features in a later kernel release. LSMs that seek to expose
+ * lockdown policy at any level of granularity other than "none",
+ * "integrity" or "confidentiality" are responsible for either
+ * ensuring that they expose a consistent level of functionality to
+ * userland, or ensuring that userland is aware that this is
+ * potentially a moving target. It is easy to misuse this information
+ * in a way that could break userspace. Please be careful not to do
+ * so.
+ *
+ * If you add to this, remember to extend lockdown_reasons in
+ * security/lockdown/lockdown.c.
+ */
+enum lockdown_reason {
+       LOCKDOWN_NONE,
+       LOCKDOWN_MODULE_SIGNATURE,
+       LOCKDOWN_DEV_MEM,
+       LOCKDOWN_EFI_TEST,
+       LOCKDOWN_KEXEC,
+       LOCKDOWN_HIBERNATION,
+       LOCKDOWN_PCI_ACCESS,
+       LOCKDOWN_IOPORT,
+       LOCKDOWN_MSR,
+       LOCKDOWN_ACPI_TABLES,
+       LOCKDOWN_PCMCIA_CIS,
+       LOCKDOWN_TIOCSSERIAL,
+       LOCKDOWN_MODULE_PARAMETERS,
+       LOCKDOWN_MMIOTRACE,
+       LOCKDOWN_DEBUGFS,
+       LOCKDOWN_INTEGRITY_MAX,
+       LOCKDOWN_KCORE,
+       LOCKDOWN_KPROBES,
+       LOCKDOWN_BPF_READ,
+       LOCKDOWN_PERF,
+       LOCKDOWN_TRACEFS,
+       LOCKDOWN_CONFIDENTIALITY_MAX,
+};
+
 /* These functions are in security/commoncap.c */
 extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
                       int cap, unsigned int opts);
@@ -195,6 +244,7 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb);
 
 /* prototypes */
 extern int security_init(void);
+extern int early_security_init(void);
 
 /* Security operations */
 int security_binder_set_context_mgr(struct task_struct *mgr);
@@ -392,6 +442,7 @@ void security_inode_invalidate_secctx(struct inode *inode);
 int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
 int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
 int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
+int security_locked_down(enum lockdown_reason what);
 #else /* CONFIG_SECURITY */
 
 static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
@@ -423,6 +474,11 @@ static inline int security_init(void)
        return 0;
 }
 
+static inline int early_security_init(void)
+{
+       return 0;
+}
+
 static inline int security_binder_set_context_mgr(struct task_struct *mgr)
 {
        return 0;
@@ -1210,6 +1266,10 @@ static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32
 {
        return -EOPNOTSUPP;
 }
+static inline int security_locked_down(enum lockdown_reason what)
+{
+       return 0;
+}
 #endif /* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
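
A sketch of the intended call-site pattern: ask whether a feature is shut off by lockdown and propagate the error (the MSR-flavoured example is invented; real call sites live in the affected drivers):

#include <linux/security.h>

static int foo_raw_hw_access(void)
{
        int ret;

        /* Nonzero (typically -EPERM) once an LSM enforces this reason. */
        ret = security_locked_down(LOCKDOWN_MSR);
        if (ret)
                return ret;

        /* ...perform the privileged access... */
        return 0;
}
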
index 9443caf..0f80123 100644 (file)
@@ -69,7 +69,7 @@ struct shrinker {
 
        /* These are for internal use */
        struct list_head list;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
        /* ID in shrinker_idr */
        int id;
 #endif
@@ -81,6 +81,11 @@ struct shrinker {
 /* Flags */
 #define SHRINKER_NUMA_AWARE    (1 << 0)
 #define SHRINKER_MEMCG_AWARE   (1 << 1)
+/*
+ * For now this flag only makes sense when the shrinker is also
+ * MEMCG_AWARE; a non-MEMCG_AWARE shrinker should not have it set.
+ */
+#define SHRINKER_NONSLAB       (1 << 2)
 
 extern int prealloc_shrinker(struct shrinker *shrinker);
 extern void register_shrinker_prepared(struct shrinker *shrinker);
index 907209c..64a395c 100644 (file)
@@ -1354,7 +1354,8 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6
        return skb->hash;
 }
 
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
+__u32 skb_get_hash_perturb(const struct sk_buff *skb,
+                          const siphash_key_t *perturb);
 
 static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
 {
@@ -1494,6 +1495,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list)
        return list->next == (const struct sk_buff *) list;
 }
 
+/**
+ *     skb_queue_empty_lockless - check if a queue is empty
+ *     @list: queue head
+ *
+ *     Returns true if the queue is empty, false otherwise.
+ *     This variant can be used in lockless contexts.
+ */
+static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
+{
+       return READ_ONCE(list->next) == (const struct sk_buff *) list;
+}
+
+
 /**
  *     skb_queue_is_last - check if skb is the last entry in the queue
  *     @list: queue head
@@ -1847,9 +1861,11 @@ static inline void __skb_insert(struct sk_buff *newsk,
                                struct sk_buff *prev, struct sk_buff *next,
                                struct sk_buff_head *list)
 {
-       newsk->next = next;
-       newsk->prev = prev;
-       next->prev  = prev->next = newsk;
+       /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */
+       WRITE_ONCE(newsk->next, next);
+       WRITE_ONCE(newsk->prev, prev);
+       WRITE_ONCE(next->prev, newsk);
+       WRITE_ONCE(prev->next, newsk);
        list->qlen++;
 }
 
@@ -1860,11 +1876,11 @@ static inline void __skb_queue_splice(const struct sk_buff_head *list,
        struct sk_buff *first = list->next;
        struct sk_buff *last = list->prev;
 
-       first->prev = prev;
-       prev->next = first;
+       WRITE_ONCE(first->prev, prev);
+       WRITE_ONCE(prev->next, first);
 
-       last->next = next;
-       next->prev = last;
+       WRITE_ONCE(last->next, next);
+       WRITE_ONCE(next->prev, last);
 }
 
 /**
@@ -2005,8 +2021,8 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
        next       = skb->next;
        prev       = skb->prev;
        skb->next  = skb->prev = NULL;
-       next->prev = prev;
-       prev->next = next;
+       WRITE_ONCE(next->prev, prev);
+       WRITE_ONCE(prev->next, next);
 }
 
 /**
@@ -3510,8 +3526,9 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len);
 int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
 int skb_vlan_pop(struct sk_buff *skb);
 int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
-int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto);
-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto);
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+                 int mac_len);
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len);
 int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse);
 int skb_mpls_dec_ttl(struct sk_buff *skb);
 struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
@@ -4144,22 +4161,28 @@ static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
 
        return NULL;
 }
+
+static inline void skb_ext_reset(struct sk_buff *skb)
+{
+       if (unlikely(skb->active_extensions)) {
+               __skb_ext_put(skb->extensions);
+               skb->active_extensions = 0;
+       }
+}
 #else
 static inline void skb_ext_put(struct sk_buff *skb) {}
+static inline void skb_ext_reset(struct sk_buff *skb) {}
 static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
 static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
 static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
 #endif /* CONFIG_SKB_EXTENSIONS */
 
-static inline void nf_reset(struct sk_buff *skb)
+static inline void nf_reset_ct(struct sk_buff *skb)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        nf_conntrack_put(skb_nfct(skb));
        skb->_nfct = 0;
 #endif
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-       skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
-#endif
 }
 
 static inline void nf_reset_trace(struct sk_buff *skb)
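
The WRITE_ONCE() stores above pair with the READ_ONCE() in skb_queue_empty_lockless(), making an unlocked emptiness probe data-race free. A hedged sketch of such a reader:

#include <linux/skbuff.h>
#include <net/sock.h>

/* Fast-path probe without taking sk_receive_queue.lock. */
static bool foo_rx_pending(const struct sock *sk)
{
        return !skb_queue_empty_lockless(&sk->sk_receive_queue);
}
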
index 56c9c7e..4d2a2fa 100644 (file)
@@ -493,6 +493,10 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
  * kmalloc is the normal method of allocating memory
  * for objects smaller than page size in the kernel.
  *
+ * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
+ * bytes. For @size of power of two bytes, the alignment is also guaranteed
+ * to be at least the size.
+ *
  * The @flags argument may be one of the GFP flags defined at
  * include/linux/gfp.h and described at
  * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
@@ -595,68 +599,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
        return __kmalloc_node(size, flags, node);
 }
 
-struct memcg_cache_array {
-       struct rcu_head rcu;
-       struct kmem_cache *entries[0];
-};
-
-/*
- * This is the main placeholder for memcg-related information in kmem caches.
- * Both the root cache and the child caches will have it. For the root cache,
- * this will hold a dynamically allocated array large enough to hold
- * information about the currently limited memcgs in the system. To allow the
- * array to be accessed without taking any locks, on relocation we free the old
- * version only after a grace period.
- *
- * Root and child caches hold different metadata.
- *
- * @root_cache:        Common to root and child caches.  NULL for root, pointer to
- *             the root cache for children.
- *
- * The following fields are specific to root caches.
- *
- * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
- *             used to index child caches during allocation and cleared
- *             early during shutdown.
- *
- * @root_caches_node: List node for slab_root_caches list.
- *
- * @children:  List of all child caches.  While the child caches are also
- *             reachable through @memcg_caches, a child cache remains on
- *             this list until it is actually destroyed.
- *
- * The following fields are specific to child caches.
- *
- * @memcg:     Pointer to the memcg this cache belongs to.
- *
- * @children_node: List node for @root_cache->children list.
- *
- * @kmem_caches_node: List node for @memcg->kmem_caches list.
- */
-struct memcg_cache_params {
-       struct kmem_cache *root_cache;
-       union {
-               struct {
-                       struct memcg_cache_array __rcu *memcg_caches;
-                       struct list_head __root_caches_node;
-                       struct list_head children;
-                       bool dying;
-               };
-               struct {
-                       struct mem_cgroup *memcg;
-                       struct list_head children_node;
-                       struct list_head kmem_caches_node;
-                       struct percpu_ref refcnt;
-
-                       void (*work_fn)(struct kmem_cache *);
-                       union {
-                               struct rcu_head rcu_head;
-                               struct work_struct work;
-                       };
-               };
-       };
-};
-
 int memcg_update_all_caches(int num_memcgs);
 
 /**
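
Regarding the kmalloc() alignment guarantee documented above: power-of-two sizes now return naturally aligned objects, as this illustrative check assumes:

#include <linux/kernel.h>
#include <linux/slab.h>

static void foo_check_alignment(void)
{
        void *p = kmalloc(512, GFP_KERNEL);

        /* 512 is a power of two, so p is at least 512-byte aligned. */
        WARN_ON(p && !IS_ALIGNED((unsigned long)p, 512));
        kfree(p);
}
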
index fc0bed5..4049d97 100644 (file)
@@ -263,7 +263,7 @@ struct ucred {
 #define PF_MAX         AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
-#define SOMAXCONN      128
+#define SOMAXCONN      4096
 
 /* Flags we can use with send/ and recv.
    Added those for 1003.1g not all are supported yet
index 4deb11f..b6ccdc2 100644 (file)
@@ -227,7 +227,26 @@ static inline bool strstarts(const char *str, const char *prefix)
 }
 
 size_t memweight(const void *ptr, size_t bytes);
-void memzero_explicit(void *s, size_t count);
+
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ *                   keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * Note: usually using memset() is just fine (!), but in cases
+ * where clearing out _local_ data at the end of a scope is
+ * necessary, memzero_explicit() should be used instead in
+ * order to prevent the compiler from optimising away zeroing.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes memset() followed by a compiler barrier.
+ */
+static inline void memzero_explicit(void *s, size_t count)
+{
+       memset(s, 0, count);
+       barrier_data(s);
+}
 
 /**
  * kbasename - return the last part of a pathname.
@@ -474,8 +493,9 @@ static inline void memcpy_and_pad(void *dest, size_t dest_len,
  * But this can lead to bugs due to typos, or if prefix is a pointer
  * and not a constant. Instead use str_has_prefix().
  *
- * Returns: 0 if @str does not start with @prefix
-         strlen(@prefix) if @str does start with @prefix
+ * Returns:
+ * * strlen(@prefix) if @str starts with @prefix
+ * * 0 if @str does not start with @prefix
  */
 static __always_inline size_t str_has_prefix(const char *str, const char *prefix)
 {
index 87d27e1..d796058 100644 (file)
@@ -64,6 +64,11 @@ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
        return 0;
 }
 
+static inline void xprt_destroy_backchannel(struct rpc_xprt *xprt,
+                                           unsigned int max_reqs)
+{
+}
+
 static inline bool svc_is_backchannel(const struct svc_rqst *rqstp)
 {
        return false;
index c7f38e8..f860372 100644 (file)
@@ -87,6 +87,7 @@ struct cache_detail {
                                              int has_died);
 
        struct cache_head *     (*alloc)(void);
+       void                    (*flush)(void);
        int                     (*match)(struct cache_head *orig, struct cache_head *new);
        void                    (*init)(struct cache_head *orig, struct cache_head *new);
        void                    (*update)(struct cache_head *orig, struct cache_head *new);
@@ -107,9 +108,9 @@ struct cache_detail {
        /* fields for communication over channel */
        struct list_head        queue;
 
-       atomic_t                readers;                /* how many time is /chennel open */
-       time_t                  last_close;             /* if no readers, when did last close */
-       time_t                  last_warn;              /* when we last warned about no readers */
+       atomic_t                writers;                /* how many times /channel is open */
+       time_t                  last_close;             /* if no writers, when did last close */
+       time_t                  last_warn;              /* when we last warned about no writers */
 
        union {
                struct proc_dir_entry   *procfs;
index 27536b9..a6ef351 100644 (file)
@@ -242,9 +242,6 @@ void                rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
 void           rpc_sleep_on_priority(struct rpc_wait_queue *,
                                        struct rpc_task *,
                                        int priority);
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-               struct rpc_wait_queue *queue,
-               struct rpc_task *task);
 void           rpc_wake_up_queued_task(struct rpc_wait_queue *,
                                        struct rpc_task *);
 void           rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
index 981f0d7..40f6588 100644 (file)
@@ -42,6 +42,7 @@
 
 #ifndef SVC_RDMA_H
 #define SVC_RDMA_H
+#include <linux/llist.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/rpc_rdma.h>
@@ -107,8 +108,7 @@ struct svcxprt_rdma {
        struct list_head     sc_read_complete_q;
        struct work_struct   sc_work;
 
-       spinlock_t           sc_recv_lock;
-       struct list_head     sc_recv_ctxts;
+       struct llist_head    sc_recv_ctxts;
 };
 /* sc_flags */
 #define RDMAXPRT_CONN_PENDING  3
@@ -125,6 +125,7 @@ enum {
 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
 
 struct svc_rdma_recv_ctxt {
+       struct llist_node       rc_node;
        struct list_head        rc_list;
        struct ib_recv_wr       rc_recv_wr;
        struct ib_cqe           rc_cqe;
@@ -200,7 +201,6 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
index 8a87d8b..f33e501 100644 (file)
@@ -186,7 +186,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
-extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
+extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 
index 13e108b..d783e15 100644 (file)
@@ -352,6 +352,7 @@ bool                        xprt_prepare_transmit(struct rpc_task *task);
 void                   xprt_request_enqueue_transmit(struct rpc_task *task);
 void                   xprt_request_enqueue_receive(struct rpc_task *task);
 void                   xprt_request_wait_receive(struct rpc_task *task);
+void                   xprt_request_dequeue_xprt(struct rpc_task *task);
 bool                   xprt_request_need_retransmit(struct rpc_task *task);
 void                   xprt_transmit(struct rpc_task *task);
 void                   xprt_end_transmit(struct rpc_task *task);
index 86fc38f..16c239e 100644 (file)
@@ -49,9 +49,9 @@
  * fully-chunked NFS message (read chunks are the largest). Note only
  * a single chunk type per message is supported currently.
  */
-#define RPCRDMA_MIN_SLOT_TABLE (2U)
+#define RPCRDMA_MIN_SLOT_TABLE (4U)
 #define RPCRDMA_DEF_SLOT_TABLE (128U)
-#define RPCRDMA_MAX_SLOT_TABLE (256U)
+#define RPCRDMA_MAX_SLOT_TABLE (16384U)
 
 #define RPCRDMA_MIN_INLINE  (1024)     /* min inline thresh */
 #define RPCRDMA_DEF_INLINE  (4096)     /* default inline thresh */
index 7638dbe..a940de0 100644 (file)
@@ -61,6 +61,7 @@ struct sock_xprt {
        struct mutex            recv_mutex;
        struct sockaddr_storage srcaddr;
        unsigned short          srcport;
+       int                     xprt_err;
 
        /*
         * UDP socket buffer size parameters
index de2c67a..063c0c1 100644 (file)
@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
+extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
 extern void swap_setup(void);
 
@@ -364,6 +365,7 @@ extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
 
+extern unsigned long reclaim_pages(struct list_head *page_list);
 #ifdef CONFIG_NUMA
 extern int node_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
index 5420817..fa7ee50 100644 (file)
@@ -196,9 +196,9 @@ struct bin_attribute {
        .size   = _size,                                                \
 }
 
-#define __BIN_ATTR_WO(_name) {                                         \
+#define __BIN_ATTR_WO(_name, _size) {                                  \
        .attr   = { .name = __stringify(_name), .mode = 0200 },         \
-       .store  = _name##_store,                                        \
+       .write  = _name##_write,                                        \
        .size   = _size,                                                \
 }
 
index 3e2a80c..96305a6 100644 (file)
@@ -53,18 +53,4 @@ extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-extern void t10_pi_prepare(struct request *rq, u8 protection_type);
-extern void t10_pi_complete(struct request *rq, u8 protection_type,
-                           unsigned int intervals);
-#else
-static inline void t10_pi_complete(struct request *rq, u8 protection_type,
-                                  unsigned int intervals)
-{
-}
-static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
-{
-}
-#endif
-
 #endif
index 99617e5..668e25a 100644 (file)
@@ -393,7 +393,7 @@ struct tcp_sock {
        /* fastopen_rsk points to request_sock that resulted in this big
         * socket. Used to retransmit SYNACKs etc.
         */
-       struct request_sock *fastopen_rsk;
+       struct request_sock __rcu *fastopen_rsk;
        u32     *saved_syn;
 };
 
@@ -447,8 +447,8 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
 
 static inline bool tcp_passive_fastopen(const struct sock *sk)
 {
-       return (sk->sk_state == TCP_SYN_RECV &&
-               tcp_sk(sk)->fastopen_rsk != NULL);
+       return sk->sk_state == TCP_SYN_RECV &&
+              rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL;
 }
 
 static inline void fastopen_queue_tune(struct sock *sk, int backlog)
index 8d8821b..659a440 100644 (file)
@@ -134,7 +134,7 @@ static inline void copy_overflow(int size, unsigned long count)
        WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
 }
 
-static __always_inline bool
+static __always_inline __must_check bool
 check_copy_size(const void *addr, size_t bytes, bool is_source)
 {
        int sz = __compiletime_object_size(addr);
index 63238c8..131ea1b 100644 (file)
@@ -152,7 +152,7 @@ struct tcg_algorithm_info {
  * total. Once we've done this we know the offset of the data length field,
  * and can calculate the total size of the event.
  *
- * Return: size of the event on success, <0 on failure
+ * Return: size of the event on success, 0 on failure
  */
 
 static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
@@ -170,6 +170,7 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
        u16 halg;
        int i;
        int j;
+       u32 count, event_type;
 
        marker = event;
        marker_start = marker;
@@ -190,16 +191,22 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
        }
 
        event = (struct tcg_pcr_event2_head *)mapping;
+       /*
+        * The loop below will unmap these fields if the log is larger than
+        * one page, so save them here for reference:
+        */
+       count = READ_ONCE(event->count);
+       event_type = READ_ONCE(event->event_type);
 
        efispecid = (struct tcg_efi_specid_event_head *)event_header->event;
 
        /* Check if event is malformed. */
-       if (event->count > efispecid->num_algs) {
+       if (count > efispecid->num_algs) {
                size = 0;
                goto out;
        }
 
-       for (i = 0; i < event->count; i++) {
+       for (i = 0; i < count; i++) {
                halg_size = sizeof(event->digests[i].alg_id);
 
                /* Map the digest's algorithm identifier */
@@ -256,8 +263,9 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
                + event_field->event_size;
        size = marker - marker_start;
 
-       if ((event->event_type == 0) && (event_field->event_size == 0))
+       if (event_type == 0 && event_field->event_size == 0)
                size = 0;
+
 out:
        if (do_mapping)
                TPM_MEMUNMAP(mapping, mapping_size);
index 34a0385..d4ee6e9 100644 (file)
@@ -55,7 +55,7 @@
  * as usual) and both source and destination can trigger faults.
  */
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 {
        kasan_check_write(to, n);
@@ -63,7 +63,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
        return raw_copy_from_user(to, from, n);
 }
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        might_fault();
@@ -85,7 +85,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
  * The caller should also make sure he pins the user space address
  * so that we don't result in page fault and sleep.
  */
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
        kasan_check_read(from, n);
@@ -93,7 +93,7 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
        return raw_copy_to_user(to, from, n);
 }
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        might_fault();
@@ -103,7 +103,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 
 #ifdef INLINE_COPY_FROM_USER
-static inline unsigned long
+static inline __must_check unsigned long
 _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        unsigned long res = n;
@@ -117,12 +117,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
        return res;
 }
 #else
-extern unsigned long
+extern __must_check unsigned long
 _copy_from_user(void *, const void __user *, unsigned long);
 #endif
 
 #ifdef INLINE_COPY_TO_USER
-static inline unsigned long
+static inline __must_check unsigned long
 _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        might_fault();
@@ -133,7 +133,7 @@ _copy_to_user(void __user *to, const void *from, unsigned long n)
        return n;
 }
 #else
-extern unsigned long
+extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
 #endif
 
@@ -222,14 +222,85 @@ static inline bool pagefault_disabled(void)
 
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
-static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
-                               const void __user *from, unsigned long n)
+static inline __must_check unsigned long
+__copy_from_user_inatomic_nocache(void *to, const void __user *from,
+                                 unsigned long n)
 {
        return __copy_from_user_inatomic(to, from, n);
 }
 
 #endif         /* ARCH_HAS_NOCACHE_UACCESS */
 
+extern __must_check int check_zeroed_user(const void __user *from, size_t size);
+
+/**
+ * copy_struct_from_user: copy a struct from userspace
+ * @dst:   Destination address, in kernel space. This buffer must be @ksize
+ *         bytes long.
+ * @ksize: Size of @dst struct.
+ * @src:   Source address, in userspace.
+ * @usize: (Alleged) size of @src struct.
+ *
+ * Copies a struct from userspace to kernel space, in a way that guarantees
+ * backwards-compatibility for struct syscall arguments (as long as future
+ * struct extensions are made such that all new fields are *appended* to the
+ * old struct, and zeroed-out new fields have the same meaning as the old
+ * struct).
+ *
+ * @ksize is just sizeof(*dst), and @usize should've been passed by userspace.
+ * The recommended usage is something like the following:
+ *
+ *   SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize)
+ *   {
+ *      int err;
+ *      struct foo karg = {};
+ *
+ *      if (usize > PAGE_SIZE)
+ *        return -E2BIG;
+ *      if (usize < FOO_SIZE_VER0)
+ *        return -EINVAL;
+ *
+ *      err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize);
+ *      if (err)
+ *        return err;
+ *
+ *      // ...
+ *   }
+ *
+ * There are three cases to consider:
+ *  * If @usize == @ksize, then it's copied verbatim.
+ *  * If @usize < @ksize, then the userspace has passed an old struct to a
+ *    newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize)
+ *    are to be zero-filled.
+ *  * If @usize > @ksize, then the userspace has passed a new struct to an
+ *    older kernel. The trailing bytes unknown to the kernel (@usize - @ksize)
+ *    are checked to ensure they are zeroed, otherwise -E2BIG is returned.
+ *
+ * Returns (in all cases, some data may have been copied):
+ *  * -E2BIG:  (@usize > @ksize) and there are non-zero trailing bytes in @src.
+ *  * -EFAULT: access to userspace failed.
+ */
+static __always_inline __must_check int
+copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
+                     size_t usize)
+{
+       size_t size = min(ksize, usize);
+       size_t rest = max(ksize, usize) - size;
+
+       /* Deal with trailing bytes. */
+       if (usize < ksize) {
+               memset(dst + size, 0, rest);
+       } else if (usize > ksize) {
+               int ret = check_zeroed_user(src + size, rest);
+               if (ret <= 0)
+                       return ret ?: -E2BIG;
+       }
+       /* Copy the interoperable parts of the struct. */
+       if (copy_from_user(dst, src, size))
+               return -EFAULT;
+       return 0;
+}
+
 /*
  * probe_kernel_read(): safely attempt to read from a location
  * @dst: pointer to the buffer that shall take the data
@@ -284,8 +355,10 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
 #ifndef user_access_begin
 #define user_access_begin(ptr,len) access_ok(ptr, len)
 #define user_access_end() do { } while (0)
-#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
-#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
+#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
+#define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e)
+#define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
+#define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e)
 static inline unsigned long user_access_save(void) { return 0UL; }
 static inline void user_access_restore(unsigned long flags) { }
 #endif
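
The new unsafe_copy_to_user() follows the same rules as the other unsafe accessors: bracket it with user_access_begin()/user_access_end() and supply a fault label. A minimal sketch with an invented helper:

#include <linux/uaccess.h>

static int foo_copy_out(void __user *ubuf, const void *kbuf, size_t len)
{
        if (!user_access_begin(ubuf, len))
                return -EFAULT;
        unsafe_copy_to_user(ubuf, kbuf, len, efault);
        user_access_end();
        return 0;

efault:
        user_access_end();
        return -EFAULT;
}
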
index 32d990d..911ab7c 100644 (file)
@@ -32,6 +32,7 @@ extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR];
 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 struct key;
+struct pkcs7_message;
 
 extern int verify_pkcs7_signature(const void *data, size_t len,
                                  const void *raw_pkcs7, size_t pkcs7_len,
@@ -41,6 +42,15 @@ extern int verify_pkcs7_signature(const void *data, size_t len,
                                                      const void *data, size_t len,
                                                      size_t asn1hdrlen),
                                  void *ctx);
+extern int verify_pkcs7_message_sig(const void *data, size_t len,
+                                   struct pkcs7_message *pkcs7,
+                                   struct key *trusted_keys,
+                                   enum key_being_used_for usage,
+                                   int (*view_content)(void *ctx,
+                                                       const void *data,
+                                                       size_t len,
+                                                       size_t asn1hdrlen),
+                                   void *ctx);
 
 #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION
 extern int verify_pefile_signature(const void *pebuf, unsigned pelen,
index 4c7781f..07875cc 100644 (file)
@@ -48,7 +48,6 @@ struct virtio_vsock_sock {
 
 struct virtio_vsock_pkt {
        struct virtio_vsock_hdr hdr;
-       struct work_struct work;
        struct list_head list;
        /* socket refcnt not held, only use for cancellation */
        struct vsock_sock *vsk;
index dfa718f..4e78094 100644 (file)
@@ -53,15 +53,21 @@ struct vmap_area {
        unsigned long va_start;
        unsigned long va_end;
 
-       /*
-        * Largest available free size in subtree.
-        */
-       unsigned long subtree_max_size;
-       unsigned long flags;
        struct rb_node rb_node;         /* address sorted rbtree */
        struct list_head list;          /* address sorted list */
-       struct llist_node purge_list;    /* "lazy purge" list */
-       struct vm_struct *vm;
+
+       /*
+        * The following three variables can be packed, because
+        * a vmap_area object is always in one of the three states:
+        *    1) in "free" tree (root is free_vmap_area_root)
+        *    2) in "busy" tree (root is vmap_area_root)
+        *    3) in purge list  (head is vmap_purge_list)
+        */
+       union {
+               unsigned long subtree_max_size; /* in "free" tree */
+               struct vm_struct *vm;           /* in "busy" tree */
+               struct llist_node purge_list;   /* in purge list */
+       };
 };
 
 /*
index 5921599..86eecbd 100644 (file)
@@ -230,8 +230,8 @@ static inline int xa_err(void *entry)
  * This structure is used either directly or via the XA_LIMIT() macro
  * to communicate the range of IDs that are valid for allocation.
  * Two common ranges are predefined for you:
- *  * xa_limit_32b     - [0 - UINT_MAX]
- *  * xa_limit_31b     - [0 - INT_MAX]
+ * * xa_limit_32b      - [0 - UINT_MAX]
+ * * xa_limit_31b      - [0 - INT_MAX]
  */
 struct xa_limit {
        u32 max;
index 7238865..51bf430 100644 (file)
@@ -46,6 +46,8 @@ const char *zpool_get_type(struct zpool *pool);
 
 void zpool_destroy_pool(struct zpool *pool);
 
+bool zpool_malloc_support_movable(struct zpool *pool);
+
 int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
                        unsigned long *handle);
 
@@ -90,6 +92,7 @@ struct zpool_driver {
                        struct zpool *zpool);
        void (*destroy)(void *pool);
 
+       bool malloc_support_movable;
        int (*malloc)(void *pool, size_t size, gfp_t gfp,
                                unsigned long *handle);
        void (*free)(void *pool, unsigned long handle);
index f7fe456..1afc125 100644 (file)
@@ -203,7 +203,6 @@ struct bonding {
        struct   slave __rcu *primary_slave;
        struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
        bool     force_primary;
-       u32      nest_level;
        s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
        int     (*recv_probe)(const struct sk_buff *, struct bonding *,
                              struct slave *);
@@ -239,6 +238,7 @@ struct bonding {
        struct   dentry *debug_dir;
 #endif /* CONFIG_DEBUG_FS */
        struct rtnl_link_stats64 bond_stats;
+       struct lock_class_key stats_lock_key;
 };
 
 #define bond_slave_get_rcu(dev) \
index 127a5c4..86e0283 100644 (file)
@@ -122,7 +122,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
 static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_RX_BUSY_POLL
-       sk->sk_napi_id = skb->napi_id;
+       WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
 #endif
        sk_rx_queue_set(sk, skb);
 }
@@ -132,8 +132,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk,
                                        const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_RX_BUSY_POLL
-       if (!sk->sk_napi_id)
-               sk->sk_napi_id = skb->napi_id;
+       if (!READ_ONCE(sk->sk_napi_id))
+               WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
 #endif
 }
 
index ff45c3e..4ab2c49 100644 (file)
@@ -5549,6 +5549,14 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy,
  */
 const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
 
+/**
+ * regulatory_pre_cac_allowed - check if pre-CAC allowed in the current regdom
+ * @wiphy: wiphy for which pre-CAC capability is checked.
+ *
+ * Pre-CAC is allowed only in some regdomains (notably ETSI).
+ */
+bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
+
 /**
  * DOC: Internal regulatory db functions
  *
index 90bd210..5cd1227 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/in6.h>
+#include <linux/siphash.h>
 #include <uapi/linux/if_ether.h>
 
 /**
@@ -276,7 +277,7 @@ struct flow_keys_basic {
 struct flow_keys {
        struct flow_dissector_key_control control;
 #define FLOW_KEYS_HASH_START_FIELD basic
-       struct flow_dissector_key_basic basic;
+       struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT);
        struct flow_dissector_key_tags tags;
        struct flow_dissector_key_vlan vlan;
        struct flow_dissector_key_vlan cvlan;
index d126b5d..2ad85e6 100644 (file)
@@ -69,7 +69,7 @@ struct fq {
        struct list_head backlogs;
        spinlock_t lock;
        u32 flows_cnt;
-       u32 perturbation;
+       siphash_key_t   perturbation;
        u32 limit;
        u32 memory_limit;
        u32 memory_usage;
index be40a4b..107c0d7 100644 (file)
@@ -108,7 +108,7 @@ begin:
 
 static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
 {
-       u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
+       u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
 
        return reciprocal_scale(hash, fq->flows_cnt);
 }
@@ -308,7 +308,7 @@ static int fq_init(struct fq *fq, int flows_cnt)
        INIT_LIST_HEAD(&fq->backlogs);
        spin_lock_init(&fq->lock);
        fq->flows_cnt = max_t(u32, flows_cnt, 1);
-       fq->perturbation = prandom_u32();
+       get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
        fq->quantum = 300;
        fq->limit = 8192;
        fq->memory_limit = 16 << 20; /* 16 MBytes */
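Switching the perturbation from a u32 seeded by prandom_u32() to a siphash_key_t closes off hash prediction: prandom is not cryptographically secure, while keyed SipHash with a 128-bit secret is hard to bias from outside. The seeding and lookup pattern, sketched:

    #include <linux/kernel.h>
    #include <linux/random.h>
    #include <linux/siphash.h>

    static siphash_key_t perturb;

    static void perturb_init(void)
    {
            get_random_bytes(&perturb, sizeof(perturb)); /* 128-bit secret */
    }

    static u32 pick_bucket(const void *data, size_t len, u32 buckets)
    {
            /* keyed hash, then scale into [0, buckets) */
            return reciprocal_scale((u32)siphash(data, len, &perturb),
                                    buckets);
    }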
index 81643cf..c814446 100644 (file)
@@ -21,9 +21,13 @@ void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf);
 int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp);
 int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num);
 #else
-void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {}
-int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; }
-int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num)
+static inline void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {}
+
+static inline int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
+{ return 0; }
+
+static inline int hwbm_pool_add(struct hwbm_pool *bm_pool,
+                               unsigned int buf_num)
 { return 0; }
 #endif /* CONFIG_HWBM */
 #endif /* _HWBM_H */
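The fix above is the standard header-stub idiom: a non-static function definition in a header is emitted in every translation unit that includes it and fails to link with multiple-definition errors, whereas static inline gives each unit its own (normally optimized-away) copy. The shape of the pattern, with a hypothetical CONFIG_FOO/foo_do_thing:

    #ifdef CONFIG_FOO
    int foo_do_thing(struct foo *f);               /* real version in foo.c */
    #else
    static inline int foo_do_thing(struct foo *f)  /* compiled-out stub */
    {
            return 0;
    }
    #endif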
index aef38c1..dfd919b 100644 (file)
@@ -71,6 +71,7 @@ struct inet_timewait_sock {
                                tw_pad          : 2,    /* 2 bits hole */
                                tw_tos          : 8;
        u32                     tw_txhash;
+       u32                     tw_priority;
        struct timer_list       tw_timer;
        struct inet_bind_bucket *tw_tb;
 };
index 95bb77f..a2c61c3 100644 (file)
@@ -185,7 +185,7 @@ static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter)
 }
 
 struct ip_frag_state {
-       struct iphdr    *iph;
+       bool            DF;
        unsigned int    hlen;
        unsigned int    ll_rs;
        unsigned int    mtu;
@@ -196,7 +196,7 @@ struct ip_frag_state {
 };
 
 void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs,
-                 unsigned int mtu, struct ip_frag_state *state);
+                 unsigned int mtu, bool DF, struct ip_frag_state *state);
 struct sk_buff *ip_frag_next(struct sk_buff *skb,
                             struct ip_frag_state *state);
 
index 3759167..078887c 100644 (file)
@@ -889,6 +889,7 @@ struct netns_ipvs {
        struct delayed_work     defense_work;   /* Work handler */
        int                     drop_rate;
        int                     drop_counter;
+       int                     old_secure_tcp;
        atomic_t                dropentry;
        /* locks in ctl.c */
        spinlock_t              dropentry_lock;  /* drop entry handling */
index 8dfc656..009605c 100644 (file)
@@ -981,7 +981,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
  *     upper-layer output functions
  */
 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
-            __u32 mark, struct ipv6_txoptions *opt, int tclass);
+            __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
 
index df528a6..ea985aa 100644 (file)
@@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk);
 
 /* Access to a connection */
 int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
 void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
index f8712bb..c7e15a2 100644 (file)
@@ -52,6 +52,9 @@ struct bpf_prog;
 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 
 struct net {
+       /* The first cache line is often dirtied.
+        * Do not place read-mostly fields here.
+        */
        refcount_t              passive;        /* To decide when the network
                                                 * namespace should be freed.
                                                 */
@@ -60,7 +63,13 @@ struct net {
                                                 */
        spinlock_t              rules_mod_lock;
 
-       u32                     hash_mix;
+       unsigned int            dev_unreg_count;
+
+       unsigned int            dev_base_seq;   /* protected by rtnl_mutex */
+       int                     ifindex;
+
+       spinlock_t              nsid_lock;
+       atomic_t                fnhe_genid;
 
        struct list_head        list;           /* list of network namespaces */
        struct list_head        exit_list;      /* To linked to call pernet exit
@@ -76,11 +85,11 @@ struct net {
 #endif
        struct user_namespace   *user_ns;       /* Owning user namespace */
        struct ucounts          *ucounts;
-       spinlock_t              nsid_lock;
        struct idr              netns_ids;
 
        struct ns_common        ns;
 
+       struct list_head        dev_base_head;
        struct proc_dir_entry   *proc_net;
        struct proc_dir_entry   *proc_net_stat;
 
@@ -93,17 +102,18 @@ struct net {
 
        struct uevent_sock      *uevent_sock;           /* uevent socket */
 
-       struct list_head        dev_base_head;
        struct hlist_head       *dev_name_head;
        struct hlist_head       *dev_index_head;
-       unsigned int            dev_base_seq;   /* protected by rtnl_mutex */
-       int                     ifindex;
-       unsigned int            dev_unreg_count;
+       /* Note that @hash_mix can be read millions of times per second;
+        * it is critical that it sits on a read-mostly cache line.
+        */
+       u32                     hash_mix;
+
+       struct net_device       *loopback_dev;          /* The loopback */
 
        /* core fib_rules */
        struct list_head        rules_ops;
 
-       struct net_device       *loopback_dev;          /* The loopback */
        struct netns_core       core;
        struct netns_mib        mib;
        struct netns_packet     packet;
@@ -171,7 +181,6 @@ struct net {
        struct sock             *crypto_nlsk;
 #endif
        struct sock             *diag_nlsk;
-       atomic_t                fnhe_genid;
 } __randomize_layout;
 
 #include <linux/seq_file_net.h>
@@ -333,7 +342,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
 #define __net_initconst        __initconst
 #endif
 
-int peernet2id_alloc(struct net *net, struct net *peer);
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
 int peernet2id(struct net *net, struct net *peer);
 bool peernet_has_id(struct net *net, struct net *peer);
 struct net *get_net_ns_by_id(struct net *net, int id);
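The reordering above is purely a cache-layout change: fields dirtied on hot paths (refcounts, locks, generation counters) are gathered on the first cache line, while read-mostly fields such as hash_mix and loopback_dev move away from them so lockless readers do not take false-sharing misses. The same intent can be spelled out with an alignment annotation (struct example is hypothetical; the diff achieves this by field ordering alone):

    struct example {
            /* write-hot: dirtied on nearly every operation */
            refcount_t      users;
            spinlock_t      lock;

            /* read-mostly: start a fresh cache line, away from the hot one */
            u32             hash_seed ____cacheline_aligned_in_smp;
    };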
index 2655e03..001d294 100644 (file)
@@ -889,6 +889,8 @@ enum nft_chain_flags {
        NFT_CHAIN_HW_OFFLOAD            = 0x2,
 };
 
+#define NFT_CHAIN_POLICY_UNSET         U8_MAX
+
 /**
  *     struct nft_chain - nf_tables chain
  *
@@ -1181,6 +1183,10 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
                                           const struct nlattr *nla,
                                           u8 genmask);
 
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+                                   struct nft_flowtable *flowtable,
+                                   enum nft_trans_phase phase);
+
 void nft_register_flowtable_type(struct nf_flowtable_type *type);
 void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
 
index fd178d5..cf8b332 100644 (file)
@@ -185,7 +185,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 
 static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
 {
-       return queue->rskq_accept_head == NULL;
+       return READ_ONCE(queue->rskq_accept_head) == NULL;
 }
 
 static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
@@ -197,7 +197,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
        req = queue->rskq_accept_head;
        if (req) {
                sk_acceptq_removed(parent);
-               queue->rskq_accept_head = req->dl_next;
+               WRITE_ONCE(queue->rskq_accept_head, req->dl_next);
                if (queue->rskq_accept_head == NULL)
                        queue->rskq_accept_tail = NULL;
        }
index dfce19c..6c51684 100644 (file)
@@ -53,10 +53,11 @@ struct rtable {
        unsigned int            rt_flags;
        __u16                   rt_type;
        __u8                    rt_is_input;
-       u8                      rt_gw_family;
+       __u8                    rt_uses_gateway;
 
        int                     rt_iif;
 
+       u8                      rt_gw_family;
        /* Info on neighbour */
        union {
                __be32          rt_gw4;
index 43f5b7e..637548d 100644 (file)
@@ -494,6 +494,11 @@ static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
        return q;
 }
 
+static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
+{
+       return rcu_dereference_bh(qdisc->dev_queue->qdisc);
+}
+
 static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
 {
        return qdisc->dev_queue->qdisc_sleeping;
index 5d60f13..3ab5c6b 100644 (file)
@@ -610,4 +610,9 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
        return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
 }
 
+static inline bool sctp_newsk_ready(const struct sock *sk)
+{
+       return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
+}
+
 #endif /* __net_sctp_h__ */
index 2c53f1a..8f9adcf 100644 (file)
@@ -878,12 +878,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk)
  */
 static inline int sk_stream_min_wspace(const struct sock *sk)
 {
-       return sk->sk_wmem_queued >> 1;
+       return READ_ONCE(sk->sk_wmem_queued) >> 1;
 }
 
 static inline int sk_stream_wspace(const struct sock *sk)
 {
-       return sk->sk_sndbuf - sk->sk_wmem_queued;
+       return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
+}
+
+static inline void sk_wmem_queued_add(struct sock *sk, int val)
+{
+       WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
 }
 
 void sk_stream_write_space(struct sock *sk);
@@ -949,8 +954,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
 {
        int cpu = raw_smp_processor_id();
 
-       if (unlikely(sk->sk_incoming_cpu != cpu))
-               sk->sk_incoming_cpu = cpu;
+       if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
+               WRITE_ONCE(sk->sk_incoming_cpu, cpu);
 }
 
 static inline void sock_rps_record_flow_hash(__u32 hash)
@@ -1207,7 +1212,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 
 static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
 {
-       if (sk->sk_wmem_queued >= sk->sk_sndbuf)
+       if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
                return false;
 
        return sk->sk_prot->stream_memory_free ?
@@ -1467,7 +1472,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
        sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
-       sk->sk_wmem_queued -= skb->truesize;
+       sk_wmem_queued_add(sk, -skb->truesize);
        sk_mem_uncharge(sk, skb->truesize);
        if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
            !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
@@ -2014,7 +2019,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
        skb->len             += copy;
        skb->data_len        += copy;
        skb->truesize        += copy;
-       sk->sk_wmem_queued   += copy;
+       sk_wmem_queued_add(sk, copy);
        sk_mem_charge(sk, copy);
        return 0;
 }
@@ -2220,10 +2225,14 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)
 
 static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 {
-       if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
-               sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
-               sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
-       }
+       u32 val;
+
+       if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+               return;
+
+       val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
+
+       WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
 }
 
 struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
@@ -2233,12 +2242,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
  * sk_page_frag - return an appropriate page_frag
  * @sk: socket
  *
- * If socket allocation mode allows current thread to sleep, it means its
- * safe to use the per task page_frag instead of the per socket one.
+ * Use the per task page_frag instead of the per socket one as an
+ * optimization when we know that we're in normal context and own
+ * everything that's associated with %current.
+ *
+ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+ * inside other socket operations and end up recursing into sk_page_frag()
+ * while it's already in use.
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-       if (gfpflags_allow_blocking(sk->sk_allocation))
+       if (gfpflags_normal_context(sk->sk_allocation))
                return &current->task_frag;
 
        return &sk->sk_frag;
@@ -2251,7 +2265,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
  */
 static inline bool sock_writeable(const struct sock *sk)
 {
-       return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+       return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
 }
 
 static inline gfp_t gfp_any(void)
@@ -2271,7 +2285,9 @@ static inline long sock_sndtimeo(const struct sock *sk, bool noblock)
 
 static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
 {
-       return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
+       int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len);
+
+       return v ?: 1;
 }
 
 /* Alas, with timeout socket operations are not restartable.
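For context, gfpflags_normal_context() is stricter than gfpflags_allow_blocking(): it additionally rules out memory-reclaim contexts, which can run nested inside other socket operations even though their allocation mask allows blocking. Its definition in include/linux/gfp.h is essentially:

    static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
    {
            return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
                    __GFP_DIRECT_RECLAIM;
    }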
index c9a3f96..ab4eb5e 100644 (file)
@@ -258,7 +258,7 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk)
            mem_cgroup_under_socket_pressure(sk->sk_memcg))
                return true;
 
-       return tcp_memory_pressure;
+       return READ_ONCE(tcp_memory_pressure);
 }
 /*
  * The next routines deal with comparing 32 bit unsigned ints
@@ -1380,13 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-       return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+       return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+                                 READ_ONCE(sk->sk_backlog.len) -
                                  atomic_read(&sk->sk_rmem_alloc));
 }
 
 static inline int tcp_full_space(const struct sock *sk)
 {
-       return tcp_win_from_space(sk, sk->sk_rcvbuf);
+       return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
 
 extern void tcp_openreq_init_rwin(struct request_sock *req,
@@ -1916,7 +1917,8 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
 static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
-       u32 notsent_bytes = tp->write_seq - tp->snd_nxt;
+       u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+                           READ_ONCE(tp->snd_nxt);
 
        return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
 }
index 335283d..373aadc 100644 (file)
@@ -197,6 +197,7 @@ struct vxlan_rdst {
        u8                       offloaded:1;
        __be32                   remote_vni;
        u32                      remote_ifindex;
+       struct net_device        *remote_dev;
        struct list_head         list;
        struct rcu_head          rcu;
        struct dst_cache         dst_cache;
index 6a47ba8..e7e733a 100644 (file)
@@ -366,7 +366,7 @@ struct ib_tm_caps {
 
 struct ib_cq_init_attr {
        unsigned int    cqe;
-       int             comp_vector;
+       u32             comp_vector;
        u32             flags;
 };
 
index 3810b34..6bd5ed6 100644 (file)
@@ -32,6 +32,7 @@ extern int scsi_ioctl_reset(struct scsi_device *, int __user *);
 struct scsi_eh_save {
        /* saved state */
        int result;
+       unsigned int resid_len;
        int eh_eflags;
        enum dma_data_direction data_direction;
        unsigned underflow;
index 0fd3929..057d2a2 100644 (file)
@@ -264,6 +264,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 #define AZX_REG_ML_LOUTPAY             0x20
 #define AZX_REG_ML_LINPAY              0x30
 
+/* bit0 is reserved, with BIT(1) mapping to stream1 */
+#define ML_LOSIDV_STREAM_MASK          0xFFFE
+
 #define ML_LCTL_SCF_MASK                       0xF
 #define AZX_MLCTL_SPA                          (0x1 << 16)
 #define AZX_MLCTL_CPA                          (0x1 << 23)
index 985a5f5..31f76b6 100644 (file)
@@ -135,9 +135,9 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv,
                               struct link_info *li);
 
 #ifdef DEBUG
-inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv,
-                                 char *name,
-                                 struct asoc_simple_dai *dai)
+static inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv,
+                                        char *name,
+                                        struct asoc_simple_dai *dai)
 {
        struct device *dev = simple_priv_to_dev(priv);
 
@@ -167,7 +167,7 @@ inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv,
                dev_dbg(dev, "%s clk %luHz\n", name, clk_get_rate(dai->clk));
 }
 
-inline void asoc_simple_debug_info(struct asoc_simple_priv *priv)
+static inline void asoc_simple_debug_info(struct asoc_simple_priv *priv)
 {
        struct snd_soc_card *card = simple_priv_to_card(priv);
        struct device *dev = simple_priv_to_dev(priv);
index 5df604d..75ae189 100644 (file)
@@ -1688,6 +1688,7 @@ TRACE_EVENT(qgroup_update_reserve,
                __entry->qgid           = qgroup->qgroupid;
                __entry->cur_reserved   = qgroup->rsv.values[type];
                __entry->diff           = diff;
+               __entry->type           = type;
        ),
 
        TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
@@ -1710,6 +1711,7 @@ TRACE_EVENT(qgroup_meta_reserve,
        TP_fast_assign_btrfs(root->fs_info,
                __entry->refroot        = root->root_key.objectid;
                __entry->diff           = diff;
+               __entry->type           = type;
        ),
 
        TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
@@ -1726,7 +1728,6 @@ TRACE_EVENT(qgroup_meta_convert,
        TP_STRUCT__entry_btrfs(
                __field(        u64,    refroot                 )
                __field(        s64,    diff                    )
-               __field(        int,    type                    )
        ),
 
        TP_fast_assign_btrfs(root->fs_info,
index eb57e30..69e8bb8 100644 (file)
@@ -35,8 +35,8 @@ DECLARE_EVENT_CLASS(kmem_alloc,
                __entry->gfp_flags      = gfp_flags;
        ),
 
-       TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
-               __entry->call_site,
+       TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+               (void *)__entry->call_site,
                __entry->ptr,
                __entry->bytes_req,
                __entry->bytes_alloc,
@@ -131,7 +131,8 @@ DECLARE_EVENT_CLASS(kmem_free,
                __entry->ptr            = ptr;
        ),
 
-       TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+       TP_printk("call_site=%pS ptr=%p",
+                 (void *)__entry->call_site, __entry->ptr)
 );
 
 DEFINE_EVENT(kmem_free, kfree,
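Replacing %lx with %pS makes the trace output self-describing: the printk format machinery resolves the address to symbol+offset at print time, so post-processing no longer needs a matching System.map. A sketch:

    /* prints e.g. "call_site=some_function+0x1a/0x40" rather than raw hex */
    pr_info("call_site=%pS\n", (void *)_RET_IP_);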
index f6a4eaa..a138306 100644 (file)
@@ -451,20 +451,81 @@ TRACE_EVENT(xprtrdma_createmrs,
 
        TP_STRUCT__entry(
                __field(const void *, r_xprt)
+               __string(addr, rpcrdma_addrstr(r_xprt))
+               __string(port, rpcrdma_portstr(r_xprt))
                __field(unsigned int, count)
        ),
 
        TP_fast_assign(
                __entry->r_xprt = r_xprt;
                __entry->count = count;
+               __assign_str(addr, rpcrdma_addrstr(r_xprt));
+               __assign_str(port, rpcrdma_portstr(r_xprt));
        ),
 
-       TP_printk("r_xprt=%p: created %u MRs",
-               __entry->r_xprt, __entry->count
+       TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
+               __get_str(addr), __get_str(port), __entry->r_xprt,
+               __entry->count
        )
 );
 
-DEFINE_RXPRT_EVENT(xprtrdma_nomrs);
+TRACE_EVENT(xprtrdma_mr_get,
+       TP_PROTO(
+               const struct rpcrdma_req *req
+       ),
+
+       TP_ARGS(req),
+
+       TP_STRUCT__entry(
+               __field(const void *, req)
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+       ),
+
+       TP_fast_assign(
+               const struct rpc_rqst *rqst = &req->rl_slot;
+
+               __entry->req = req;
+               __entry->task_id = rqst->rq_task->tk_pid;
+               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+       ),
+
+       TP_printk("task:%u@%u xid=0x%08x req=%p",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __entry->req
+       )
+);
+
+TRACE_EVENT(xprtrdma_nomrs,
+       TP_PROTO(
+               const struct rpcrdma_req *req
+       ),
+
+       TP_ARGS(req),
+
+       TP_STRUCT__entry(
+               __field(const void *, req)
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+       ),
+
+       TP_fast_assign(
+               const struct rpc_rqst *rqst = &req->rl_slot;
+
+               __entry->req = req;
+               __entry->task_id = rqst->rq_task->tk_pid;
+               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+       ),
+
+       TP_printk("task:%u@%u xid=0x%08x req=%p",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __entry->req
+       )
+);
 
 DEFINE_RDCH_EVENT(read);
 DEFINE_WRCH_EVENT(write);
@@ -623,21 +684,21 @@ TRACE_EVENT(xprtrdma_post_send,
 
 TRACE_EVENT(xprtrdma_post_recv,
        TP_PROTO(
-               const struct ib_cqe *cqe
+               const struct rpcrdma_rep *rep
        ),
 
-       TP_ARGS(cqe),
+       TP_ARGS(rep),
 
        TP_STRUCT__entry(
-               __field(const void *, cqe)
+               __field(const void *, rep)
        ),
 
        TP_fast_assign(
-               __entry->cqe = cqe;
+               __entry->rep = rep;
        ),
 
-       TP_printk("cqe=%p",
-               __entry->cqe
+       TP_printk("rep=%p",
+               __entry->rep
        )
 );
 
@@ -715,14 +776,15 @@ TRACE_EVENT(xprtrdma_wc_receive,
        TP_ARGS(wc),
 
        TP_STRUCT__entry(
-               __field(const void *, cqe)
+               __field(const void *, rep)
                __field(u32, byte_len)
                __field(unsigned int, status)
                __field(u32, vendor_err)
        ),
 
        TP_fast_assign(
-               __entry->cqe = wc->wr_cqe;
+               __entry->rep = container_of(wc->wr_cqe, struct rpcrdma_rep,
+                                           rr_cqe);
                __entry->status = wc->status;
                if (wc->status) {
                        __entry->byte_len = 0;
@@ -733,8 +795,8 @@ TRACE_EVENT(xprtrdma_wc_receive,
                }
        ),
 
-       TP_printk("cqe=%p %u bytes: %s (%u/0x%x)",
-               __entry->cqe, __entry->byte_len,
+       TP_printk("rep=%p %u bytes: %s (%u/0x%x)",
+               __entry->rep, __entry->byte_len,
                rdma_show_wc_status(__entry->status),
                __entry->status, __entry->vendor_err
        )
index a13a62d..191fe44 100644 (file)
@@ -519,10 +519,10 @@ TRACE_EVENT(rxrpc_local,
            );
 
 TRACE_EVENT(rxrpc_peer,
-           TP_PROTO(struct rxrpc_peer *peer, enum rxrpc_peer_trace op,
+           TP_PROTO(unsigned int peer_debug_id, enum rxrpc_peer_trace op,
                     int usage, const void *where),
 
-           TP_ARGS(peer, op, usage, where),
+           TP_ARGS(peer_debug_id, op, usage, where),
 
            TP_STRUCT__entry(
                    __field(unsigned int,       peer            )
@@ -532,7 +532,7 @@ TRACE_EVENT(rxrpc_peer,
                             ),
 
            TP_fast_assign(
-                   __entry->peer = peer->debug_id;
+                   __entry->peer = peer_debug_id;
                    __entry->op = op;
                    __entry->usage = usage;
                    __entry->where = where;
@@ -546,10 +546,10 @@ TRACE_EVENT(rxrpc_peer,
            );
 
 TRACE_EVENT(rxrpc_conn,
-           TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op,
+           TP_PROTO(unsigned int conn_debug_id, enum rxrpc_conn_trace op,
                     int usage, const void *where),
 
-           TP_ARGS(conn, op, usage, where),
+           TP_ARGS(conn_debug_id, op, usage, where),
 
            TP_STRUCT__entry(
                    __field(unsigned int,       conn            )
@@ -559,7 +559,7 @@ TRACE_EVENT(rxrpc_conn,
                             ),
 
            TP_fast_assign(
-                   __entry->conn = conn->debug_id;
+                   __entry->conn = conn_debug_id;
                    __entry->op = op;
                    __entry->usage = usage;
                    __entry->where = where;
@@ -606,10 +606,10 @@ TRACE_EVENT(rxrpc_client,
            );
 
 TRACE_EVENT(rxrpc_call,
-           TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op,
+           TP_PROTO(unsigned int call_debug_id, enum rxrpc_call_trace op,
                     int usage, const void *where, const void *aux),
 
-           TP_ARGS(call, op, usage, where, aux),
+           TP_ARGS(call_debug_id, op, usage, where, aux),
 
            TP_STRUCT__entry(
                    __field(unsigned int,               call            )
@@ -620,7 +620,7 @@ TRACE_EVENT(rxrpc_call,
                             ),
 
            TP_fast_assign(
-                   __entry->call = call->debug_id;
+                   __entry->call = call_debug_id;
                    __entry->op = op;
                    __entry->usage = usage;
                    __entry->where = where;
@@ -1068,7 +1068,7 @@ TRACE_EVENT(rxrpc_recvmsg,
                             ),
 
            TP_fast_assign(
-                   __entry->call = call->debug_id;
+                   __entry->call = call ? call->debug_id : 0;
                    __entry->why = why;
                    __entry->seq = seq;
                    __entry->offset = offset;
index a0c4b8a..51fe9f6 100644 (file)
@@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full,
        TP_fast_assign(
                __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
                __entry->truesize   = skb->truesize;
-               __entry->sk_rcvbuf  = sk->sk_rcvbuf;
+               __entry->sk_rcvbuf  = READ_ONCE(sk->sk_rcvbuf);
        ),
 
        TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
@@ -115,7 +115,7 @@ TRACE_EVENT(sock_exceed_buf_limit,
                __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
                __entry->sysctl_wmem = sk_get_wmem0(sk, prot);
                __entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc);
-               __entry->wmem_queued = sk->sk_wmem_queued;
+               __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued);
                __entry->kind = kind;
        ),
 
index 3a27335..c2ce648 100644 (file)
@@ -66,8 +66,9 @@ DECLARE_EVENT_CLASS(writeback_page_template,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32);
+               strscpy_pad(__entry->name,
+                           mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)",
+                           32);
                __entry->ino = mapping ? mapping->host->i_ino : 0;
                __entry->index = page->index;
        ),
@@ -110,8 +111,8 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
                struct backing_dev_info *bdi = inode_to_bdi(inode);
 
                /* may be called for files on pseudo FSes w/ unregistered bdi */
-               strncpy(__entry->name,
-                       bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+               strscpy_pad(__entry->name,
+                           bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
                __entry->ino            = inode->i_ino;
                __entry->state          = inode->i_state;
                __entry->flags          = flags;
@@ -316,8 +317,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       dev_name(inode_to_bdi(inode)->dev), 32);
+               strscpy_pad(__entry->name,
+                           dev_name(inode_to_bdi(inode)->dev), 32);
                __entry->ino            = inode->i_ino;
                __entry->sync_mode      = wbc->sync_mode;
                __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
@@ -360,8 +361,9 @@ DECLARE_EVENT_CLASS(writeback_work_class,
                __field(unsigned int, cgroup_ino)
        ),
        TP_fast_assign(
-               strncpy(__entry->name,
-                       wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32);
+               strscpy_pad(__entry->name,
+                           wb->bdi->dev ? dev_name(wb->bdi->dev) :
+                           "(unknown)", 32);
                __entry->nr_pages = work->nr_pages;
                __entry->sb_dev = work->sb ? work->sb->s_dev : 0;
                __entry->sync_mode = work->sync_mode;
@@ -414,7 +416,7 @@ DECLARE_EVENT_CLASS(writeback_class,
                __field(unsigned int, cgroup_ino)
        ),
        TP_fast_assign(
-               strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
                __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
        ),
        TP_printk("bdi %s: cgroup_ino=%u",
@@ -436,7 +438,7 @@ TRACE_EVENT(writeback_bdi_register,
                __array(char, name, 32)
        ),
        TP_fast_assign(
-               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
        ),
        TP_printk("bdi %s",
                __entry->name
@@ -461,7 +463,7 @@ DECLARE_EVENT_CLASS(wbc_class,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
                __entry->nr_to_write    = wbc->nr_to_write;
                __entry->pages_skipped  = wbc->pages_skipped;
                __entry->sync_mode      = wbc->sync_mode;
@@ -512,7 +514,7 @@ TRACE_EVENT(writeback_queue_io,
        ),
        TP_fast_assign(
                unsigned long *older_than_this = work->older_than_this;
-               strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
                __entry->older  = older_than_this ?  *older_than_this : 0;
                __entry->age    = older_than_this ?
                                  (jiffies - *older_than_this) * 1000 / HZ : -1;
@@ -598,7 +600,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
                __entry->write_bw       = KBps(wb->write_bandwidth);
                __entry->avg_write_bw   = KBps(wb->avg_write_bandwidth);
                __entry->dirty_rate     = KBps(dirty_rate);
@@ -663,7 +665,7 @@ TRACE_EVENT(balance_dirty_pages,
 
        TP_fast_assign(
                unsigned long freerun = (thresh + bg_thresh) / 2;
-               strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
 
                __entry->limit          = global_wb_domain.dirty_limit;
                __entry->setpoint       = (global_wb_domain.dirty_limit +
@@ -723,8 +725,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       dev_name(inode_to_bdi(inode)->dev), 32);
+               strscpy_pad(__entry->name,
+                           dev_name(inode_to_bdi(inode)->dev), 32);
                __entry->ino            = inode->i_ino;
                __entry->state          = inode->i_state;
                __entry->dirtied_when   = inode->dirtied_when;
@@ -797,8 +799,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       dev_name(inode_to_bdi(inode)->dev), 32);
+               strscpy_pad(__entry->name,
+                           dev_name(inode_to_bdi(inode)->dev), 32);
                __entry->ino            = inode->i_ino;
                __entry->state          = inode->i_state;
                __entry->dirtied_when   = inode->dirtied_when;
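All of these conversions fix the same two leaks: strncpy() does not NUL-terminate when the source fills the buffer, and plain strscpy() terminates but leaves the bytes after the terminator uninitialized, which for a fixed __array() field means copying stack garbage into the trace ring buffer. Side by side (sketch):

    char name[32];

    strncpy(name, src, 32);      /* may leave name unterminated          */
    strscpy(name, src, 32);      /* terminated, but tail left as-is      */
    strscpy_pad(name, src, 32);  /* terminated and zero-padded -- safe
                                    to copy out wholesale                */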
index 63b1f50..c160a53 100644 (file)
@@ -67,6 +67,9 @@
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index c99b4f2..4fe35d6 100644 (file)
@@ -1003,6 +1003,8 @@ struct drm_amdgpu_info_device {
        __u64 high_va_max;
        /* gfx10 pa_sc_tile_steering_override */
        __u32 pa_sc_tile_steering_override;
+       /* disabled TCCs */
+       __u64 tcc_disabled_mask;
 };
 
 struct drm_amdgpu_info_hw_ip {
index 63ae4a3..c02dec9 100644 (file)
@@ -22,9 +22,9 @@ struct btf_header {
 };
 
 /* Max # of type identifier */
-#define BTF_MAX_TYPE   0x0000ffff
+#define BTF_MAX_TYPE   0x000fffff
 /* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET    0x0000ffff
+#define BTF_MAX_NAME_OFFSET    0x00ffffff
 /* Max # of struct/union/enum members or func args */
 #define BTF_MAX_VLEN   0xffff
 
index e4a79f8..ab5c7e8 100644 (file)
@@ -11,6 +11,9 @@
   more information about COFF, then O'Reilly has an excellent book.
 */
 
+#ifndef _UAPI_LINUX_COFF_H
+#define _UAPI_LINUX_COFF_H
+
 #define  E_SYMNMLEN  8   /* Number of characters in a symbol name         */
 #define  E_FILNMLEN 14   /* Number of characters in a file name           */
 #define  E_DIMNUM    4   /* Number of array dimensions in auxiliary entry */
@@ -350,3 +353,5 @@ struct COFF_reloc {
 
 /* For new sections we haven't heard of before */
 #define COFF_DEF_SECTION_ALIGNMENT       4
+
+#endif /* _UAPI_LINUX_COFF_H */
index 2971d29..373cada 100644 (file)
  *
  * Protocol changelog:
  *
+ * 7.1:
+ *  - add the following messages:
+ *      FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK,
+ *      FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE,
+ *      FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR,
+ *      FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR,
+ *      FUSE_RELEASEDIR
+ *  - add padding to messages to accommodate 32-bit servers on 64-bit kernels
+ *
+ * 7.2:
+ *  - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags
+ *  - add FUSE_FSYNCDIR message
+ *
+ * 7.3:
+ *  - add FUSE_ACCESS message
+ *  - add FUSE_CREATE message
+ *  - add filehandle to fuse_setattr_in
+ *
+ * 7.4:
+ *  - add frsize to fuse_kstatfs
+ *  - clean up request size limit checking
+ *
+ * 7.5:
+ *  - add flags and max_write to fuse_init_out
+ *
+ * 7.6:
+ *  - add max_readahead to fuse_init_in and fuse_init_out
+ *
+ * 7.7:
+ *  - add FUSE_INTERRUPT message
+ *  - add POSIX file lock support
+ *
+ * 7.8:
+ *  - add lock_owner and flags fields to fuse_release_in
+ *  - add FUSE_BMAP message
+ *  - add FUSE_DESTROY message
+ *
  * 7.9:
  *  - new fuse_getattr_in input argument of GETATTR
  *  - add lk_flags in fuse_lk_in
  *
  *  7.31
  *  - add FUSE_WRITE_KILL_PRIV flag
+ *  - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
+ *  - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -274,6 +313,7 @@ struct fuse_file_lock {
  * FUSE_CACHE_SYMLINKS: cache READLINK responses
  * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
  * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -301,6 +341,7 @@ struct fuse_file_lock {
 #define FUSE_CACHE_SYMLINKS    (1 << 23)
 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
+#define FUSE_MAP_ALIGNMENT     (1 << 26)
 
 /**
  * CUSE INIT request/reply flags
@@ -422,9 +463,15 @@ enum fuse_opcode {
        FUSE_RENAME2            = 45,
        FUSE_LSEEK              = 46,
        FUSE_COPY_FILE_RANGE    = 47,
+       FUSE_SETUPMAPPING       = 48,
+       FUSE_REMOVEMAPPING      = 49,
 
        /* CUSE specific operations */
        CUSE_INIT               = 4096,
+
+       /* Reserved opcodes: helpful to detect structure endianness */
+       CUSE_INIT_BSWAP_RESERVED        = 1048576,      /* CUSE_INIT << 8 */
+       FUSE_INIT_BSWAP_RESERVED        = 436207616,    /* FUSE_INIT << 24 */
 };
 
 enum fuse_notify_code {
@@ -652,7 +699,7 @@ struct fuse_init_out {
        uint32_t        max_write;
        uint32_t        time_gran;
        uint16_t        max_pages;
-       uint16_t        padding;
+       uint16_t        map_alignment;
        uint32_t        unused[8];
 };
 
index 96ee9d9..ea57526 100644 (file)
@@ -28,6 +28,7 @@ struct io_uring_sqe {
                __u16           poll_events;
                __u32           sync_range_flags;
                __u32           msg_flags;
+               __u32           timeout_flags;
        };
        __u64   user_data;      /* data to be passed back at completion time */
        union {
@@ -61,6 +62,7 @@ struct io_uring_sqe {
 #define IORING_OP_SYNC_FILE_RANGE      8
 #define IORING_OP_SENDMSG      9
 #define IORING_OP_RECVMSG      10
+#define IORING_OP_TIMEOUT      11
 
 /*
  * sqe->fsync_flags
index 233efbb..52641d8 100644 (file)
@@ -999,6 +999,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
 #define KVM_CAP_PMU_EVENT_FILTER 173
 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
+#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1145,6 +1146,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_S390           0x5000000000000000ULL
 #define KVM_REG_ARM64          0x6000000000000000ULL
 #define KVM_REG_MIPS           0x7000000000000000ULL
+#define KVM_REG_RISCV          0x8000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT     52
 #define KVM_REG_SIZE_MASK      0x00f0000000000000ULL
index 3b86c14..8076c94 100644 (file)
@@ -123,7 +123,7 @@ struct ebt_entry_match {
        union {
                struct {
                        char name[EBT_EXTENSION_MAXNAMELEN];
-                       uint8_t revision;
+                       __u8 revision;
                };
                struct xt_match *match;
        } u;
@@ -136,7 +136,7 @@ struct ebt_entry_watcher {
        union {
                struct {
                        char name[EBT_EXTENSION_MAXNAMELEN];
-                       uint8_t revision;
+                       __u8 revision;
                };
                struct xt_target *watcher;
        } u;
@@ -149,7 +149,7 @@ struct ebt_entry_target {
        union {
                struct {
                        char name[EBT_EXTENSION_MAXNAMELEN];
-                       uint8_t revision;
+                       __u8 revision;
                };
                struct xt_target *target;
        } u;
index b1e9de4..a519313 100644 (file)
 #include <linux/types.h>
 
 /* latest upcall version available */
-#define CLD_UPCALL_VERSION 1
+#define CLD_UPCALL_VERSION 2
 
 /* defined by RFC3530 */
 #define NFS4_OPAQUE_LIMIT 1024
 
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE      32
+#endif
+
 enum cld_command {
        Cld_Create,             /* create a record for this cm_id */
        Cld_Remove,             /* remove record of this cm_id */
        Cld_Check,              /* is this cm_id allowed? */
        Cld_GraceDone,          /* grace period is complete */
-       Cld_GraceStart,
+       Cld_GraceStart,         /* grace start (upload client records) */
+       Cld_GetVersion,         /* query max supported upcall version */
 };
 
 /* representation of long-form NFSv4 client ID */
@@ -45,6 +50,17 @@ struct cld_name {
        unsigned char   cn_id[NFS4_OPAQUE_LIMIT];       /* client-provided */
 } __attribute__((packed));
 
+/* SHA-256 hash of the Kerberos principal */
+struct cld_princhash {
+       __u8            cp_len;                         /* length of cp_data */
+       unsigned char   cp_data[SHA256_DIGEST_SIZE];    /* hash of principal */
+} __attribute__((packed));
+
+struct cld_clntinfo {
+       struct cld_name         cc_name;
+       struct cld_princhash    cc_princhash;
+} __attribute__((packed));
+
 /* message struct for communication with userspace */
 struct cld_msg {
        __u8            cm_vers;                /* upcall version */
@@ -54,7 +70,28 @@ struct cld_msg {
        union {
                __s64           cm_gracetime;   /* grace period start time */
                struct cld_name cm_name;
+               __u8            cm_version;     /* for getting max version */
+       } __attribute__((packed)) cm_u;
+} __attribute__((packed));
+
+/* version 2 message can include a hash of the Kerberos principal */
+struct cld_msg_v2 {
+       __u8            cm_vers;                /* upcall version */
+       __u8            cm_cmd;                 /* upcall command */
+       __s16           cm_status;              /* return code */
+       __u32           cm_xid;                 /* transaction id */
+       union {
+               struct cld_name cm_name;
+               __u8            cm_version;     /* for getting max version */
+               struct cld_clntinfo cm_clntinfo; /* name & princ hash */
        } __attribute__((packed)) cm_u;
 } __attribute__((packed));
 
+struct cld_msg_hdr {
+       __u8            cm_vers;                /* upcall version */
+       __u8            cm_cmd;                 /* upcall command */
+       __s16           cm_status;              /* return code */
+       __u32           cm_xid;                 /* transaction id */
+} __attribute__((packed));
+
 #endif /* !_NFSD_CLD_H */
index 1c215ea..e168dc5 100644 (file)
@@ -45,6 +45,27 @@ struct nvme_passthru_cmd {
        __u32   result;
 };
 
+struct nvme_passthru_cmd64 {
+       __u8    opcode;
+       __u8    flags;
+       __u16   rsvd1;
+       __u32   nsid;
+       __u32   cdw2;
+       __u32   cdw3;
+       __u64   metadata;
+       __u64   addr;
+       __u32   metadata_len;
+       __u32   data_len;
+       __u32   cdw10;
+       __u32   cdw11;
+       __u32   cdw12;
+       __u32   cdw13;
+       __u32   cdw14;
+       __u32   cdw15;
+       __u32   timeout_ms;
+       __u64   result;
+};
+
 #define nvme_admin_cmd nvme_passthru_cmd
 
 #define NVME_IOCTL_ID          _IO('N', 0x40)
@@ -54,5 +75,7 @@ struct nvme_passthru_cmd {
 #define NVME_IOCTL_RESET       _IO('N', 0x44)
 #define NVME_IOCTL_SUBSYS_RESET        _IO('N', 0x45)
 #define NVME_IOCTL_RESCAN      _IO('N', 0x46)
+#define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64)
+#define NVME_IOCTL_IO64_CMD    _IOWR('N', 0x48, struct nvme_passthru_cmd64)
 
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
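The new structure widens result to 64 bits so the full command-specific completion result can be returned. A userspace sketch of issuing an admin command through the new ioctl (the opcode and buffer size here are illustrative):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/nvme_ioctl.h>

    static int identify_ctrl(int fd, void *buf)
    {
            struct nvme_passthru_cmd64 cmd;

            memset(&cmd, 0, sizeof(cmd));
            cmd.opcode   = 0x06;                  /* Identify (admin) */
            cmd.cdw10    = 1;                     /* CNS: controller  */
            cmd.addr     = (__u64)(uintptr_t)buf;
            cmd.data_len = 4096;

            /* cmd.result now carries a 64-bit completion value */
            return ioctl(fd, NVME_IOCTL_ADMIN64_CMD, &cmd);
    }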
index f28e562..29d6e93 100644 (file)
 #define  PCI_EXP_SLTCTL_CCIE   0x0010  /* Command Completed Interrupt Enable */
 #define  PCI_EXP_SLTCTL_HPIE   0x0020  /* Hot-Plug Interrupt Enable */
 #define  PCI_EXP_SLTCTL_AIC    0x00c0  /* Attention Indicator Control */
+#define  PCI_EXP_SLTCTL_ATTN_IND_SHIFT 6      /* Attention Indicator shift */
 #define  PCI_EXP_SLTCTL_ATTN_IND_ON    0x0040 /* Attention Indicator on */
 #define  PCI_EXP_SLTCTL_ATTN_IND_BLINK 0x0080 /* Attention Indicator blinking */
 #define  PCI_EXP_SLTCTL_ATTN_IND_OFF   0x00c0 /* Attention Indicator off */
 #define PCI_EXT_CAP_ID_DPC     0x1D    /* Downstream Port Containment */
 #define PCI_EXT_CAP_ID_L1SS    0x1E    /* L1 PM Substates */
 #define PCI_EXT_CAP_ID_PTM     0x1F    /* Precision Time Measurement */
-#define PCI_EXT_CAP_ID_MAX     PCI_EXT_CAP_ID_PTM
+#define PCI_EXT_CAP_ID_DLF     0x25    /* Data Link Feature */
+#define PCI_EXT_CAP_ID_PL_16GT 0x26    /* Physical Layer 16.0 GT/s */
+#define PCI_EXT_CAP_ID_MAX     PCI_EXT_CAP_ID_PL_16GT
 
 #define PCI_EXT_CAP_DSN_SIZEOF 12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
 #define  PCI_L1SS_CTL1_LTR_L12_TH_SCALE        0xe0000000  /* LTR_L1.2_THRESHOLD_Scale */
 #define PCI_L1SS_CTL2          0x0c    /* Control 2 Register */
 
+/* Data Link Feature */
+#define PCI_DLF_CAP            0x04    /* Capabilities Register */
+#define  PCI_DLF_EXCHANGE_ENABLE       0x80000000  /* Data Link Feature Exchange Enable */
+
+/* Physical Layer 16.0 GT/s */
+#define PCI_PL_16GT_LE_CTRL    0x20    /* Lane Equalization Control Register */
+#define  PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK                0x0000000F
+#define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK                0x000000F0
+#define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT       4
+
 #endif /* LINUX_PCI_REGS_H */
index 364c350..62b6f69 100644 (file)
@@ -35,6 +35,9 @@
 
 */
 
+#ifndef _UAPI_LINUX_PG_H
+#define _UAPI_LINUX_PG_H
+
 #define PG_MAGIC       'P'
 #define PG_RESET       'Z'
 #define PG_COMMAND     'C'
@@ -61,4 +64,4 @@ struct pg_read_hdr {
 
 };
 
-/* end of pg.h */
+#endif /* _UAPI_LINUX_PG_H */
index f163010..59e89a1 100644 (file)
 #define PTP_ENABLE_FEATURE (1<<0)
 #define PTP_RISING_EDGE    (1<<1)
 #define PTP_FALLING_EDGE   (1<<2)
+
+/*
+ * Flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl.
+ */
 #define PTP_EXTTS_VALID_FLAGS  (PTP_ENABLE_FEATURE |   \
                                 PTP_RISING_EDGE |      \
                                 PTP_FALLING_EDGE)
 
+/*
+ * Flag fields valid for the original PTP_EXTTS_REQUEST ioctl.
+ * DO NOT ADD NEW FLAGS HERE.
+ */
+#define PTP_EXTTS_V1_VALID_FLAGS       (PTP_ENABLE_FEATURE |   \
+                                        PTP_RISING_EDGE |      \
+                                        PTP_FALLING_EDGE)
+
 /*
  * Bits of the ptp_perout_request.flags field:
  */
 #define PTP_PEROUT_ONE_SHOT (1<<0)
+
+/*
+ * flag fields valid for the new PTP_PEROUT_REQUEST2 ioctl.
+ */
 #define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT)
+
+/*
+ * No flags are valid for the original PTP_PEROUT_REQUEST ioctl.
+ */
+#define PTP_PEROUT_V1_VALID_FLAGS      (0)
+
 /*
  * struct ptp_clock_time - represents a time value
  *
index b3105ac..25b4fa0 100644 (file)
 #define CLONE_NEWNET           0x40000000      /* New network namespace */
 #define CLONE_IO               0x80000000      /* Clone io context */
 
-/*
- * Arguments for the clone3 syscall
+#ifndef __ASSEMBLY__
+/**
+ * struct clone_args - arguments for the clone3 syscall
+ * @flags:       Flags for the new process as listed above.
+ *               All flags are valid except for CSIGNAL and
+ *               CLONE_DETACHED.
+ * @pidfd:       If CLONE_PIDFD is set, a pidfd will be
+ *               returned in this argument.
+ * @child_tid:   If CLONE_CHILD_SETTID is set, the TID of the
+ *               child process will be returned in the child's
+ *               memory.
+ * @parent_tid:  If CLONE_PARENT_SETTID is set, the TID of
+ *               the child process will be returned in the
+ *               parent's memory.
+ * @exit_signal: The exit_signal the parent process will be
+ *               sent when the child exits.
+ * @stack:       Specify the location of the stack for the
+ *               child process.
+ *               Note, @stack is expected to point to the
+ *               lowest address. The stack direction will be
+ *               determined by the kernel and set up
+ *               appropriately based on @stack_size.
+ * @stack_size:  The size of the stack for the child process.
+ * @tls:         If CLONE_SETTLS is set, the tls descriptor
+ *               is set to tls.
+ *
+ * The structure is versioned by size and thus extensible.
+ * New struct members must go at the end of the struct and
+ * must be properly 64-bit aligned.
  */
 struct clone_args {
        __aligned_u64 flags;
@@ -46,6 +73,9 @@ struct clone_args {
        __aligned_u64 stack_size;
        __aligned_u64 tls;
 };
+#endif
+
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
 
 /*
  * Scheduling policies
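CLONE_ARGS_SIZE_VER0 pins the size of the first published version; because the struct is versioned by size, the kernel can keep extending it while still accepting older callers. The usual copy-in pattern for such structs (sketch; uargs/usize stand for the syscall's user pointer and size arguments):

    struct clone_args args;
    int err;

    /* zero-fills the tail for old/short callers, rejects unknown
     * non-zero trailing bytes from newer/larger ones */
    err = copy_struct_from_user(&args, sizeof(args), uargs, usize);
    if (err)
            return err;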
index 0f4f87a..e7fe550 100644 (file)
 #define PORT_SUNIX     121
 
 /* Freescale Linflex UART */
-#define PORT_LINFLEXUART       121
+#define PORT_LINFLEXUART       122
 
 #endif /* _UAPILINUX_SERIAL_CORE_H */
diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h
new file mode 100644 (file)
index 0000000..b02eb2a
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+
+#ifndef _UAPI_LINUX_VIRTIO_FS_H
+#define _UAPI_LINUX_VIRTIO_FS_H
+
+#include <linux/types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
+
+struct virtio_fs_config {
+       /* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */
+       __u8 tag[36];
+
+       /* Number of request queues */
+       __u32 num_request_queues;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_VIRTIO_FS_H */
index 348fd01..585e07b 100644 (file)
@@ -44,6 +44,7 @@
 #define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
 #define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
 #define VIRTIO_ID_IOMMU        23 /* virtio IOMMU */
+#define VIRTIO_ID_FS           26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM         27 /* virtio pmem */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
index 98b30c1..d89969a 100644 (file)
@@ -212,30 +212,7 @@ int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr,
 
 bool xen_running_on_version_or_later(unsigned int major, unsigned int minor);
 
-efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc);
-efi_status_t xen_efi_set_time(efi_time_t *tm);
-efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
-                                    efi_time_t *tm);
-efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm);
-efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
-                                 u32 *attr, unsigned long *data_size,
-                                 void *data);
-efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
-                                      efi_char16_t *name, efi_guid_t *vendor);
-efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
-                                 u32 attr, unsigned long data_size,
-                                 void *data);
-efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
-                                        u64 *remaining_space,
-                                        u64 *max_variable_size);
-efi_status_t xen_efi_get_next_high_mono_count(u32 *count);
-efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
-                                   unsigned long count, unsigned long sg_list);
-efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
-                                       unsigned long count, u64 *max_size,
-                                       int *reset_type);
-void xen_efi_reset_system(int reset_type, efi_status_t status,
-                         unsigned long data_size, efi_char16_t *data);
+void xen_efi_runtime_setup(void);
 
 
 #ifdef CONFIG_PREEMPT
index 5712359..b4daad2 100644 (file)
@@ -1963,6 +1963,10 @@ config BASE_SMALL
        default 0 if BASE_FULL
        default 1 if !BASE_FULL
 
+config MODULE_SIG_FORMAT
+       def_bool n
+       select SYSTEM_DATA_VERIFICATION
+
 menuconfig MODULES
        bool "Enable loadable module support"
        option modules
@@ -2047,7 +2051,7 @@ config MODULE_SRCVERSION_ALL
 
 config MODULE_SIG
        bool "Module signature verification"
-       select SYSTEM_DATA_VERIFICATION
+       select MODULE_SIG_FORMAT
        help
          Check modules for valid signatures upon load: the signature
          is simply appended to the module. For more information see
@@ -2057,6 +2061,11 @@ config MODULE_SIG
          kernel build dependency so that the signing tool can use its crypto
          library.
 
+         You should enable this option if you wish to use either
+         CONFIG_SECURITY_LOCKDOWN_LSM or lockdown functionality imposed via
+         another LSM; otherwise unsigned modules will be loadable regardless
+         of the lockdown policy.
+
          !!!WARNING!!!  If you enable this option, you MUST make sure that the
          module DOES NOT get stripped after being signed.  This includes the
          debuginfo strip done by some packagers (such as rpmbuild) and
index 653693d..91f6ebb 100644 (file)
@@ -507,7 +507,7 @@ void __init __weak mem_encrypt_init(void) { }
 
 void __init __weak poking_init(void) { }
 
-void __init __weak pgd_cache_init(void) { }
+void __init __weak pgtable_cache_init(void) { }
 
 bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
@@ -556,6 +556,7 @@ static void __init mm_init(void)
        report_meminit();
        mem_init();
        kmem_cache_init();
+       kmemleak_init();
        pgtable_init();
        debug_objects_mem_init();
        vmalloc_init();
@@ -564,7 +565,6 @@ static void __init mm_init(void)
        init_espfix_bsp();
        /* Should be run after espfix64 is set up. */
        pti_init();
-       pgd_cache_init();
 }
 
 void __init __weak arch_call_rest_init(void)
@@ -593,8 +593,8 @@ asmlinkage __visible void __init start_kernel(void)
        boot_cpu_init();
        page_address_init();
        pr_notice("%s", linux_banner);
+       early_security_init();
        setup_arch(&command_line);
-       mm_init_cpumask(&init_mm);
        setup_command_line(command_line);
        setup_nr_cpu_ids();
        setup_per_cpu_areas();
@@ -740,7 +740,6 @@ asmlinkage __visible void __init start_kernel(void)
                initrd_start = 0;
        }
 #endif
-       kmemleak_init();
        setup_per_cpu_pageset();
        numa_policy_init();
        acpi_early_init();
index 7c15729..3d920ff 100644 (file)
@@ -1240,15 +1240,14 @@ static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 
                        /* create the notify skb */
                        nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
-                       if (!nc) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
+                       if (!nc)
+                               return -ENOMEM;
+
                        if (copy_from_user(nc->data,
                                        notification->sigev_value.sival_ptr,
                                        NOTIFY_COOKIE_LEN)) {
                                ret = -EFAULT;
-                               goto out;
+                               goto free_skb;
                        }
 
                        /* TODO: add a header? */
@@ -1264,8 +1263,7 @@ retry:
                        fdput(f);
                        if (IS_ERR(sock)) {
                                ret = PTR_ERR(sock);
-                               sock = NULL;
-                               goto out;
+                               goto free_skb;
                        }
 
                        timeo = MAX_SCHEDULE_TIMEOUT;
@@ -1274,11 +1272,8 @@ retry:
                                sock = NULL;
                                goto retry;
                        }
-                       if (ret) {
-                               sock = NULL;
-                               nc = NULL;
-                               goto out;
-                       }
+                       if (ret)
+                               return ret;
                }
        }
 
@@ -1333,7 +1328,8 @@ out_fput:
 out:
        if (sock)
                netlink_detachskb(sock, nc);
-       else if (nc)
+       else
+free_skb:
                dev_kfree_skb(nc);
 
        return ret;
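
The reworked error paths above converge on a single free_skb label: pre-allocation failures return directly, post-allocation failures free the skb exactly once, and the success path hands the skb off to netlink instead. A minimal stand-alone C sketch of that pattern, with hypothetical names and user-space malloc/free standing in for the skb:

    #include <stdlib.h>

    static int fill(char *buf)          /* stand-in for copy_from_user() */
    {
            buf[0] = 0;
            return 0;
    }

    static int do_work(void)
    {
            int ret;
            char *buf = malloc(64);

            if (!buf)
                    return -1;          /* nothing held yet: plain return */

            ret = fill(buf);
            if (ret)
                    goto free_buf;      /* buffer held: free exactly once */

            free(buf);                  /* the real code hands the skb off
                                           here rather than freeing it */
            return 0;

    free_buf:
            free(buf);
            return ret;
    }

    int main(void)
    {
            return do_work();
    }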
index 7da4504..ec97a70 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1852,7 +1852,8 @@ static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
 {
        struct sem_undo *un;
 
-       list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
+       list_for_each_entry_rcu(un, &ulp->list_proc, list_proc,
+                               spin_is_locked(&ulp->lock)) {
                if (un->semid == semid)
                        return un;
        }
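
The new fourth argument is the lockdep condition recently added to list_for_each_entry_rcu(): under CONFIG_PROVE_RCU_LIST it asserts that the walk is protected either by rcu_read_lock() or by the stated lock (this hunk uses spin_is_locked() because __lookup_undo() runs with ulp->lock held rather than under RCU). A kernel-style sketch with hypothetical types, using the more common lockdep_is_held() form:

    #include <linux/rculist.h>
    #include <linux/spinlock.h>

    struct item {
            int id;
            struct list_head node;
    };

    /* Legal under rcu_read_lock() *or* with @lock held; the condition is
     * only evaluated when CONFIG_PROVE_RCU_LIST is enabled.
     */
    static struct item *find_item(struct list_head *head, spinlock_t *lock,
                                  int id)
    {
            struct item *it;

            list_for_each_entry_rcu(it, head, node, lockdep_is_held(lock)) {
                    if (it->id == id)
                            return it;
            }
            return NULL;
    }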
index 25f9d83..daad787 100644 (file)
@@ -58,6 +58,7 @@ endif
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_MODULE_SIG) += module_signing.o
+obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_CRASH_CORE) += crash_core.o
index adb3adc..29c7c06 100644 (file)
@@ -2332,7 +2332,7 @@ static int btf_enum_check_kflag_member(struct btf_verifier_env *env,
                if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
                        btf_verifier_log_member(env, struct_type, member,
                                                "Member is not byte aligned");
-                               return -EINVAL;
+                       return -EINVAL;
                }
 
                nr_bits = int_bitsize;
@@ -2377,9 +2377,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
                return -EINVAL;
        }
 
-       if (t->size != sizeof(int)) {
-               btf_verifier_log_type(env, t, "Expected size:%zu",
-                                     sizeof(int));
+       if (t->size > 8 || !is_power_of_2(t->size)) {
+               btf_verifier_log_type(env, t, "Unexpected size");
                return -EINVAL;
        }
 
index 66088a9..ef0e1e3 100644 (file)
@@ -502,7 +502,7 @@ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
        return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
 }
 
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 {
        int i;
 
index d27f3b6..3867864 100644 (file)
@@ -128,7 +128,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 
                if (!dtab->n_buckets) /* Overflow check */
                        return -EINVAL;
-               cost += sizeof(struct hlist_head) * dtab->n_buckets;
+               cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
        }
 
        /* if map size is larger than memlock limit, reject it */
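
The (u64) cast matters on 32-bit targets, where sizeof() yields a 32-bit size_t and the uncast multiply wraps before being widened into the u64 cost. A stand-alone demonstration, modelling the 32-bit case with explicit u32 operands:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t sz = 4;            /* sizeof(struct hlist_head) on 32-bit */
            uint32_t n_buckets = 1U << 30;

            uint64_t bad  = sz * n_buckets;            /* wraps to 0, then widens */
            uint64_t good = (uint64_t)sz * n_buckets;  /* widens, then multiplies */

            printf("bad=%llu good=%llu\n",             /* bad=0 good=4294967296 */
                   (unsigned long long)bad, (unsigned long long)good);
            return 0;
    }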
@@ -719,6 +719,32 @@ const struct bpf_map_ops dev_map_hash_ops = {
        .map_check_btf = map_check_no_btf,
 };
 
+static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
+                                      struct net_device *netdev)
+{
+       unsigned long flags;
+       u32 i;
+
+       spin_lock_irqsave(&dtab->index_lock, flags);
+       for (i = 0; i < dtab->n_buckets; i++) {
+               struct bpf_dtab_netdev *dev;
+               struct hlist_head *head;
+               struct hlist_node *next;
+
+               head = dev_map_index_hash(dtab, i);
+
+               hlist_for_each_entry_safe(dev, next, head, index_hlist) {
+                       if (netdev != dev->dev)
+                               continue;
+
+                       dtab->items--;
+                       hlist_del_rcu(&dev->index_hlist);
+                       call_rcu(&dev->rcu, __dev_map_entry_free);
+               }
+       }
+       spin_unlock_irqrestore(&dtab->index_lock, flags);
+}
+
 static int dev_map_notification(struct notifier_block *notifier,
                                ulong event, void *ptr)
 {
@@ -735,6 +761,11 @@ static int dev_map_notification(struct notifier_block *notifier,
                 */
                rcu_read_lock();
                list_for_each_entry_rcu(dtab, &dev_map_list, list) {
+                       if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+                               dev_map_hash_remove_netdev(dtab, netdev);
+                               continue;
+                       }
+
                        for (i = 0; i < dtab->map.max_entries; i++) {
                                struct bpf_dtab_netdev *dev, *odev;
 
index cc0d0cf..a70f720 100644 (file)
@@ -14,8 +14,9 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/kdev_t.h>
-#include <linux/parser.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
@@ -583,58 +584,52 @@ static const struct super_operations bpf_super_ops = {
 
 enum {
        OPT_MODE,
-       OPT_ERR,
 };
 
-static const match_table_t bpf_mount_tokens = {
-       { OPT_MODE, "mode=%o" },
-       { OPT_ERR, NULL },
+static const struct fs_parameter_spec bpf_param_specs[] = {
+       fsparam_u32oct  ("mode",                        OPT_MODE),
+       {}
+};
+
+static const struct fs_parameter_description bpf_fs_parameters = {
+       .name           = "bpf",
+       .specs          = bpf_param_specs,
 };
 
 struct bpf_mount_opts {
        umode_t mode;
 };
 
-static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
+static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-       substring_t args[MAX_OPT_ARGS];
-       int option, token;
-       char *ptr;
+       struct bpf_mount_opts *opts = fc->fs_private;
+       struct fs_parse_result result;
+       int opt;
 
-       opts->mode = S_IRWXUGO;
-
-       while ((ptr = strsep(&data, ",")) != NULL) {
-               if (!*ptr)
-                       continue;
-
-               token = match_token(ptr, bpf_mount_tokens, args);
-               switch (token) {
-               case OPT_MODE:
-                       if (match_octal(&args[0], &option))
-                               return -EINVAL;
-                       opts->mode = option & S_IALLUGO;
-                       break;
+       opt = fs_parse(fc, &bpf_fs_parameters, param, &result);
+       if (opt < 0)
                /* We might like to report bad mount options here, but
                 * traditionally we've ignored all mount options, so we'd
                 * better continue to ignore non-existing options for bpf.
                 */
-               }
+               return opt == -ENOPARAM ? 0 : opt;
+
+       switch (opt) {
+       case OPT_MODE:
+               opts->mode = result.uint_32 & S_IALLUGO;
+               break;
        }
 
        return 0;
 }
 
-static int bpf_fill_super(struct super_block *sb, void *data, int silent)
+static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
 {
        static const struct tree_descr bpf_rfiles[] = { { "" } };
-       struct bpf_mount_opts opts;
+       struct bpf_mount_opts *opts = fc->fs_private;
        struct inode *inode;
        int ret;
 
-       ret = bpf_parse_options(data, &opts);
-       if (ret)
-               return ret;
-
        ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
        if (ret)
                return ret;
@@ -644,21 +639,50 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
        inode = sb->s_root->d_inode;
        inode->i_op = &bpf_dir_iops;
        inode->i_mode &= ~S_IALLUGO;
-       inode->i_mode |= S_ISVTX | opts.mode;
+       inode->i_mode |= S_ISVTX | opts->mode;
 
        return 0;
 }
 
-static struct dentry *bpf_mount(struct file_system_type *type, int flags,
-                               const char *dev_name, void *data)
+static int bpf_get_tree(struct fs_context *fc)
+{
+       return get_tree_nodev(fc, bpf_fill_super);
+}
+
+static void bpf_free_fc(struct fs_context *fc)
 {
-       return mount_nodev(type, flags, data, bpf_fill_super);
+       kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations bpf_context_ops = {
+       .free           = bpf_free_fc,
+       .parse_param    = bpf_parse_param,
+       .get_tree       = bpf_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int bpf_init_fs_context(struct fs_context *fc)
+{
+       struct bpf_mount_opts *opts;
+
+       opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL);
+       if (!opts)
+               return -ENOMEM;
+
+       opts->mode = S_IRWXUGO;
+
+       fc->fs_private = opts;
+       fc->ops = &bpf_context_ops;
+       return 0;
 }
 
 static struct file_system_type bpf_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "bpf",
-       .mount          = bpf_mount,
+       .init_fs_context = bpf_init_fs_context,
+       .parameters     = &bpf_fs_parameters,
        .kill_sb        = kill_litter_super,
 };
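
From user space the conversion to the new mount API should be invisible; a mount like the following keeps working, with the default mode of 0777 now set in bpf_init_fs_context() above:

    # mount -t bpf -o mode=0700 bpffs /sys/fs/bpf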
 
index 82eabd4..0937719 100644 (file)
@@ -1326,24 +1326,32 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 {
        struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
 
+       kvfree(aux->func_info);
        free_used_maps(aux);
        bpf_prog_uncharge_memlock(aux->prog);
        security_bpf_prog_free(aux);
        bpf_prog_free(aux->prog);
 }
 
+static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
+{
+       bpf_prog_kallsyms_del_all(prog);
+       btf_put(prog->aux->btf);
+       bpf_prog_free_linfo(prog);
+
+       if (deferred)
+               call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+       else
+               __bpf_prog_put_rcu(&prog->aux->rcu);
+}
+
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
        if (atomic_dec_and_test(&prog->aux->refcnt)) {
                perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
                /* bpf_prog_free_id() must be called first */
                bpf_prog_free_id(prog, do_idr_lock);
-               bpf_prog_kallsyms_del_all(prog);
-               btf_put(prog->aux->btf);
-               kvfree(prog->aux->func_info);
-               bpf_prog_free_linfo(prog);
-
-               call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+               __bpf_prog_put_noref(prog, true);
        }
 }
 
@@ -1741,11 +1749,12 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
        return err;
 
 free_used_maps:
-       bpf_prog_free_linfo(prog);
-       kvfree(prog->aux->func_info);
-       btf_put(prog->aux->btf);
-       bpf_prog_kallsyms_del_subprogs(prog);
-       free_used_maps(prog->aux);
+       /* In case we have subprogs, we need to wait for a grace
+        * period before we can tear down JIT memory since symbols
+        * are already exposed under kallsyms.
+        */
+       __bpf_prog_put_noref(prog, prog->aux->func_cnt);
+       return err;
 free_prog:
        bpf_prog_uncharge_memlock(prog);
 free_prog_sec:
index 942c662..82a1ffe 100644 (file)
@@ -37,7 +37,7 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
 
        node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
        if (!node)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        err = xsk_map_inc(map);
        if (err) {
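
Returning ERR_PTR(-ENOMEM) instead of NULL lets callers recover a precise errno from the returned pointer. A generic kernel-style sketch of the idiom, with a hypothetical type and names:

    #include <linux/err.h>
    #include <linux/slab.h>

    struct thing { int x; };

    static struct thing *thing_alloc(void)
    {
            struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);

            if (!t)
                    return ERR_PTR(-ENOMEM);   /* errno encoded in the pointer */
            return t;
    }

    static int thing_use(void)
    {
            struct thing *t = thing_alloc();

            if (IS_ERR(t))
                    return PTR_ERR(t);         /* e.g. -ENOMEM, not just "failed" */

            t->x = 1;
            kfree(t);
            return 0;
    }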
index c52bc91..c87ee64 100644 (file)
@@ -798,7 +798,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
                    cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
                        continue;
 
-               if (is_sched_load_balance(cp))
+               if (is_sched_load_balance(cp) &&
+                   !cpumask_empty(cp->effective_cpus))
                        csa[csn++] = cp;
 
                /* skip @cp's subtree if not a partition root */
index e1967e9..fc28e17 100644 (file)
@@ -392,8 +392,7 @@ enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 
 void __init cpu_smt_disable(bool force)
 {
-       if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
-               cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+       if (!cpu_smt_possible())
                return;
 
        if (force) {
@@ -438,6 +437,14 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
         */
        return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
+
+/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
+bool cpu_smt_possible(void)
+{
+       return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
+               cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
+}
+EXPORT_SYMBOL_GPL(cpu_smt_possible);
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
 #endif
index 10f1187..f76d6f7 100644 (file)
@@ -893,30 +893,25 @@ static struct sysrq_key_op sysrq_dbg_op = {
 };
 #endif
 
-static int kgdb_panic_event(struct notifier_block *self,
-                           unsigned long val,
-                           void *data)
+void kgdb_panic(const char *msg)
 {
+       if (!kgdb_io_module_registered)
+               return;
+
        /*
-        * Avoid entering the debugger if we were triggered due to a panic
-        * We don't want to get stuck waiting for input from user in such case.
-        * panic_timeout indicates the system should automatically
+        * We don't want to get stuck waiting for input from user if
+        * "panic_timeout" indicates the system should automatically
         * reboot on panic.
         */
        if (panic_timeout)
-               return NOTIFY_DONE;
+               return;
 
        if (dbg_kdb_mode)
-               kdb_printf("PANIC: %s\n", (char *)data);
+               kdb_printf("PANIC: %s\n", msg);
+
        kgdb_breakpoint();
-       return NOTIFY_DONE;
 }
 
-static struct notifier_block kgdb_panic_event_nb = {
-       .notifier_call  = kgdb_panic_event,
-       .priority       = INT_MAX,
-};
-
 void __weak kgdb_arch_late(void)
 {
 }
@@ -965,8 +960,6 @@ static void kgdb_register_callbacks(void)
                        kgdb_arch_late();
                register_module_notifier(&dbg_module_load_nb);
                register_reboot_notifier(&dbg_reboot_notifier);
-               atomic_notifier_chain_register(&panic_notifier_list,
-                                              &kgdb_panic_event_nb);
 #ifdef CONFIG_MAGIC_SYSRQ
                register_sysrq_key('g', &sysrq_dbg_op);
 #endif
@@ -980,16 +973,14 @@ static void kgdb_register_callbacks(void)
 static void kgdb_unregister_callbacks(void)
 {
        /*
-        * When this routine is called KGDB should unregister from the
-        * panic handler and clean up, making sure it is not handling any
+        * When this routine is called KGDB should unregister from
+        * handlers and clean up, making sure it is not handling any
         * break exceptions at the time.
         */
        if (kgdb_io_module_registered) {
                kgdb_io_module_registered = 0;
                unregister_reboot_notifier(&dbg_reboot_notifier);
                unregister_module_notifier(&dbg_module_load_nb);
-               atomic_notifier_chain_unregister(&panic_notifier_list,
-                                              &kgdb_panic_event_nb);
                kgdb_arch_exit();
 #ifdef CONFIG_MAGIC_SYSRQ
                unregister_sysrq_key('g', &sysrq_dbg_op);
index ca4e5d4..c00b925 100644 (file)
@@ -87,9 +87,9 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
  */
 void dma_common_free_remap(void *cpu_addr, size_t size)
 {
-       struct page **pages = dma_common_find_pages(cpu_addr);
+       struct vm_struct *area = find_vm_area(cpu_addr);
 
-       if (!pages) {
+       if (!area || area->flags != VM_DMA_COHERENT) {
                WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
                return;
        }
index fc482c8..57fb4dc 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/binfmts.h>
+#include <linux/elfcore.h>
 
 Elf_Half __weak elf_core_extra_phdrs(void)
 {
index 4f08b17..aec8dba 100644 (file)
@@ -2239,7 +2239,7 @@ static void __perf_event_disable(struct perf_event *event,
  *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
- * remains valid.  This condition is satisifed when called through
+ * remains valid.  This condition is satisfied when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
  * goes to exit will block in perf_event_exit_event().
@@ -3779,11 +3779,23 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
        perf_event_groups_insert(&ctx->flexible_groups, event);
 }
 
+/* pick an event from the flexible_groups to rotate */
 static inline struct perf_event *
-ctx_first_active(struct perf_event_context *ctx)
+ctx_event_to_rotate(struct perf_event_context *ctx)
 {
-       return list_first_entry_or_null(&ctx->flexible_active,
-                                       struct perf_event, active_list);
+       struct perf_event *event;
+
+       /* pick the first active flexible event */
+       event = list_first_entry_or_null(&ctx->flexible_active,
+                                        struct perf_event, active_list);
+
+       /* if no active flexible event, pick the first event */
+       if (!event) {
+               event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
+                                     typeof(*event), group_node);
+       }
+
+       return event;
 }
 
 static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
@@ -3808,9 +3820,9 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
        perf_pmu_disable(cpuctx->ctx.pmu);
 
        if (task_rotate)
-               task_event = ctx_first_active(task_ctx);
+               task_event = ctx_event_to_rotate(task_ctx);
        if (cpu_rotate)
-               cpu_event = ctx_first_active(&cpuctx->ctx);
+               cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
 
        /*
         * As per the order given at ctx_resched() first 'pop' task flexible
@@ -5595,8 +5607,10 @@ static void perf_mmap_close(struct vm_area_struct *vma)
                perf_pmu_output_stop(event);
 
                /* now it's safe to free the pages */
-               atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
-               atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
+               if (!rb->aux_mmap_locked)
+                       atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
+               else
+                       atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
 
                /* this has to be the last one */
                rb_free_aux(rb);
@@ -5668,7 +5682,8 @@ again:
         * undo the VM accounting.
         */
 
-       atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+       atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
+                       &mmap_user->locked_vm);
        atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
        free_uid(mmap_user);
 
@@ -5812,8 +5827,20 @@ accounting:
 
        user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-       if (user_locked > user_lock_limit)
+       if (user_locked <= user_lock_limit) {
+               /* charge all to locked_vm */
+       } else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
+               /* charge all to pinned_vm */
+               extra = user_extra;
+               user_extra = 0;
+       } else {
+               /*
+                * charge locked_vm until it hits user_lock_limit;
+                * charge the rest from pinned_vm
+                */
                extra = user_locked - user_lock_limit;
+               user_extra -= extra;
+       }
 
        lock_limit = rlimit(RLIMIT_MEMLOCK);
        lock_limit >>= PAGE_SHIFT;
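
A worked example of the three-way split above, with hypothetical numbers: take user_lock_limit = 100 pages, locked_vm already at 90 and user_extra = 30. Then user_locked = 120 exceeds the limit while locked_vm is still below it, so the final branch fires: extra = 120 - 100 = 20 and user_extra drops to 10. Ten pages top locked_vm up to exactly the limit and the remaining twenty are charged to pinned_vm.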
@@ -6054,7 +6081,7 @@ static void perf_sample_regs_intr(struct perf_regs *regs_intr,
  * Get remaining task size from user stack pointer.
  *
  * It'd be better to take stack vma map and limit this more
- * precisly, but there's no way to get it safely under interrupt,
+ * precisely, but there's no way to get it safely under interrupt,
  * so using TASK_SIZE as limit.
  */
 static u64 perf_ustack_task_size(struct pt_regs *regs)
@@ -6616,7 +6643,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 
        if (sample_type & PERF_SAMPLE_STACK_USER) {
                /*
-                * Either we need PERF_SAMPLE_STACK_USER bit to be allways
+                * Either we need PERF_SAMPLE_STACK_USER bit to be always
                 * processed as the last one or have additional check added
                 * in case new sample type is added, because we could eat
                 * up the rest of the sample size.
@@ -6922,7 +6949,7 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
 static int __perf_pmu_output_stop(void *info)
 {
        struct perf_event *event = info;
-       struct pmu *pmu = event->pmu;
+       struct pmu *pmu = event->ctx->pmu;
        struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
        struct remote_output ro = {
                .rb     = event->rb,
@@ -10586,58 +10613,29 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        u32 size;
        int ret;
 
-       if (!access_ok(uattr, PERF_ATTR_SIZE_VER0))
-               return -EFAULT;
-
-       /*
-        * zero the full structure, so that a short copy will be nice.
-        */
+       /* Zero the full structure, so that a short copy will be nice. */
        memset(attr, 0, sizeof(*attr));
 
        ret = get_user(size, &uattr->size);
        if (ret)
                return ret;
 
-       if (size > PAGE_SIZE)   /* silly large */
-               goto err_size;
-
-       if (!size)              /* abi compat */
+       /* ABI compatibility quirk: */
+       if (!size)
                size = PERF_ATTR_SIZE_VER0;
-
-       if (size < PERF_ATTR_SIZE_VER0)
+       if (size < PERF_ATTR_SIZE_VER0 || size > PAGE_SIZE)
                goto err_size;
 
-       /*
-        * If we're handed a bigger struct than we know of,
-        * ensure all the unknown bits are 0 - i.e. new
-        * user-space does not rely on any kernel feature
-        * extensions we dont know about yet.
-        */
-       if (size > sizeof(*attr)) {
-               unsigned char __user *addr;
-               unsigned char __user *end;
-               unsigned char val;
-
-               addr = (void __user *)uattr + sizeof(*attr);
-               end  = (void __user *)uattr + size;
-
-               for (; addr < end; addr++) {
-                       ret = get_user(val, addr);
-                       if (ret)
-                               return ret;
-                       if (val)
-                               goto err_size;
-               }
-               size = sizeof(*attr);
+       ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
+       if (ret) {
+               if (ret == -E2BIG)
+                       goto err_size;
+               return ret;
        }
 
-       ret = copy_from_user(attr, uattr, size);
-       if (ret)
-               return -EFAULT;
-
        attr->size = size;
 
-       if (attr->__reserved_1)
+       if (attr->__reserved_1 || attr->__reserved_2)
                return -EINVAL;
 
        if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
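
copy_struct_from_user() is the recently added helper that centralises the extensible-struct handling the removed lines did by hand. A sketch of its contract against a hypothetical size-versioned ABI struct:

    #include <linux/types.h>
    #include <linux/uaccess.h>

    struct foo_args {          /* hypothetical extensible ABI struct */
            __u64 flags;
            __u64 ptr;
    };

    static int foo_copy_args(struct foo_args *karg,
                             const void __user *uarg, size_t usize)
    {
            /* Copies min(sizeof(*karg), usize) bytes, zero-fills any
             * remainder of *karg, and returns -E2BIG only when user space
             * passed a larger struct whose trailing bytes are non-zero,
             * i.e. it asked for features this kernel does not know about.
             */
            return copy_struct_from_user(karg, sizeof(*karg), uarg, usize);
    }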
@@ -10917,6 +10915,13 @@ SYSCALL_DEFINE5(perf_event_open,
            perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
                return -EACCES;
 
+       err = security_locked_down(LOCKDOWN_PERF);
+       if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
+               /* REGS_INTR can leak data, lockdown must prevent this */
+               return err;
+
+       err = 0;
+
        /*
         * In cgroup mode, the pid argument is used to pass the fd
         * opened to the cgroup directory in cgroupfs. The cpu argument
@@ -11884,6 +11889,10 @@ static int inherit_group(struct perf_event *parent_event,
                                            child, leader, child_ctx);
                if (IS_ERR(child_ctr))
                        return PTR_ERR(child_ctr);
+
+               if (sub->aux_event == parent_event &&
+                   !perf_get_aux_event(child_ctr, leader))
+                       return -EINVAL;
        }
        return 0;
 }
index 84fa004..c747610 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
+#include <linux/khugepaged.h>
 
 #include <linux/uprobes.h>
 
@@ -143,17 +144,19 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
  *
  * @vma:      vma that holds the pte pointing to page
  * @addr:     address the old @page is mapped at
- * @page:     the cowed page we are replacing by kpage
- * @kpage:    the modified page we replace page by
+ * @old_page: the page we are replacing by new_page
+ * @new_page: the modified page we replace page by
  *
- * Returns 0 on success, -EFAULT on failure.
+ * If @new_page is NULL, only unmap @old_page.
+ *
+ * Returns 0 on success, negative error code otherwise.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                                struct page *old_page, struct page *new_page)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct page_vma_mapped_walk pvmw = {
-               .page = old_page,
+               .page = compound_head(old_page),
                .vma = vma,
                .address = addr,
        };
@@ -164,12 +167,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
                                addr + PAGE_SIZE);
 
-       VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
-       err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
-                       false);
-       if (err)
-               return err;
+       if (new_page) {
+               err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+                                           &memcg, false);
+               if (err)
+                       return err;
+       }
 
        /* For try_to_free_swap() and munlock_vma_page() below */
        lock_page(old_page);
@@ -177,15 +180,20 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_invalidate_range_start(&range);
        err = -EAGAIN;
        if (!page_vma_mapped_walk(&pvmw)) {
-               mem_cgroup_cancel_charge(new_page, memcg, false);
+               if (new_page)
+                       mem_cgroup_cancel_charge(new_page, memcg, false);
                goto unlock;
        }
        VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
-       get_page(new_page);
-       page_add_new_anon_rmap(new_page, vma, addr, false);
-       mem_cgroup_commit_charge(new_page, memcg, false, false);
-       lru_cache_add_active_or_unevictable(new_page, vma);
+       if (new_page) {
+               get_page(new_page);
+               page_add_new_anon_rmap(new_page, vma, addr, false);
+               mem_cgroup_commit_charge(new_page, memcg, false, false);
+               lru_cache_add_active_or_unevictable(new_page, vma);
+       } else
+               /* no new page, just dec_mm_counter for old_page */
+               dec_mm_counter(mm, MM_ANONPAGES);
 
        if (!PageAnon(old_page)) {
                dec_mm_counter(mm, mm_counter_file(old_page));
@@ -194,8 +202,9 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
        ptep_clear_flush_notify(vma, addr, pvmw.pte);
-       set_pte_at_notify(mm, addr, pvmw.pte,
-                       mk_pte(new_page, vma->vm_page_prot));
+       if (new_page)
+               set_pte_at_notify(mm, addr, pvmw.pte,
+                                 mk_pte(new_page, vma->vm_page_prot));
 
        page_remove_rmap(old_page, false);
        if (!page_mapped(old_page))
@@ -464,14 +473,18 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
        struct page *old_page, *new_page;
        struct vm_area_struct *vma;
        int ret, is_register, ref_ctr_updated = 0;
+       bool orig_page_huge = false;
+       unsigned int gup_flags = FOLL_FORCE;
 
        is_register = is_swbp_insn(&opcode);
        uprobe = container_of(auprobe, struct uprobe, arch);
 
 retry:
+       if (is_register)
+               gup_flags |= FOLL_SPLIT_PMD;
        /* Read the page with vaddr into memory */
-       ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-                       FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+       ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags,
+                                   &old_page, &vma, NULL);
        if (ret <= 0)
                return ret;
 
@@ -479,6 +492,12 @@ retry:
        if (ret <= 0)
                goto put_old;
 
+       if (WARN(!is_register && PageCompound(old_page),
+                "uprobe unregister should never work on compound page\n")) {
+               ret = -EINVAL;
+               goto put_old;
+       }
+
        /* We are going to replace instruction, update ref_ctr. */
        if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
                ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
@@ -488,6 +507,10 @@ retry:
                ref_ctr_updated = 1;
        }
 
+       ret = 0;
+       if (!is_register && !PageAnon(old_page))
+               goto put_old;
+
        ret = anon_vma_prepare(vma);
        if (ret)
                goto put_old;
@@ -501,8 +524,33 @@ retry:
        copy_highpage(new_page, old_page);
        copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
+       if (!is_register) {
+               struct page *orig_page;
+               pgoff_t index;
+
+               VM_BUG_ON_PAGE(!PageAnon(old_page), old_page);
+
+               index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+               orig_page = find_get_page(vma->vm_file->f_inode->i_mapping,
+                                         index);
+
+               if (orig_page) {
+                       if (PageUptodate(orig_page) &&
+                           pages_identical(new_page, orig_page)) {
+                               /* let go new_page */
+                               put_page(new_page);
+                               new_page = NULL;
+
+                               if (PageCompound(orig_page))
+                                       orig_page_huge = true;
+                       }
+                       put_page(orig_page);
+               }
+       }
+
        ret = __replace_page(vma, vaddr, old_page, new_page);
-       put_page(new_page);
+       if (new_page)
+               put_page(new_page);
 put_old:
        put_page(old_page);
 
@@ -513,6 +561,10 @@ put_old:
        if (ret && is_register && ref_ctr_updated)
                update_ref_ctr(uprobe, mm, -1);
 
+       /* try collapse pmd for compound page */
+       if (!ret && orig_page_huge)
+               collapse_pte_mapped_thp(mm, vaddr);
+
        return ret;
 }
 
index 22ab6a4..a46a50d 100644 (file)
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
        put_task_struct(tsk);
 }
 
+void put_task_struct_rcu_user(struct task_struct *task)
+{
+       if (refcount_dec_and_test(&task->rcu_users))
+               call_rcu(&task->rcu, delayed_put_task_struct);
+}
 
 void release_task(struct task_struct *p)
 {
@@ -222,76 +227,13 @@ repeat:
 
        write_unlock_irq(&tasklist_lock);
        release_thread(p);
-       call_rcu(&p->rcu, delayed_put_task_struct);
+       put_task_struct_rcu_user(p);
 
        p = leader;
        if (unlikely(zap_leader))
                goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-       struct sighand_struct *sighand;
-       struct task_struct *task;
-
-       /*
-        * We need to verify that release_task() was not called and thus
-        * delayed_put_task_struct() can't run and drop the last reference
-        * before rcu_read_unlock(). We check task->sighand != NULL,
-        * but we can read the already freed and reused memory.
-        */
-retry:
-       task = rcu_dereference(*ptask);
-       if (!task)
-               return NULL;
-
-       probe_kernel_address(&task->sighand, sighand);
-
-       /*
-        * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-        * was already freed we can not miss the preceding update of this
-        * pointer.
-        */
-       smp_rmb();
-       if (unlikely(task != READ_ONCE(*ptask)))
-               goto retry;
-
-       /*
-        * We've re-checked that "task == *ptask", now we have two different
-        * cases:
-        *
-        * 1. This is actually the same task/task_struct. In this case
-        *    sighand != NULL tells us it is still alive.
-        *
-        * 2. This is another task which got the same memory for task_struct.
-        *    We can't know this of course, and we can not trust
-        *    sighand != NULL.
-        *
-        *    In this case we actually return a random value, but this is
-        *    correct.
-        *
-        *    If we return NULL - we can pretend that we actually noticed that
-        *    *ptask was updated when the previous task has exited. Or pretend
-        *    that probe_slab_address(&sighand) reads NULL.
-        *
-        *    If we return the new task (because sighand is not NULL for any
-        *    reason) - this is fine too. This (new) task can't go away before
-        *    another gp pass.
-        *
-        *    And note: We could even eliminate the false positive if re-read
-        *    task->sighand once again to avoid the falsely NULL. But this case
-        *    is very unlikely so we don't care.
-        */
-       if (!sighand)
-               return NULL;
-
-       return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
        struct task_struct *task;
@@ -311,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
         */
        smp_mb(); /* (B) */
 
-       /*
-        * Avoid using task_rcu_dereference() magic as long as we are careful,
-        * see comment in rcuwait_wait_event() regarding ->exit_state.
-        */
        task = rcu_dereference(w->task);
        if (task)
                wake_up_process(task);
index 5a0fd51..55af693 100644 (file)
@@ -125,6 +125,15 @@ int nr_threads;                    /* The idle threads do not count.. */
 
 static int max_threads;                /* tunable limit on nr_threads */
 
+#define NAMED_ARRAY_INDEX(x)   [x] = __stringify(x)
+
+static const char * const resident_page_types[] = {
+       NAMED_ARRAY_INDEX(MM_FILEPAGES),
+       NAMED_ARRAY_INDEX(MM_ANONPAGES),
+       NAMED_ARRAY_INDEX(MM_SWAPENTS),
+       NAMED_ARRAY_INDEX(MM_SHMEMPAGES),
+};
+
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
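
For reference, each NAMED_ARRAY_INDEX() entry is a designated initializer built from the enum name, e.g.

    [MM_FILEPAGES] = "MM_FILEPAGES",

so the string table stays keyed to the enum values regardless of their order, and the check_mm() hunk below can assert it covers every counter.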
@@ -645,12 +654,15 @@ static void check_mm(struct mm_struct *mm)
 {
        int i;
 
+       BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS,
+                        "Please make sure 'resident_page_types[]' is updated as well");
+
        for (i = 0; i < NR_MM_COUNTERS; i++) {
                long x = atomic_long_read(&mm->rss_stat.count[i]);
 
                if (unlikely(x))
-                       printk(KERN_ALERT "BUG: Bad rss-counter state "
-                                         "mm:%p idx:%d val:%ld\n", mm, i, x);
+                       pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
+                                mm, resident_page_types[i], x);
        }
 
        if (mm_pgtables_bytes(mm))
@@ -903,10 +915,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
                tsk->cpus_ptr = &tsk->cpus_mask;
 
        /*
-        * One for us, one for whoever does the "release_task()" (usually
-        * parent)
+        * One for the user space visible state that goes away when reaped.
+        * One for the scheduler.
         */
-       refcount_set(&tsk->usage, 2);
+       refcount_set(&tsk->rcu_users, 2);
+       /* One for the rcu users */
+       refcount_set(&tsk->usage, 1);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
        tsk->btrace_seq = 0;
 #endif
@@ -2511,39 +2525,19 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 #ifdef __ARCH_WANT_SYS_CLONE3
 noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
                                              struct clone_args __user *uargs,
-                                             size_t size)
+                                             size_t usize)
 {
+       int err;
        struct clone_args args;
 
-       if (unlikely(size > PAGE_SIZE))
+       if (unlikely(usize > PAGE_SIZE))
                return -E2BIG;
-
-       if (unlikely(size < sizeof(struct clone_args)))
+       if (unlikely(usize < CLONE_ARGS_SIZE_VER0))
                return -EINVAL;
 
-       if (unlikely(!access_ok(uargs, size)))
-               return -EFAULT;
-
-       if (size > sizeof(struct clone_args)) {
-               unsigned char __user *addr;
-               unsigned char __user *end;
-               unsigned char val;
-
-               addr = (void __user *)uargs + sizeof(struct clone_args);
-               end = (void __user *)uargs + size;
-
-               for (; addr < end; addr++) {
-                       if (get_user(val, addr))
-                               return -EFAULT;
-                       if (val)
-                               return -E2BIG;
-               }
-
-               size = sizeof(struct clone_args);
-       }
-
-       if (copy_from_user(&args, uargs, size))
-               return -EFAULT;
+       err = copy_struct_from_user(&args, sizeof(args), uargs, usize);
+       if (err)
+               return err;
 
        /*
         * Verify that higher 32bits of exit_signal are unset and that
@@ -2567,7 +2561,35 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
        return 0;
 }
 
-static bool clone3_args_valid(const struct kernel_clone_args *kargs)
+/**
+ * clone3_stack_valid - check and prepare stack
+ * @kargs: kernel clone args
+ *
+ * Verify that the stack arguments userspace gave us are sane.
+ * In addition, set the stack direction for userspace since it's easy for us to
+ * determine.
+ */
+static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
+{
+       if (kargs->stack == 0) {
+               if (kargs->stack_size > 0)
+                       return false;
+       } else {
+               if (kargs->stack_size == 0)
+                       return false;
+
+               if (!access_ok((void __user *)kargs->stack, kargs->stack_size))
+                       return false;
+
+#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64)
+               kargs->stack += kargs->stack_size;
+#endif
+       }
+
+       return true;
+}
+
+static bool clone3_args_valid(struct kernel_clone_args *kargs)
 {
        /*
         * All lower bits of the flag word are taken.
@@ -2587,9 +2609,23 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs)
            kargs->exit_signal)
                return false;
 
+       if (!clone3_stack_valid(kargs))
+               return false;
+
        return true;
 }
 
+/**
+ * clone3 - create a new process with specific properties
+ * @uargs: argument structure
+ * @size:  size of @uargs
+ *
+ * clone3() is the extensible successor to clone()/clone2().
+ * It takes a struct as argument that is versioned by its size.
+ *
+ * Return: On success, a positive PID for the child process.
+ *         On error, a negative errno number.
+ */
 SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
 {
        int err;
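
A hedged user-space sketch of a fork-style clone3() call (assumes kernel headers new enough to provide __NR_clone3 and struct clone_args). Per clone3_stack_valid() above, args.stack and args.stack_size must either both be set, with args.stack holding the lowest address, or both be zero to inherit the parent's stack pointer, which is what a fork-like child wants:

    #define _GNU_SOURCE
    #include <linux/sched.h>    /* struct clone_args (headers >= 5.3) */
    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>
    #include <signal.h>
    #include <string.h>

    static pid_t try_clone3(void)
    {
            struct clone_args args;
            pid_t pid;

            memset(&args, 0, sizeof(args));
            args.exit_signal = SIGCHLD;
            /* args.stack / args.stack_size left at zero: fork semantics */

            pid = syscall(__NR_clone3, &args, sizeof(args));
            if (pid == 0)
                    _exit(0);   /* child */
            return pid;         /* parent: child PID, or -1 with errno set */
    }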
@@ -2920,7 +2956,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
        struct ctl_table t;
        int ret;
        int threads = max_threads;
-       int min = MIN_THREADS;
+       int min = 1;
        int max = MAX_THREADS;
 
        t = *table;
@@ -2932,7 +2968,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
        if (ret || !write)
                return ret;
 
-       set_max_threads(threads);
+       max_threads = threads;
 
        return 0;
 }
index c073842..dc520f0 100644 (file)
@@ -22,12 +22,6 @@ EXPORT_SYMBOL(system_freezing_cnt);
 bool pm_freezing;
 bool pm_nosig_freezing;
 
-/*
- * Temporary export for the deadlock workaround in ata_scsi_hotplug().
- * Remove once the hack becomes unnecessary.
- */
-EXPORT_SYMBOL_GPL(pm_freezing);
-
 /* protects freezing and frozen transitions */
 static DEFINE_SPINLOCK(freezer_lock);
 
index 9ff4498..5a0fc0b 100755 (executable)
@@ -71,7 +71,13 @@ done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1
 find $cpio_dir -type f -print0 |
        xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
 
-tar -Jcf $tarfile -C $cpio_dir/ . > /dev/null
+# Create archive and try to normalize metadata for reproducibility.
+# For compatibility with older versions of tar, files are fed to tar
+# pre-sorted, as --sort=name might not be available.
+find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \
+    tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
+    --owner=0 --group=0 --numeric-owner --no-recursion \
+    -Jcf $tarfile -C $cpio_dir/ -T - > /dev/null
 
 echo "$src_files_md5" >  kernel/kheaders.md5
 echo "$obj_files_md5" >> kernel/kheaders.md5
index 1b018f1..bc933c0 100644 (file)
@@ -205,6 +205,14 @@ static inline int kexec_load_check(unsigned long nr_segments,
        if (result < 0)
                return result;
 
+       /*
+        * kexec can be used to circumvent module loading restrictions, so
+        * prevent loading in that case
+        */
+       result = security_locked_down(LOCKDOWN_KEXEC);
+       if (result)
+               return result;
+
        /*
         * Verify we have a legal set of flags
         * This leaves us room for future extensions.
index d587072..15d70a9 100644 (file)
@@ -300,6 +300,8 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
        struct page *pages;
 
+       if (fatal_signal_pending(current))
+               return NULL;
        pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
        if (pages) {
                unsigned int count, i;
index b8cc032..79f252a 100644 (file)
@@ -88,7 +88,7 @@ int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
        return kexec_image_post_load_cleanup_default(image);
 }
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
                                          unsigned long buf_len)
 {
@@ -177,6 +177,59 @@ void kimage_file_post_load_cleanup(struct kimage *image)
        image->image_loader_data = NULL;
 }
 
+#ifdef CONFIG_KEXEC_SIG
+static int
+kimage_validate_signature(struct kimage *image)
+{
+       const char *reason;
+       int ret;
+
+       ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+                                          image->kernel_buf_len);
+       switch (ret) {
+       case 0:
+               break;
+
+               /* Certain verification errors are non-fatal if we're not
+                * enforcing signature checking, i.e. provided we aren't
+                * mandating that there must be a valid signature.
+                */
+       case -ENODATA:
+               reason = "kexec of unsigned image";
+               goto decide;
+       case -ENOPKG:
+               reason = "kexec of image with unsupported crypto";
+               goto decide;
+       case -ENOKEY:
+               reason = "kexec of image with unavailable key";
+       decide:
+               if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
+                       pr_notice("%s rejected\n", reason);
+                       return ret;
+               }
+
+               /* If IMA is guaranteed to appraise a signature on the kexec
+                * image, permit it even if the kernel is otherwise locked
+                * down.
+                */
+               if (!ima_appraise_signature(READING_KEXEC_IMAGE) &&
+                   security_locked_down(LOCKDOWN_KEXEC))
+                       return -EPERM;
+
+               return 0;
+
+               /* All other errors are fatal, including nomem, unparseable
+                * signatures and signature check failures - even if signatures
+                * aren't required.
+                */
+       default:
+               pr_notice("kernel signature verification failed (%d).\n", ret);
+       }
+
+       return ret;
+}
+#endif
+
 /*
  * In file mode list of segments is prepared by kernel. Copy relevant
  * data from user space, do error checking, prepare segment list
@@ -186,7 +239,7 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
                             const char __user *cmdline_ptr,
                             unsigned long cmdline_len, unsigned flags)
 {
-       int ret = 0;
+       int ret;
        void *ldata;
        loff_t size;
 
@@ -202,14 +255,11 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
        if (ret)
                goto out;
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-       ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
-                                          image->kernel_buf_len);
-       if (ret) {
-               pr_debug("kernel signature verification failed.\n");
+#ifdef CONFIG_KEXEC_SIG
+       ret = kimage_validate_signature(image);
+
+       if (ret)
                goto out;
-       }
-       pr_debug("kernel signature verification successful.\n");
 #endif
        /* It is possible that no initramfs is being loaded */
        if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
index 621467c..b262f47 100644 (file)
@@ -866,9 +866,9 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 }
 EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
 
-void __kthread_queue_delayed_work(struct kthread_worker *worker,
-                                 struct kthread_delayed_work *dwork,
-                                 unsigned long delay)
+static void __kthread_queue_delayed_work(struct kthread_worker *worker,
+                                        struct kthread_delayed_work *dwork,
+                                        unsigned long delay)
 {
        struct timer_list *timer = &dwork->timer;
        struct kthread_work *work = &dwork->work;
index c4ce08f..ab4a460 100644 (file)
@@ -1175,6 +1175,7 @@ err:
        pr_warn("patch '%s' failed for module '%s', refusing to load module '%s'\n",
                patch->mod->name, obj->mod->name, obj->mod->name);
        mod->klp_alive = false;
+       obj->mod = NULL;
        klp_cleanup_module_patches_limited(mod, patch);
        mutex_unlock(&klp_mutex);
 
index 89bab07..e84d21a 100644 (file)
@@ -269,7 +269,7 @@ pv_wait_early(struct pv_node *prev, int loop)
        if ((loop & PV_PREV_CHECK_MASK) != 0)
                return false;
 
-       return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu);
+       return READ_ONCE(prev->state) != vcpu_running;
 }
 
 /*
index 32873bc..ff2d735 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/extable.h>
 #include <linux/moduleloader.h>
+#include <linux/module_signature.h>
 #include <linux/trace_events.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
@@ -2838,8 +2839,9 @@ static inline void kmemleak_load_module(const struct module *mod,
 #ifdef CONFIG_MODULE_SIG
 static int module_sig_check(struct load_info *info, int flags)
 {
-       int err = -ENOKEY;
+       int err = -ENODATA;
        const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+       const char *reason;
        const void *mod = info->hdr;
 
        /*
@@ -2854,16 +2856,38 @@ static int module_sig_check(struct load_info *info, int flags)
                err = mod_verify_sig(mod, info);
        }
 
-       if (!err) {
+       switch (err) {
+       case 0:
                info->sig_ok = true;
                return 0;
-       }
 
-       /* Not having a signature is only an error if we're strict. */
-       if (err == -ENOKEY && !is_module_sig_enforced())
-               err = 0;
+               /* We don't permit modules to be loaded into trusted kernels
+                * without a valid signature on them, but if we're not
+                * enforcing, certain errors are non-fatal.
+                */
+       case -ENODATA:
+               reason = "Loading of unsigned module";
+               goto decide;
+       case -ENOPKG:
+               reason = "Loading of module with unsupported crypto";
+               goto decide;
+       case -ENOKEY:
+               reason = "Loading of module with unavailable key";
+       decide:
+               if (is_module_sig_enforced()) {
+                       pr_notice("%s is rejected\n", reason);
+                       return -EKEYREJECTED;
+               }
 
-       return err;
+               return security_locked_down(LOCKDOWN_MODULE_SIGNATURE);
+
+               /* All other errors are fatal, including nomem, unparseable
+                * signatures and signature check failures - even if signatures
+                * aren't required.
+                */
+       default:
+               return err;
+       }
 }
 #else /* !CONFIG_MODULE_SIG */
 static int module_sig_check(struct load_info *info, int flags)
diff --git a/kernel/module_signature.c b/kernel/module_signature.c
new file mode 100644 (file)
index 0000000..4224a10
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Module signature checker
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/errno.h>
+#include <linux/printk.h>
+#include <linux/module_signature.h>
+#include <asm/byteorder.h>
+
+/**
+ * mod_check_sig - check that the given signature is sane
+ *
+ * @ms:                Signature to check.
+ * @file_len:  Size of the file to which @ms is appended.
+ * @name:      What is being checked. Used for error messages.
+ */
+int mod_check_sig(const struct module_signature *ms, size_t file_len,
+                 const char *name)
+{
+       if (be32_to_cpu(ms->sig_len) >= file_len - sizeof(*ms))
+               return -EBADMSG;
+
+       if (ms->id_type != PKEY_ID_PKCS7) {
+               pr_err("%s: Module is not signed with expected PKCS#7 message\n",
+                      name);
+               return -ENOPKG;
+       }
+
+       if (ms->algo != 0 ||
+           ms->hash != 0 ||
+           ms->signer_len != 0 ||
+           ms->key_id_len != 0 ||
+           ms->__pad[0] != 0 ||
+           ms->__pad[1] != 0 ||
+           ms->__pad[2] != 0) {
+               pr_err("%s: PKCS#7 signature info has unexpected non-zero params\n",
+                      name);
+               return -EBADMSG;
+       }
+
+       return 0;
+}
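
For orientation, a sketch of the on-disk layout mod_check_sig() validates; the trailing magic string is stripped by the callers (module_sig_check() above, mod_verify_sig() below) before the checker runs:

    /*
     *  +----------------+-----------------+---------------------------+
     *  | module payload | signature data  | struct module_signature   |
     *  | (modlen bytes) | (sig_len bytes) | (12 bytes, sig_len is BE) |
     *  +----------------+-----------------+---------------------------+
     *  ...followed by MODULE_SIG_STRING as the appended-signature marker.
     */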
index b10fb19..9d9fc67 100644 (file)
@@ -7,37 +7,13 @@
 
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/module_signature.h>
 #include <linux/string.h>
 #include <linux/verification.h>
 #include <crypto/public_key.h>
 #include "module-internal.h"
 
-enum pkey_id_type {
-       PKEY_ID_PGP,            /* OpenPGP generated key ID */
-       PKEY_ID_X509,           /* X.509 arbitrary subjectKeyIdentifier */
-       PKEY_ID_PKCS7,          /* Signature in PKCS#7 message */
-};
-
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- *     - Signer's name
- *     - Key identifier
- *     - Signature data
- *     - Information block
- */
-struct module_signature {
-       u8      algo;           /* Public-key crypto algorithm [0] */
-       u8      hash;           /* Digest algorithm [0] */
-       u8      id_type;        /* Key identifier type [PKEY_ID_PKCS7] */
-       u8      signer_len;     /* Length of signer's name [0] */
-       u8      key_id_len;     /* Length of key identifier [0] */
-       u8      __pad[3];
-       __be32  sig_len;        /* Length of signature data */
-};
-
 /*
  * Verify the signature on a module.
  */
@@ -45,6 +21,7 @@ int mod_verify_sig(const void *mod, struct load_info *info)
 {
        struct module_signature ms;
        size_t sig_len, modlen = info->len;
+       int ret;
 
        pr_devel("==>%s(,%zu)\n", __func__, modlen);
 
@@ -52,32 +29,15 @@ int mod_verify_sig(const void *mod, struct load_info *info)
                return -EBADMSG;
 
        memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
-       modlen -= sizeof(ms);
+
+       ret = mod_check_sig(&ms, modlen, info->name);
+       if (ret)
+               return ret;
 
        sig_len = be32_to_cpu(ms.sig_len);
-       if (sig_len >= modlen)
-               return -EBADMSG;
-       modlen -= sig_len;
+       modlen -= sig_len + sizeof(ms);
        info->len = modlen;
 
-       if (ms.id_type != PKEY_ID_PKCS7) {
-               pr_err("%s: Module is not signed with expected PKCS#7 message\n",
-                      info->name);
-               return -ENOPKG;
-       }
-
-       if (ms.algo != 0 ||
-           ms.hash != 0 ||
-           ms.signer_len != 0 ||
-           ms.key_id_len != 0 ||
-           ms.__pad[0] != 0 ||
-           ms.__pad[1] != 0 ||
-           ms.__pad[2] != 0) {
-               pr_err("%s: PKCS#7 signature info has unexpected non-zero params\n",
-                      info->name);
-               return -EBADMSG;
-       }
-
        return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len,
                                      VERIFY_USE_SECONDARY_KEYRING,
                                      VERIFYING_MODULE_SIGNATURE,
index 057540b..f470a03 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/debug_locks.h>
 #include <linux/sched/debug.h>
 #include <linux/interrupt.h>
+#include <linux/kgdb.h>
 #include <linux/kmsg_dump.h>
 #include <linux/kallsyms.h>
 #include <linux/notifier.h>
@@ -179,6 +180,7 @@ void panic(const char *fmt, ...)
         * after setting panic_cpu) from invoking panic() again.
         */
        local_irq_disable();
+       preempt_disable_notrace();
 
        /*
         * It's possible to come here directly from a panic-assertion and
@@ -219,6 +221,13 @@ void panic(const char *fmt, ...)
                dump_stack();
 #endif
 
+       /*
+        * If kgdb is enabled, give it a chance to run before we stop all
+        * the other CPUs or else we won't be able to debug processes left
+        * running on them.
+        */
+       kgdb_panic(buf);
+
        /*
         * If we have crashed and we have a crash kernel loaded let it handle
         * everything else.
@@ -551,9 +560,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 {
        disable_trace_on_warning();
 
-       if (args)
-               pr_warn(CUT_HERE);
-
        if (file)
                pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
                        raw_smp_processor_id(), current->pid, file, line,
@@ -591,37 +597,26 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
        add_taint(taint, LOCKDEP_STILL_OK);
 }
 
-#ifdef WANT_WARN_ON_SLOWPATH
-void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
+#ifndef __WARN_FLAGS
+void warn_slowpath_fmt(const char *file, int line, unsigned taint,
+                      const char *fmt, ...)
 {
        struct warn_args args;
 
-       args.fmt = fmt;
-       va_start(args.args, fmt);
-       __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL,
-              &args);
-       va_end(args.args);
-}
-EXPORT_SYMBOL(warn_slowpath_fmt);
+       pr_warn(CUT_HERE);
 
-void warn_slowpath_fmt_taint(const char *file, int line,
-                            unsigned taint, const char *fmt, ...)
-{
-       struct warn_args args;
+       if (!fmt) {
+               __warn(file, line, __builtin_return_address(0), taint,
+                      NULL, NULL);
+               return;
+       }
 
        args.fmt = fmt;
        va_start(args.args, fmt);
        __warn(file, line, __builtin_return_address(0), taint, NULL, &args);
        va_end(args.args);
 }
-EXPORT_SYMBOL(warn_slowpath_fmt_taint);
-
-void warn_slowpath_null(const char *file, int line)
-{
-       pr_warn(CUT_HERE);
-       __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
-}
-EXPORT_SYMBOL(warn_slowpath_null);
+EXPORT_SYMBOL(warn_slowpath_fmt);
 #else
 void __warn_printk(const char *fmt, ...)
 {
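
Net effect of this hunk: the three warn_slowpath_* entry points collapse
into a single warn_slowpath_fmt() that takes the taint flag and tolerates a
NULL format. On architectures without __WARN_FLAGS, the WARN() family is
expected to funnel into it roughly like this (a sketch of the
include/asm-generic/bug.h side, not part of this file):

    #ifndef __WARN_FLAGS
    #define __WARN()            __WARN_printf(TAINT_WARN, NULL)
    #define __WARN_printf(taint, arg...) \
            warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
    #endif
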
index cf44878..8e56f8b 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_SYSFS
 /* Protects all built-in parameters, modules use their own param_lock */
@@ -96,13 +97,19 @@ bool parameq(const char *a, const char *b)
        return parameqn(a, b, strlen(a)+1);
 }
 
-static void param_check_unsafe(const struct kernel_param *kp)
+static bool param_check_unsafe(const struct kernel_param *kp)
 {
+       if (kp->flags & KERNEL_PARAM_FL_HWPARAM &&
+           security_locked_down(LOCKDOWN_MODULE_PARAMETERS))
+               return false;
+
        if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
                pr_notice("Setting dangerous option %s - tainting kernel\n",
                          kp->name);
                add_taint(TAINT_USER, LOCKDEP_STILL_OK);
        }
+
+       return true;
 }
 
 static int parse_one(char *param,
@@ -132,8 +139,10 @@ static int parse_one(char *param,
                        pr_debug("handling %s with %p\n", param,
                                params[i].ops->set);
                        kernel_param_lock(params[i].mod);
-                       param_check_unsafe(&params[i]);
-                       err = params[i].ops->set(val, &params[i]);
+                       if (param_check_unsafe(&params[i]))
+                               err = params[i].ops->set(val, &params[i]);
+                       else
+                               err = -EPERM;
                        kernel_param_unlock(params[i].mod);
                        return err;
                }
@@ -553,8 +562,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
                return -EPERM;
 
        kernel_param_lock(mk->mod);
-       param_check_unsafe(attribute->param);
-       err = attribute->param->ops->set(buf, attribute->param);
+       if (param_check_unsafe(attribute->param))
+               err = attribute->param->ops->set(buf, attribute->param);
+       else
+               err = -EPERM;
        kernel_param_unlock(mk->mod);
        if (!err)
                return len;
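
The new lockdown check keys off KERNEL_PARAM_FL_HWPARAM, which is set only
for parameters declared through the hardware-parameter helpers. A driver
opts in roughly like this (sketch using module_param_hw() from
linux/moduleparam.h):

    #include <linux/moduleparam.h>

    static int io = 0x300;
    /* hwtype is one of ioport, iomem, irq, dma, ... */
    module_param_hw(io, int, ioport, 0444);

With the kernel locked down, writing such a parameter now fails with -EPERM
instead of silently taking effect.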
index cd7434e..3c0a5a8 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/ctype.h>
 #include <linux/genhd.h>
 #include <linux/ktime.h>
+#include <linux/security.h>
 #include <trace/events/power.h>
 
 #include "power.h"
@@ -68,7 +69,7 @@ static const struct platform_hibernation_ops *hibernation_ops;
 
 bool hibernation_available(void)
 {
-       return (nohibernate == 0);
+       return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION);
 }
 
 /**
index e8710d1..e26de7a 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 #include <linux/suspend.h>
 #include <linux/syscalls.h>
+#include <linux/pm_runtime.h>
 
 #include "power.h"
 
index 9568a2f..04e83fd 100644 (file)
@@ -650,3 +650,243 @@ static int __init pm_qos_power_init(void)
 }
 
 late_initcall(pm_qos_power_init);
+
+/* Definitions related to the frequency QoS below. */
+
+/**
+ * freq_constraints_init - Initialize frequency QoS constraints.
+ * @qos: Frequency QoS constraints to initialize.
+ */
+void freq_constraints_init(struct freq_constraints *qos)
+{
+       struct pm_qos_constraints *c;
+
+       c = &qos->min_freq;
+       plist_head_init(&c->list);
+       c->target_value = FREQ_QOS_MIN_DEFAULT_VALUE;
+       c->default_value = FREQ_QOS_MIN_DEFAULT_VALUE;
+       c->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE;
+       c->type = PM_QOS_MAX;
+       c->notifiers = &qos->min_freq_notifiers;
+       BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers);
+
+       c = &qos->max_freq;
+       plist_head_init(&c->list);
+       c->target_value = FREQ_QOS_MAX_DEFAULT_VALUE;
+       c->default_value = FREQ_QOS_MAX_DEFAULT_VALUE;
+       c->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE;
+       c->type = PM_QOS_MIN;
+       c->notifiers = &qos->max_freq_notifiers;
+       BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers);
+}
+
+/**
+ * freq_qos_read_value - Get frequency QoS constraint for a given list.
+ * @qos: Constraints to evaluate.
+ * @type: QoS request type.
+ */
+s32 freq_qos_read_value(struct freq_constraints *qos,
+                       enum freq_qos_req_type type)
+{
+       s32 ret;
+
+       switch (type) {
+       case FREQ_QOS_MIN:
+               ret = IS_ERR_OR_NULL(qos) ?
+                       FREQ_QOS_MIN_DEFAULT_VALUE :
+                       pm_qos_read_value(&qos->min_freq);
+               break;
+       case FREQ_QOS_MAX:
+               ret = IS_ERR_OR_NULL(qos) ?
+                       FREQ_QOS_MAX_DEFAULT_VALUE :
+                       pm_qos_read_value(&qos->max_freq);
+               break;
+       default:
+               WARN_ON(1);
+               ret = 0;
+       }
+
+       return ret;
+}
+
+/**
+ * freq_qos_apply - Add/modify/remove frequency QoS request.
+ * @req: Constraint request to apply.
+ * @action: Action to perform (add/update/remove).
+ * @value: Value to assign to the QoS request.
+ */
+static int freq_qos_apply(struct freq_qos_request *req,
+                         enum pm_qos_req_action action, s32 value)
+{
+       int ret;
+
+       switch (req->type) {
+       case FREQ_QOS_MIN:
+               ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode,
+                                          action, value);
+               break;
+       case FREQ_QOS_MAX:
+               ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode,
+                                          action, value);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+
+/**
+ * freq_qos_add_request - Insert new frequency QoS request into a given list.
+ * @qos: Constraints to update.
+ * @req: Preallocated request object.
+ * @type: Request type.
+ * @value: Request value.
+ *
+ * Insert a new entry into the @qos list of requests, recompute the effective
+ * QoS constraint value for that list and initialize the @req object.  The
+ * caller needs to save that object for later use in updates and removal.
+ *
+ * Return 1 if the effective constraint value has changed, 0 if the effective
+ * constraint value has not changed, or a negative error code on failures.
+ */
+int freq_qos_add_request(struct freq_constraints *qos,
+                        struct freq_qos_request *req,
+                        enum freq_qos_req_type type, s32 value)
+{
+       int ret;
+
+       if (IS_ERR_OR_NULL(qos) || !req)
+               return -EINVAL;
+
+       if (WARN(freq_qos_request_active(req),
+                "%s() called for active request\n", __func__))
+               return -EINVAL;
+
+       req->qos = qos;
+       req->type = type;
+       ret = freq_qos_apply(req, PM_QOS_ADD_REQ, value);
+       if (ret < 0) {
+               req->qos = NULL;
+               req->type = 0;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(freq_qos_add_request);
+
+/**
+ * freq_qos_update_request - Modify existing frequency QoS request.
+ * @req: Request to modify.
+ * @new_value: New request value.
+ *
+ * Update an existing frequency QoS request along with the effective constraint
+ * value for the list of requests it belongs to.
+ *
+ * Return 1 if the effective constraint value has changed, 0 if the effective
+ * constraint value has not changed, or a negative error code on failures.
+ */
+int freq_qos_update_request(struct freq_qos_request *req, s32 new_value)
+{
+       if (!req)
+               return -EINVAL;
+
+       if (WARN(!freq_qos_request_active(req),
+                "%s() called for unknown object\n", __func__))
+               return -EINVAL;
+
+       if (req->pnode.prio == new_value)
+               return 0;
+
+       return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value);
+}
+EXPORT_SYMBOL_GPL(freq_qos_update_request);
+
+/**
+ * freq_qos_remove_request - Remove frequency QoS request from its list.
+ * @req: Request to remove.
+ *
+ * Remove the given frequency QoS request from the list of constraints it
+ * belongs to and recompute the effective constraint value for that list.
+ *
+ * Return 1 if the effective constraint value has changed, 0 if the effective
+ * constraint value has not changed, or a negative error code on failures.
+ */
+int freq_qos_remove_request(struct freq_qos_request *req)
+{
+       if (!req)
+               return -EINVAL;
+
+       if (WARN(!freq_qos_request_active(req),
+                "%s() called for unknown object\n", __func__))
+               return -EINVAL;
+
+       return freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+}
+EXPORT_SYMBOL_GPL(freq_qos_remove_request);
+
+/**
+ * freq_qos_add_notifier - Add frequency QoS change notifier.
+ * @qos: List of requests to add the notifier to.
+ * @type: Request type.
+ * @notifier: Notifier block to add.
+ */
+int freq_qos_add_notifier(struct freq_constraints *qos,
+                         enum freq_qos_req_type type,
+                         struct notifier_block *notifier)
+{
+       int ret;
+
+       if (IS_ERR_OR_NULL(qos) || !notifier)
+               return -EINVAL;
+
+       switch (type) {
+       case FREQ_QOS_MIN:
+               ret = blocking_notifier_chain_register(qos->min_freq.notifiers,
+                                                      notifier);
+               break;
+       case FREQ_QOS_MAX:
+               ret = blocking_notifier_chain_register(qos->max_freq.notifiers,
+                                                      notifier);
+               break;
+       default:
+               WARN_ON(1);
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(freq_qos_add_notifier);
+
+/**
+ * freq_qos_remove_notifier - Remove frequency QoS change notifier.
+ * @qos: List of requests to remove the notifier from.
+ * @type: Request type.
+ * @notifier: Notifier block to remove.
+ */
+int freq_qos_remove_notifier(struct freq_constraints *qos,
+                            enum freq_qos_req_type type,
+                            struct notifier_block *notifier)
+{
+       int ret;
+
+       if (IS_ERR_OR_NULL(qos) || !notifier)
+               return -EINVAL;
+
+       switch (type) {
+       case FREQ_QOS_MIN:
+               ret = blocking_notifier_chain_unregister(qos->min_freq.notifiers,
+                                                        notifier);
+               break;
+       case FREQ_QOS_MAX:
+               ret = blocking_notifier_chain_unregister(qos->max_freq.notifiers,
+                                                        notifier);
+               break;
+       default:
+               WARN_ON(1);
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(freq_qos_remove_notifier);
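
Taken together, the block above adds a self-contained min/max frequency QoS
API. A minimal usage sketch for a hypothetical consumer (the
freq_constraints object would normally live inside a cpufreq policy; values
are in kHz by cpufreq convention):

    static struct freq_constraints fc;
    static struct freq_qos_request min_req;

    void example(void)
    {
            int ret;
            s32 floor;

            freq_constraints_init(&fc);

            /* Request a frequency floor of 800000. */
            ret = freq_qos_add_request(&fc, &min_req, FREQ_QOS_MIN, 800000);
            if (ret < 0)
                    return;

            freq_qos_update_request(&min_req, 1200000);     /* raise it */
            floor = freq_qos_read_value(&fc, FREQ_QOS_MIN); /* effective min */
            freq_qos_remove_request(&min_req);
    }
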
index 74877e9..76036a4 100644 (file)
@@ -487,8 +487,8 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
        while (start < end &&
               !find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
                                    false, &res)) {
-               pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               end_pfn = (res.end + 1) >> PAGE_SHIFT;
+               pfn = PFN_UP(res.start);
+               end_pfn = PFN_DOWN(res.end + 1);
                if (end_pfn > pfn)
                        ret = (*func)(pfn, end_pfn - pfn, arg);
                if (ret)
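
PFN_UP()/PFN_DOWN() are the existing helpers from include/linux/pfn.h; the
replacement is purely cosmetic, since they expand to the same arithmetic:

    #define PFN_UP(x)      (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
    #define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
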
index f9a1346..dd05a37 100644 (file)
@@ -1656,7 +1656,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
        if (cpumask_equal(p->cpus_ptr, new_mask))
                goto out;
 
-       if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
+       dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+       if (dest_cpu >= nr_cpu_ids) {
                ret = -EINVAL;
                goto out;
        }
@@ -1677,7 +1678,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
        if (cpumask_test_cpu(task_cpu(p), new_mask))
                goto out;
 
-       dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
        if (task_running(rq, p) || p->state == TASK_WAKING) {
                struct migration_arg arg = { p, dest_cpu };
                /* Need help from migration thread: drop lock and wait. */
@@ -3254,7 +3254,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                /* Task is done with its stack. */
                put_task_stack(prev);
 
-               put_task_struct(prev);
+               put_task_struct_rcu_user(prev);
        }
 
        tick_nohz_task_switch();
@@ -3358,15 +3358,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
                else
                        prev->active_mm = NULL;
        } else {                                        // to user
+               membarrier_switch_mm(rq, prev->active_mm, next->mm);
                /*
                 * sys_membarrier() requires an smp_mb() between setting
-                * rq->curr and returning to userspace.
+                * rq->curr / membarrier_switch_mm() and returning to userspace.
                 *
                 * The below provides this either through switch_mm(), or in
                 * case 'prev->active_mm == next->mm' through
                 * finish_task_switch()'s mmdrop().
                 */
-
                switch_mm_irqs_off(prev->active_mm, next->mm, next);
 
                if (!prev->mm) {                        // from kernel
@@ -4042,7 +4042,11 @@ static void __sched notrace __schedule(bool preempt)
 
        if (likely(prev != next)) {
                rq->nr_switches++;
-               rq->curr = next;
+               /*
+                * RCU users of rcu_dereference(rq->curr) may not see
+                * changes to task_struct made by pick_next_task().
+                */
+               RCU_INIT_POINTER(rq->curr, next);
                /*
                 * The membarrier system call requires each architecture
                 * to have a full memory barrier after updating
@@ -4223,9 +4227,8 @@ static void __sched notrace preempt_schedule_common(void)
 
 #ifdef CONFIG_PREEMPTION
 /*
- * this is the entry point to schedule() from in-kernel preemption
- * off of preempt_enable. Kernel preemptions off return from interrupt
- * occur there and call schedule directly.
+ * This is the entry point to schedule() from in-kernel preemption
+ * off of preempt_enable.
  */
 asmlinkage __visible void __sched notrace preempt_schedule(void)
 {
@@ -4296,7 +4299,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 #endif /* CONFIG_PREEMPTION */
 
 /*
- * this is the entry point to schedule() from kernel preemption
+ * This is the entry point to schedule() from kernel preemption
  * off of irq context.
 * Note that this is called and returns with irqs disabled. This will
  * protect us against recursive calling from irq.
@@ -5103,9 +5106,6 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
        u32 size;
        int ret;
 
-       if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
-               return -EFAULT;
-
        /* Zero the full structure, so that a short copy will be nice: */
        memset(attr, 0, sizeof(*attr));
 
@@ -5113,45 +5113,19 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
        if (ret)
                return ret;
 
-       /* Bail out on silly large: */
-       if (size > PAGE_SIZE)
-               goto err_size;
-
        /* ABI compatibility quirk: */
        if (!size)
                size = SCHED_ATTR_SIZE_VER0;
-
-       if (size < SCHED_ATTR_SIZE_VER0)
+       if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE)
                goto err_size;
 
-       /*
-        * If we're handed a bigger struct than we know of,
-        * ensure all the unknown bits are 0 - i.e. new
-        * user-space does not rely on any kernel feature
-        * extensions we dont know about yet.
-        */
-       if (size > sizeof(*attr)) {
-               unsigned char __user *addr;
-               unsigned char __user *end;
-               unsigned char val;
-
-               addr = (void __user *)uattr + sizeof(*attr);
-               end  = (void __user *)uattr + size;
-
-               for (; addr < end; addr++) {
-                       ret = get_user(val, addr);
-                       if (ret)
-                               return ret;
-                       if (val)
-                               goto err_size;
-               }
-               size = sizeof(*attr);
+       ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
+       if (ret) {
+               if (ret == -E2BIG)
+                       goto err_size;
+               return ret;
        }
 
-       ret = copy_from_user(attr, uattr, size);
-       if (ret)
-               return -EFAULT;
-
        if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
            size < SCHED_ATTR_SIZE_VER1)
                return -EINVAL;
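
copy_struct_from_user() (linux/uaccess.h, new in v5.4) encapsulates the
extensible-struct dance that the removed open-coded loop performed. Its
contract, roughly:

    /*
     * int copy_struct_from_user(void *dst, size_t ksize,
     *                           const void __user *src, size_t usize);
     *
     * usize == ksize: plain copy_from_user().
     * usize <  ksize: copy usize bytes and zero-fill dst's tail.
     * usize >  ksize: copy ksize bytes, but fail with -E2BIG if any of
     *                 the trailing user bytes are non-zero, i.e. if
     *                 userspace relies on extensions this kernel does
     *                 not know about.
     */
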
@@ -5351,7 +5325,7 @@ sched_attr_copy_to_user(struct sched_attr __user *uattr,
  * sys_sched_getattr - similar to sched_getparam, but with sched_attr
  * @pid: the pid in question.
  * @uattr: structure containing the extended parameters.
- * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility.
+ * @usize: sizeof(attr) for fwd/bwd comp.
  * @flags: for future extension.
  */
 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
@@ -6069,7 +6043,8 @@ void init_idle(struct task_struct *idle, int cpu)
        __set_task_cpu(idle, cpu);
        rcu_read_unlock();
 
-       rq->curr = rq->idle = idle;
+       rq->idle = idle;
+       rcu_assign_pointer(rq->curr, idle);
        idle->on_rq = TASK_ON_RQ_QUEUED;
 #ifdef CONFIG_SMP
        idle->on_cpu = 1;
@@ -6430,8 +6405,6 @@ int sched_cpu_activate(unsigned int cpu)
        }
        rq_unlock_irqrestore(rq, &rf);
 
-       update_max_interval();
-
        return 0;
 }
 
index 2305ce8..46ed4e1 100644 (file)
@@ -740,7 +740,7 @@ void vtime_account_system(struct task_struct *tsk)
 
        write_seqcount_begin(&vtime->seqcount);
        /* We might have scheduled out from guest path */
-       if (current->flags & PF_VCPU)
+       if (tsk->flags & PF_VCPU)
                vtime_account_guest(tsk, vtime);
        else
                __vtime_account_system(tsk, vtime);
@@ -783,7 +783,7 @@ void vtime_guest_enter(struct task_struct *tsk)
         */
        write_seqcount_begin(&vtime->seqcount);
        __vtime_account_system(tsk, vtime);
-       current->flags |= PF_VCPU;
+       tsk->flags |= PF_VCPU;
        write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_enter);
@@ -794,7 +794,7 @@ void vtime_guest_exit(struct task_struct *tsk)
 
        write_seqcount_begin(&vtime->seqcount);
        vtime_account_guest(tsk, vtime);
-       current->flags &= ~PF_VCPU;
+       tsk->flags &= ~PF_VCPU;
        write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_exit);
index d4bbf68..682a754 100644 (file)
@@ -749,7 +749,6 @@ void init_entity_runnable_average(struct sched_entity *se)
        /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
 }
 
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
 static void attach_entity_cfs_rq(struct sched_entity *se);
 
 /*
@@ -1603,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
                return;
 
        rcu_read_lock();
-       cur = task_rcu_dereference(&dst_rq->curr);
+       cur = rcu_dereference(dst_rq->curr);
        if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
                cur = NULL;
 
@@ -4354,21 +4353,16 @@ static inline u64 sched_cfs_bandwidth_slice(void)
 }
 
 /*
- * Replenish runtime according to assigned quota and update expiration time.
- * We use sched_clock_cpu directly instead of rq->clock to avoid adding
- * additional synchronization around rq->lock.
+ * Replenish runtime according to assigned quota. We use sched_clock_cpu
+ * directly instead of rq->clock to avoid adding additional synchronization
+ * around rq->lock.
  *
  * requires cfs_b->lock
  */
 void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
 {
-       u64 now;
-
-       if (cfs_b->quota == RUNTIME_INF)
-               return;
-
-       now = sched_clock_cpu(smp_processor_id());
-       cfs_b->runtime = cfs_b->quota;
+       if (cfs_b->quota != RUNTIME_INF)
+               cfs_b->runtime = cfs_b->quota;
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4376,15 +4370,6 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
        return &tg->cfs_bandwidth;
 }
 
-/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
-{
-       if (unlikely(cfs_rq->throttle_count))
-               return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
-
-       return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
-}
-
 /* returns 0 on failure to allocate runtime */
 static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
@@ -4476,7 +4461,6 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 
        cfs_rq->throttle_count--;
        if (!cfs_rq->throttle_count) {
-               /* adjust cfs_rq_clock_task() */
                cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
                                             cfs_rq->throttled_clock_task;
 
@@ -4942,20 +4926,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
                if (++count > 3) {
                        u64 new, old = ktime_to_ns(cfs_b->period);
 
-                       new = (old * 147) / 128; /* ~115% */
-                       new = min(new, max_cfs_quota_period);
-
-                       cfs_b->period = ns_to_ktime(new);
-
-                       /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
-                       cfs_b->quota *= new;
-                       cfs_b->quota = div64_u64(cfs_b->quota, old);
-
-                       pr_warn_ratelimited(
-       "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
-                               smp_processor_id(),
-                               div_u64(new, NSEC_PER_USEC),
-                               div_u64(cfs_b->quota, NSEC_PER_USEC));
+                       /*
+                        * Grow period by a factor of 2 to avoid losing precision.
+                        * Precision loss in the quota/period ratio can cause __cfs_schedulable
+                        * to fail.
+                        */
+                       new = old * 2;
+                       if (new < max_cfs_quota_period) {
+                               cfs_b->period = ns_to_ktime(new);
+                               cfs_b->quota *= 2;
+
+                               pr_warn_ratelimited(
+       "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+                                       smp_processor_id(),
+                                       div_u64(new, NSEC_PER_USEC),
+                                       div_u64(cfs_b->quota, NSEC_PER_USEC));
+                       } else {
+                               pr_warn_ratelimited(
+       "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+                                       smp_processor_id(),
+                                       div_u64(old, NSEC_PER_USEC),
+                                       div_u64(cfs_b->quota, NSEC_PER_USEC));
+                       }
 
                        /* reset count so we don't come right back in here */
                        count = 0;
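
The switch from *147/128 to *2 is about integer rounding: the quota must be
rescaled by the same factor as the period, and only a power of two keeps
the quota/period ratio exact.

    /*
     * Rescaling must preserve quota/period:
     *
     *   new_quota = quota * new_period / old_period
     *
     * new_period = old * 2       => new_quota = quota * 2 (exact)
     * new_period = old * 147/128 => both divisions truncate, so the
     * ratio drifts slightly on every rescale and __cfs_schedulable()
     * can start rejecting a configuration that used to be valid.
     */
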
@@ -4994,15 +4986,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
-       u64 overrun;
-
        lockdep_assert_held(&cfs_b->lock);
 
        if (cfs_b->period_active)
                return;
 
        cfs_b->period_active = 1;
-       overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+       hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
        hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
 }
 
@@ -5080,11 +5070,6 @@ static inline bool cfs_bandwidth_used(void)
        return false;
 }
 
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
-{
-       return rq_clock_task(rq_of(cfs_rq));
-}
-
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
@@ -6412,7 +6397,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                }
 
                /* Evaluate the energy impact of using this CPU. */
-               if (max_spare_cap_cpu >= 0) {
+               if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
                        cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
                        cur_delta -= base_energy_pd;
                        if (cur_delta < best_delta) {
index c892c62..8dad5aa 100644 (file)
@@ -238,7 +238,6 @@ static void do_idle(void)
        tick_nohz_idle_enter();
 
        while (!need_resched()) {
-               check_pgt_cache();
                rmb();
 
                local_irq_disable();
index aa8d758..168479a 100644 (file)
@@ -30,10 +30,42 @@ static void ipi_mb(void *info)
        smp_mb();       /* IPIs should be serializing but paranoid. */
 }
 
+static void ipi_sync_rq_state(void *info)
+{
+       struct mm_struct *mm = (struct mm_struct *) info;
+
+       if (current->mm != mm)
+               return;
+       this_cpu_write(runqueues.membarrier_state,
+                      atomic_read(&mm->membarrier_state));
+       /*
+        * Issue a memory barrier after setting
+        * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
+        * guarantee that no memory access following registration is reordered
+        * before registration.
+        */
+       smp_mb();
+}
+
+void membarrier_exec_mmap(struct mm_struct *mm)
+{
+       /*
+        * Issue a memory barrier before clearing membarrier_state to
+        * guarantee that no memory access prior to exec is reordered after
+        * clearing this state.
+        */
+       smp_mb();
+       atomic_set(&mm->membarrier_state, 0);
+       /*
+        * Keep the runqueue membarrier_state in sync with this mm
+        * membarrier_state.
+        */
+       this_cpu_write(runqueues.membarrier_state, 0);
+}
+
 static int membarrier_global_expedited(void)
 {
        int cpu;
-       bool fallback = false;
        cpumask_var_t tmpmask;
 
        if (num_online_cpus() == 1)
@@ -45,17 +77,11 @@ static int membarrier_global_expedited(void)
         */
        smp_mb();       /* system call entry is not a mb. */
 
-       /*
-        * Expedited membarrier commands guarantee that they won't
-        * block, hence the GFP_NOWAIT allocation flag and fallback
-        * implementation.
-        */
-       if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
-               /* Fallback for OOM. */
-               fallback = true;
-       }
+       if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+               return -ENOMEM;
 
        cpus_read_lock();
+       rcu_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;
 
@@ -70,23 +96,28 @@ static int membarrier_global_expedited(void)
                if (cpu == raw_smp_processor_id())
                        continue;
 
-               rcu_read_lock();
-               p = task_rcu_dereference(&cpu_rq(cpu)->curr);
-               if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
-                                  MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
-                       if (!fallback)
-                               __cpumask_set_cpu(cpu, tmpmask);
-                       else
-                               smp_call_function_single(cpu, ipi_mb, NULL, 1);
-               }
-               rcu_read_unlock();
-       }
-       if (!fallback) {
-               preempt_disable();
-               smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
-               preempt_enable();
-               free_cpumask_var(tmpmask);
+               if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
+                   MEMBARRIER_STATE_GLOBAL_EXPEDITED))
+                       continue;
+
+               /*
+                * Skip the CPU if it runs a kernel thread. The scheduler
+                * leaves the prior task mm in place as an optimization when
+                * scheduling a kthread.
+                */
+               p = rcu_dereference(cpu_rq(cpu)->curr);
+               if (p->flags & PF_KTHREAD)
+                       continue;
+
+               __cpumask_set_cpu(cpu, tmpmask);
        }
+       rcu_read_unlock();
+
+       preempt_disable();
+       smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+       preempt_enable();
+
+       free_cpumask_var(tmpmask);
        cpus_read_unlock();
 
        /*
@@ -101,22 +132,22 @@ static int membarrier_global_expedited(void)
 static int membarrier_private_expedited(int flags)
 {
        int cpu;
-       bool fallback = false;
        cpumask_var_t tmpmask;
+       struct mm_struct *mm = current->mm;
 
        if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
                if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
                        return -EINVAL;
-               if (!(atomic_read(&current->mm->membarrier_state) &
+               if (!(atomic_read(&mm->membarrier_state) &
                      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
                        return -EPERM;
        } else {
-               if (!(atomic_read(&current->mm->membarrier_state) &
+               if (!(atomic_read(&mm->membarrier_state) &
                      MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
                        return -EPERM;
        }
 
-       if (num_online_cpus() == 1)
+       if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1)
                return 0;
 
        /*
@@ -125,17 +156,11 @@ static int membarrier_private_expedited(int flags)
         */
        smp_mb();       /* system call entry is not a mb. */
 
-       /*
-        * Expedited membarrier commands guarantee that they won't
-        * block, hence the GFP_NOWAIT allocation flag and fallback
-        * implementation.
-        */
-       if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
-               /* Fallback for OOM. */
-               fallback = true;
-       }
+       if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+               return -ENOMEM;
 
        cpus_read_lock();
+       rcu_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;
 
@@ -149,22 +174,17 @@ static int membarrier_private_expedited(int flags)
                 */
                if (cpu == raw_smp_processor_id())
                        continue;
-               rcu_read_lock();
-               p = task_rcu_dereference(&cpu_rq(cpu)->curr);
-               if (p && p->mm == current->mm) {
-                       if (!fallback)
-                               __cpumask_set_cpu(cpu, tmpmask);
-                       else
-                               smp_call_function_single(cpu, ipi_mb, NULL, 1);
-               }
-               rcu_read_unlock();
-       }
-       if (!fallback) {
-               preempt_disable();
-               smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
-               preempt_enable();
-               free_cpumask_var(tmpmask);
+               p = rcu_dereference(cpu_rq(cpu)->curr);
+               if (p && p->mm == mm)
+                       __cpumask_set_cpu(cpu, tmpmask);
        }
+       rcu_read_unlock();
+
+       preempt_disable();
+       smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+       preempt_enable();
+
+       free_cpumask_var(tmpmask);
        cpus_read_unlock();
 
        /*
@@ -177,32 +197,78 @@ static int membarrier_private_expedited(int flags)
        return 0;
 }
 
+static int sync_runqueues_membarrier_state(struct mm_struct *mm)
+{
+       int membarrier_state = atomic_read(&mm->membarrier_state);
+       cpumask_var_t tmpmask;
+       int cpu;
+
+       if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
+               this_cpu_write(runqueues.membarrier_state, membarrier_state);
+
+               /*
+                * For single mm user, we can simply issue a memory barrier
+                * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
+                * mm and in the current runqueue to guarantee that no memory
+                * access following registration is reordered before
+                * registration.
+                */
+               smp_mb();
+               return 0;
+       }
+
+       if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+               return -ENOMEM;
+
+       /*
+        * For mm with multiple users, we need to ensure all future
+        * scheduler executions will observe @mm's new membarrier
+        * state.
+        */
+       synchronize_rcu();
+
+       /*
+        * For each cpu runqueue, if the task's mm matches @mm, ensure that all
+        * @mm's membarrier state set bits are also set in the runqueue's
+        * membarrier state. This ensures that a runqueue scheduling
+        * between threads which are users of @mm has its membarrier state
+        * updated.
+        */
+       cpus_read_lock();
+       rcu_read_lock();
+       for_each_online_cpu(cpu) {
+               struct rq *rq = cpu_rq(cpu);
+               struct task_struct *p;
+
+               p = rcu_dereference(rq->curr);
+               if (p && p->mm == mm)
+                       __cpumask_set_cpu(cpu, tmpmask);
+       }
+       rcu_read_unlock();
+
+       preempt_disable();
+       smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
+       preempt_enable();
+
+       free_cpumask_var(tmpmask);
+       cpus_read_unlock();
+
+       return 0;
+}
+
 static int membarrier_register_global_expedited(void)
 {
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;
+       int ret;
 
        if (atomic_read(&mm->membarrier_state) &
            MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
                return 0;
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
-       if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
-               /*
-                * For single mm user, single threaded process, we can
-                * simply issue a memory barrier after setting
-                * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
-                * no memory access following registration is reordered
-                * before registration.
-                */
-               smp_mb();
-       } else {
-               /*
-                * For multi-mm user threads, we need to ensure all
-                * future scheduler executions will observe the new
-                * thread flag state for this mm.
-                */
-               synchronize_rcu();
-       }
+       ret = sync_runqueues_membarrier_state(mm);
+       if (ret)
+               return ret;
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
                  &mm->membarrier_state);
 
@@ -213,12 +279,15 @@ static int membarrier_register_private_expedited(int flags)
 {
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;
-       int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
+       int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+           set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
+           ret;
 
        if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
                if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
                        return -EINVAL;
-               state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+               ready_state =
+                       MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
        }
 
        /*
@@ -226,20 +295,15 @@ static int membarrier_register_private_expedited(int flags)
         * groups, which use the same mm. (CLONE_VM but not
         * CLONE_THREAD).
         */
-       if (atomic_read(&mm->membarrier_state) & state)
+       if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
                return 0;
-       atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
        if (flags & MEMBARRIER_FLAG_SYNC_CORE)
-               atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
-                         &mm->membarrier_state);
-       if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
-               /*
-                * Ensure all future scheduler executions will observe the
-                * new thread flag state for this process.
-                */
-               synchronize_rcu();
-       }
-       atomic_or(state, &mm->membarrier_state);
+               set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
+       atomic_or(set_state, &mm->membarrier_state);
+       ret = sync_runqueues_membarrier_state(mm);
+       if (ret)
+               return ret;
+       atomic_or(ready_state, &mm->membarrier_state);
 
        return 0;
 }
@@ -253,8 +317,10 @@ static int membarrier_register_private_expedited(int flags)
  * command specified does not exist, not available on the running
  * kernel, or if the command argument is invalid, this system call
  * returns -EINVAL. For a given command, with flags argument set to 0,
- * this system call is guaranteed to always return the same value until
- * reboot.
+ * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
+ * always return the same value until reboot. In addition, it can return
+ * -ENOMEM if there is not enough memory available to perform the system
+ * call.
  *
  * All memory accesses performed in program order from each targeted thread
 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
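
For reference, the userspace side this ABI note describes: registration
followed by use, where -ENOMEM is now a possible failure. A minimal sketch:

    #include <linux/membarrier.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int membarrier(int cmd, unsigned int flags)
    {
            return syscall(__NR_membarrier, cmd, flags);
    }

    /* once, at startup */
    if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
            /* handle -ENOSYS / -EINVAL / -ENOMEM */;

    /* fast path: IPIs only the CPUs currently running this mm's threads */
    membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
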
index b3cb895..0db2c1b 100644 (file)
@@ -911,6 +911,10 @@ struct rq {
 
        atomic_t                nr_iowait;
 
+#ifdef CONFIG_MEMBARRIER
+       int membarrier_state;
+#endif
+
 #ifdef CONFIG_SMP
        struct root_domain              *rd;
        struct sched_domain __rcu       *sd;
@@ -2438,3 +2442,33 @@ static inline bool sched_energy_enabled(void)
 static inline bool sched_energy_enabled(void) { return false; }
 
 #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+
+#ifdef CONFIG_MEMBARRIER
+/*
+ * The scheduler provides memory barriers required by membarrier between:
+ * - prior user-space memory accesses and store to rq->membarrier_state,
+ * - store to rq->membarrier_state and following user-space memory accesses.
+ * In the same way it provides those guarantees around store to rq->curr.
+ */
+static inline void membarrier_switch_mm(struct rq *rq,
+                                       struct mm_struct *prev_mm,
+                                       struct mm_struct *next_mm)
+{
+       int membarrier_state;
+
+       if (prev_mm == next_mm)
+               return;
+
+       membarrier_state = atomic_read(&next_mm->membarrier_state);
+       if (READ_ONCE(rq->membarrier_state) == membarrier_state)
+               return;
+
+       WRITE_ONCE(rq->membarrier_state, membarrier_state);
+}
+#else
+static inline void membarrier_switch_mm(struct rq *rq,
+                                       struct mm_struct *prev_mm,
+                                       struct mm_struct *next_mm)
+{
+}
+#endif
index b5667a2..49b835f 100644 (file)
@@ -1948,7 +1948,7 @@ next_level:
 static int
 build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 {
-       enum s_alloc alloc_state;
+       enum s_alloc alloc_state = sa_none;
        struct sched_domain *sd;
        struct s_data d;
        struct rq *rq = NULL;
@@ -1956,6 +1956,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
        struct sched_domain_topology_level *tl_asym;
        bool has_asym = false;
 
+       if (WARN_ON(cpumask_empty(cpu_map)))
+               goto error;
+
        alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
        if (alloc_state != sa_rootdomain)
                goto error;
@@ -2026,7 +2029,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
        rcu_read_unlock();
 
        if (has_asym)
-               static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
+               static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
 
        if (rq && sched_debug_enabled) {
                pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
@@ -2121,8 +2124,12 @@ int sched_init_domains(const struct cpumask *cpu_map)
  */
 static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
+       unsigned int cpu = cpumask_any(cpu_map);
        int i;
 
+       if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
+               static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
+
        rcu_read_lock();
        for_each_cpu(i, cpu_map)
                cpu_attach_domain(NULL, &def_root_domain, i);
index c7031a2..998d50e 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) 2010          SUSE Linux Products GmbH
  * Copyright (C) 2010          Tejun Heo <tj@kernel.org>
  */
+#include <linux/compiler.h>
 #include <linux/completion.h>
 #include <linux/cpu.h>
 #include <linux/init.h>
@@ -167,7 +168,7 @@ static void set_state(struct multi_stop_data *msdata,
        /* Reset ack counter. */
        atomic_set(&msdata->thread_ack, msdata->num_threads);
        smp_wmb();
-       msdata->state = newstate;
+       WRITE_ONCE(msdata->state, newstate);
 }
 
 /* Last one to ack a state moves to the next state. */
@@ -186,7 +187,7 @@ void __weak stop_machine_yield(const struct cpumask *cpumask)
 static int multi_cpu_stop(void *data)
 {
        struct multi_stop_data *msdata = data;
-       enum multi_stop_state curstate = MULTI_STOP_NONE;
+       enum multi_stop_state newstate, curstate = MULTI_STOP_NONE;
        int cpu = smp_processor_id(), err = 0;
        const struct cpumask *cpumask;
        unsigned long flags;
@@ -210,8 +211,9 @@ static int multi_cpu_stop(void *data)
        do {
                /* Chill out and ensure we re-read multi_stop_state. */
                stop_machine_yield(cpumask);
-               if (msdata->state != curstate) {
-                       curstate = msdata->state;
+               newstate = READ_ONCE(msdata->state);
+               if (newstate != curstate) {
+                       curstate = newstate;
                        switch (curstate) {
                        case MULTI_STOP_DISABLE_IRQ:
                                local_irq_disable();
index 078950d..b6f2f35 100644 (file)
@@ -163,7 +163,7 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 #ifdef CONFIG_SPARC
 #endif
 
-#ifdef __hppa__
+#ifdef CONFIG_PARISC
 extern int pwrsw_enabled;
 #endif
 
@@ -264,7 +264,8 @@ extern struct ctl_table epoll_table[];
 extern struct ctl_table firmware_config_table[];
 #endif
 
-#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
 int sysctl_legacy_va_layout;
 #endif
 
@@ -619,7 +620,7 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
-#ifdef __hppa__
+#ifdef CONFIG_PARISC
        {
                .procname       = "soft-power",
                .data           = &pwrsw_enabled,
@@ -1573,7 +1574,8 @@ static struct ctl_table vm_table[] = {
                .proc_handler   = proc_dointvec,
                .extra1         = SYSCTL_ZERO,
        },
-#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
        {
                .procname       = "legacy_va_layout",
                .data           = &sysctl_legacy_va_layout,
index 0d4dc24..6560553 100644 (file)
@@ -164,7 +164,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
        struct hrtimer_clock_base *base;
 
        for (;;) {
-               base = timer->base;
+               base = READ_ONCE(timer->base);
                if (likely(base != &migration_base)) {
                        raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
                        if (likely(base == timer->base))
@@ -244,7 +244,7 @@ again:
                        return base;
 
                /* See the comment in lock_hrtimer_base() */
-               timer->base = &migration_base;
+               WRITE_ONCE(timer->base, &migration_base);
                raw_spin_unlock(&base->cpu_base->lock);
                raw_spin_lock(&new_base->cpu_base->lock);
 
@@ -253,10 +253,10 @@ again:
                        raw_spin_unlock(&new_base->cpu_base->lock);
                        raw_spin_lock(&base->cpu_base->lock);
                        new_cpu_base = this_cpu_base;
-                       timer->base = base;
+                       WRITE_ONCE(timer->base, base);
                        goto again;
                }
-               timer->base = new_base;
+               WRITE_ONCE(timer->base, new_base);
        } else {
                if (new_cpu_base != this_cpu_base &&
                    hrtimer_check_target(timer, new_base)) {
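
This hunk (like the stop_machine one above) applies the standard recipe for
lockless readers: every store to a field that is also read outside the lock
is marked, so the compiler can neither tear nor re-load it, and KCSAN knows
the race is intentional. The bare-bones shape:

    /* writer, under base->cpu_base->lock */
    WRITE_ONCE(timer->base, new_base);

    /* lockless reader (lock_hrtimer_base()) */
    base = READ_ONCE(timer->base);
    if (base != &migration_base) {
            raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
            if (base == timer->base)        /* re-check under the lock */
                    /* ... */;
    }
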
index 92a4319..42d512f 100644 (file)
@@ -266,7 +266,7 @@ static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
 /**
  * thread_group_sample_cputime - Sample cputime for a given task
  * @tsk:       Task for which cputime needs to be started
- * @iimes:     Storage for time samples
+ * @samples:   Storage for time samples
  *
  * Called from sys_getitimer() to calculate the expiry time of an active
  * timer. That means group cputime accounting is already active. Called
@@ -1038,12 +1038,12 @@ unlock:
  * member of @pct->bases[CLK].nextevt. False otherwise
  */
 static inline bool
-task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct)
+task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
 {
        int i;
 
        for (i = 0; i < CPUCLOCK_MAX; i++) {
-               if (sample[i] >= pct->bases[i].nextevt)
+               if (samples[i] >= pct->bases[i].nextevt)
                        return true;
        }
        return false;
index 142b076..dbd6905 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
+#include "timekeeping.h"
+
 /**
  * struct clock_read_data - data required to read from sched_clock()
  *
index c1f5bb5..b5a65e2 100644 (file)
@@ -42,39 +42,39 @@ static int bc_shutdown(struct clock_event_device *evt)
  */
 static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
 {
-       int bc_moved;
        /*
-        * We try to cancel the timer first. If the callback is on
-        * flight on some other cpu then we let it handle it. If we
-        * were able to cancel the timer nothing can rearm it as we
-        * own broadcast_lock.
+        * This is called either from enter/exit idle code or from the
+        * broadcast handler. In all cases tick_broadcast_lock is held.
         *
-        * However we can also be called from the event handler of
-        * ce_broadcast_hrtimer itself when it expires. We cannot
-        * restart the timer because we are in the callback, but we
-        * can set the expiry time and let the callback return
-        * HRTIMER_RESTART.
+        * hrtimer_cancel() can be called here neither from the
+        * broadcast handler nor from the enter/exit idle code. The idle
+        * code can run into the problem described in bc_shutdown() and the
+        * broadcast handler cannot wait for itself to complete for obvious
+        * reasons.
         *
-        * Since we are in the idle loop at this point and because
-        * hrtimer_{start/cancel} functions call into tracing,
-        * calls to these functions must be bound within RCU_NONIDLE.
+        * Each caller tries to arm the hrtimer on its own CPU, but if the
+        * hrtimer callback function is currently running, then
+        * hrtimer_start() cannot move it and the timer stays on the CPU on
+        * which it is assigned at the moment.
+        *
+        * As this can be called from idle code, the hrtimer_start()
+        * invocation has to be wrapped with RCU_NONIDLE() as
+        * hrtimer_start() can call into tracing.
         */
-       RCU_NONIDLE(
-               {
-                       bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
-                       if (bc_moved) {
-                               hrtimer_start(&bctimer, expires,
-                                             HRTIMER_MODE_ABS_PINNED_HARD);
-                       }
-               }
-       );
-
-       if (bc_moved) {
-               /* Bind the "device" to the cpu */
-               bc->bound_on = smp_processor_id();
-       } else if (bc->bound_on == smp_processor_id()) {
-               hrtimer_set_expires(&bctimer, expires);
-       }
+       RCU_NONIDLE( {
+               hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD);
+               /*
+                * The core tick broadcast mode expects bc->bound_on to be set
+                * correctly to prevent a CPU which has the broadcast hrtimer
+                * armed from going deep idle.
+                *
+                * As tick_broadcast_lock is held, nothing can change the cpu
+                * base which was just established in hrtimer_start() above. So
+                * the below access is safe even without holding the hrtimer
+                * base lock.
+                */
+               bc->bound_on = bctimer.base->cpu_base->cpu;
+       } );
        return 0;
 }
 
@@ -100,10 +100,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
 {
        ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
 
-       if (clockevent_state_oneshot(&ce_broadcast_hrtimer))
-               if (ce_broadcast_hrtimer.next_event != KTIME_MAX)
-                       return HRTIMER_RESTART;
-
        return HRTIMER_NORESTART;
 }
 
index 0e315a2..4820823 100644 (file)
@@ -1678,24 +1678,26 @@ void timer_clear_idle(void)
 static int collect_expired_timers(struct timer_base *base,
                                  struct hlist_head *heads)
 {
+       unsigned long now = READ_ONCE(jiffies);
+
        /*
         * NOHZ optimization. After a long idle sleep we need to forward the
         * base to current jiffies. Avoid a loop by searching the bitfield for
         * the next expiring timer.
         */
-       if ((long)(jiffies - base->clk) > 2) {
+       if ((long)(now - base->clk) > 2) {
                unsigned long next = __next_timer_interrupt(base);
 
                /*
                 * If the next timer is ahead of time forward to current
                 * jiffies, otherwise forward to the next expiry time:
                 */
-               if (time_after(next, jiffies)) {
+               if (time_after(next, now)) {
                        /*
                         * The call site will increment base->clk and then
                         * terminate the expiry loop immediately.
                         */
-                       base->clk = jiffies;
+                       base->clk = now;
                        return 0;
                }
                base->clk = next;
index ca1255d..44bd08f 100644 (file)
@@ -142,8 +142,13 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_BPF_READ);
+       if (ret < 0)
+               goto out;
+
        ret = probe_kernel_read(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
+out:
                memset(dst, 0, size);
 
        return ret;
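
Worth spelling out: the new "out:" label sits on the statement inside the
if, so the lockdown path jumps straight to the memset() and the destination
buffer is always zeroed before the error is returned; BPF programs never
see stale stack contents. Expanded, the control flow reads:

    ret = security_locked_down(LOCKDOWN_BPF_READ);
    if (ret < 0)
            goto out;               /* skip the read ... */

    ret = probe_kernel_read(dst, unsafe_ptr, size);
    if (unlikely(ret < 0))
    out:
            memset(dst, 0, size);   /* ... but always zero dst on error */

    return ret;
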
@@ -500,14 +505,17 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
-static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
+static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
+struct bpf_nested_pt_regs {
+       struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
-       struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
-       struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+       int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
        struct perf_raw_frag frag = {
                .copy           = ctx_copy,
                .size           = ctx_size,
@@ -522,12 +530,25 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                        .data   = meta,
                },
        };
+       struct perf_sample_data *sd;
+       struct pt_regs *regs;
+       u64 ret;
+
+       if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
+               ret = -EBUSY;
+               goto out;
+       }
+       sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
+       regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
 
        perf_fetch_caller_regs(regs);
        perf_sample_data_init(sd, 0, 0);
        sd->raw = &raw;
 
-       return __bpf_perf_event_output(regs, map, flags, sd);
+       ret = __bpf_perf_event_output(regs, map, flags, sd);
+out:
+       this_cpu_dec(bpf_event_output_nest_level);
+       return ret;
 }
 
 BPF_CALL_0(bpf_get_current_task)
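
The fix above is the usual per-CPU re-entrancy pattern: bpf_event_output()
can nest (task context preempted by softirq, softirq by hardirq), so one
scratch buffer per CPU is not enough. Stripped of the BPF specifics, the
shape is (sketch; names are illustrative):

    static DEFINE_PER_CPU(int, nest_level);
    static DEFINE_PER_CPU(struct pt_regs, scratch[3]); /* task/softirq/hardirq */

    /* callers run with preemption disabled, so the counter is stable */
    int level = this_cpu_inc_return(nest_level);
    if (WARN_ON_ONCE(level > 3)) {
            ret = -EBUSY;           /* nested deeper than we budgeted for */
            goto out;
    }
    regs = this_cpu_ptr(&scratch[level - 1]);
    /* ... use the per-level scratch area ... */
    out:
            this_cpu_dec(nest_level);
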
@@ -569,6 +590,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
 {
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_BPF_READ);
+       if (ret < 0)
+               goto out;
+
        /*
         * The strncpy_from_unsafe() call will likely not fill the entire
         * buffer, but that's okay in this circumstance as we're probing
@@ -580,6 +605,7 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
         */
        ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
+out:
                memset(dst, 0, size);
 
        return ret;
index 62a50bf..f296d89 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/clocksource.h>
 #include <linux/sched/task.h>
 #include <linux/kallsyms.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/tracefs.h>
 #include <linux/hardirq.h>
@@ -3486,6 +3487,11 @@ static int
 ftrace_avail_open(struct inode *inode, struct file *file)
 {
        struct ftrace_iterator *iter;
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
 
        if (unlikely(ftrace_disabled))
                return -ENODEV;
@@ -3505,6 +3511,15 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
 {
        struct ftrace_iterator *iter;
 
+       /*
+        * This shows us what functions are currently being
+        * traced and by what. It is not obvious that lockdown
+        * should hide such critical information from an admin.
+        * It may expose information we don't want people to
+        * see, but if something is tracing something, we
+        * probably want to know about it.
+        */
+
        iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
        if (!iter)
                return -ENOMEM;
@@ -3540,21 +3555,22 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
        struct ftrace_hash *hash;
        struct list_head *mod_head;
        struct trace_array *tr = ops->private;
-       int ret = 0;
+       int ret = -ENOMEM;
 
        ftrace_ops_init(ops);
 
        if (unlikely(ftrace_disabled))
                return -ENODEV;
 
+       if (tracing_check_open_get_tr(tr))
+               return -ENODEV;
+
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter)
-               return -ENOMEM;
+               goto out;
 
-       if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) {
-               kfree(iter);
-               return -ENOMEM;
-       }
+       if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX))
+               goto out;
 
        iter->ops = ops;
        iter->flags = flag;
@@ -3584,13 +3600,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 
                if (!iter->hash) {
                        trace_parser_put(&iter->parser);
-                       kfree(iter);
-                       ret = -ENOMEM;
                        goto out_unlock;
                }
        } else
                iter->hash = hash;
 
+       ret = 0;
+
        if (file->f_mode & FMODE_READ) {
                iter->pg = ftrace_pages_start;
 
@@ -3602,7 +3618,6 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
                        /* Failed */
                        free_ftrace_hash(iter->hash);
                        trace_parser_put(&iter->parser);
-                       kfree(iter);
                }
        } else
                file->private_data = iter;
@@ -3610,6 +3625,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
  out_unlock:
        mutex_unlock(&ops->func_hash->regex_lock);
 
+ out:
+       if (ret) {
+               kfree(iter);
+               if (tr)
+                       trace_array_put(tr);
+       }
+
        return ret;
 }
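The reshuffle above replaces three scattered kfree()/return sequences with one exit label; ret starts as -ENOMEM and is only cleared once every allocation has succeeded. The general shape, with hypothetical names standing in for the details:

	/* Sketch of the single-exit error path used above (names hypothetical). */
	struct foo *iter;
	int ret = -ENOMEM;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		goto out;		/* ret is still -ENOMEM */
	if (setup_parser(iter))		/* hypothetical helper */
		goto out;
	ret = 0;			/* success: cleanup below is skipped */
 out:
	if (ret) {
		kfree(iter);		/* kfree(NULL) is a no-op */
		if (tr)
			trace_array_put(tr);
	}
	return ret;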
 
@@ -3618,6 +3640,7 @@ ftrace_filter_open(struct inode *inode, struct file *file)
 {
        struct ftrace_ops *ops = inode->i_private;
 
+       /* Checks for tracefs lockdown */
        return ftrace_regex_open(ops,
                        FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES,
                        inode, file);
@@ -3628,6 +3651,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 {
        struct ftrace_ops *ops = inode->i_private;
 
+       /* Checks for tracefs lockdown */
        return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE,
                                 inode, file);
 }
@@ -5037,6 +5061,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 
        mutex_unlock(&iter->ops->func_hash->regex_lock);
        free_ftrace_hash(iter->hash);
+       if (iter->tr)
+               trace_array_put(iter->tr);
        kfree(iter);
 
        return 0;
@@ -5194,9 +5220,13 @@ static int
 __ftrace_graph_open(struct inode *inode, struct file *file,
                    struct ftrace_graph_data *fgd)
 {
-       int ret = 0;
+       int ret;
        struct ftrace_hash *new_hash = NULL;
 
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        if (file->f_mode & FMODE_WRITE) {
                const int size_bits = FTRACE_HASH_DEFAULT_BITS;
 
@@ -6537,8 +6567,9 @@ ftrace_pid_open(struct inode *inode, struct file *file)
        struct seq_file *m;
        int ret = 0;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
index 252f79c..6a0ee91 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/stacktrace.h>
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
@@ -304,6 +305,23 @@ void trace_array_put(struct trace_array *this_tr)
        mutex_unlock(&trace_types_lock);
 }
 
+int tracing_check_open_get_tr(struct trace_array *tr)
+{
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
+       if (tracing_disabled)
+               return -ENODEV;
+
+       if (tr && trace_array_get(tr) < 0)
+               return -ENODEV;
+
+       return 0;
+}
+
 int call_filter_check_discard(struct trace_event_call *call, void *rec,
                              struct ring_buffer *buffer,
                              struct ring_buffer_event *event)
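tracing_check_open_get_tr() folds the three checks every tracefs open handler needs (lockdown, tracing_disabled, and taking a trace_array reference) into a single call. Any open that passes a non-NULL tr must pair it with trace_array_put() in its release handler; a minimal sketch with hypothetical handler names:

	static int example_open(struct inode *inode, struct file *filp)
	{
		struct trace_array *tr = inode->i_private;
		int ret;

		ret = tracing_check_open_get_tr(tr);	/* lockdown + disabled + get */
		if (ret)
			return ret;
		filp->private_data = tr;
		return 0;
	}

	static int example_release(struct inode *inode, struct file *filp)
	{
		trace_array_put(inode->i_private);	/* drop the open()-time ref */
		return 0;
	}

The show_traces_open()/show_traces_release() change further down is exactly this pairing, including a trace_array_put() on the seq_open() error path.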
@@ -4140,8 +4158,11 @@ release:
 
 int tracing_open_generic(struct inode *inode, struct file *filp)
 {
-       if (tracing_disabled)
-               return -ENODEV;
+       int ret;
+
+       ret = tracing_check_open_get_tr(NULL);
+       if (ret)
+               return ret;
 
        filp->private_data = inode->i_private;
        return 0;
@@ -4156,15 +4177,14 @@ bool tracing_is_disabled(void)
  * Open and update trace_array ref count.
  * Must have the current trace_array passed to it.
  */
-static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+int tracing_open_generic_tr(struct inode *inode, struct file *filp)
 {
        struct trace_array *tr = inode->i_private;
+       int ret;
 
-       if (tracing_disabled)
-               return -ENODEV;
-
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        filp->private_data = inode->i_private;
 
@@ -4233,10 +4253,11 @@ static int tracing_open(struct inode *inode, struct file *file)
 {
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
-       int ret = 0;
+       int ret;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        /* If this file was open for write, then erase contents */
        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
@@ -4352,12 +4373,15 @@ static int show_traces_open(struct inode *inode, struct file *file)
        struct seq_file *m;
        int ret;
 
-       if (tracing_disabled)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        ret = seq_open(file, &show_traces_seq_ops);
-       if (ret)
+       if (ret) {
+               trace_array_put(tr);
                return ret;
+       }
 
        m = file->private_data;
        m->private = tr;
@@ -4365,6 +4389,14 @@ static int show_traces_open(struct inode *inode, struct file *file)
        return 0;
 }
 
+static int show_traces_release(struct inode *inode, struct file *file)
+{
+       struct trace_array *tr = inode->i_private;
+
+       trace_array_put(tr);
+       return seq_release(inode, file);
+}
+
 static ssize_t
 tracing_write_stub(struct file *filp, const char __user *ubuf,
                   size_t count, loff_t *ppos)
@@ -4395,8 +4427,8 @@ static const struct file_operations tracing_fops = {
 static const struct file_operations show_traces_fops = {
        .open           = show_traces_open,
        .read           = seq_read,
-       .release        = seq_release,
        .llseek         = seq_lseek,
+       .release        = show_traces_release,
 };
 
 static ssize_t
@@ -4697,11 +4729,9 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file)
        struct trace_array *tr = inode->i_private;
        int ret;
 
-       if (tracing_disabled)
-               return -ENODEV;
-
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        ret = single_open(file, tracing_trace_options_show, inode->i_private);
        if (ret < 0)
@@ -5038,8 +5068,11 @@ static const struct seq_operations tracing_saved_tgids_seq_ops = {
 
 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
 {
-       if (tracing_disabled)
-               return -ENODEV;
+       int ret;
+
+       ret = tracing_check_open_get_tr(NULL);
+       if (ret)
+               return ret;
 
        return seq_open(filp, &tracing_saved_tgids_seq_ops);
 }
@@ -5115,8 +5148,11 @@ static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
 
 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
 {
-       if (tracing_disabled)
-               return -ENODEV;
+       int ret;
+
+       ret = tracing_check_open_get_tr(NULL);
+       if (ret)
+               return ret;
 
        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
 }
@@ -5280,8 +5316,11 @@ static const struct seq_operations tracing_eval_map_seq_ops = {
 
 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
 {
-       if (tracing_disabled)
-               return -ENODEV;
+       int ret;
+
+       ret = tracing_check_open_get_tr(NULL);
+       if (ret)
+               return ret;
 
        return seq_open(filp, &tracing_eval_map_seq_ops);
 }
@@ -5804,13 +5843,11 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
-       int ret = 0;
-
-       if (tracing_disabled)
-               return -ENODEV;
+       int ret;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        mutex_lock(&trace_types_lock);
 
@@ -5999,6 +6036,7 @@ waitagain:
               sizeof(struct trace_iterator) -
               offsetof(struct trace_iterator, seq));
        cpumask_clear(iter->started);
+       trace_seq_init(&iter->seq);
        iter->pos = -1;
 
        trace_event_read_lock();
@@ -6547,11 +6585,9 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
        struct trace_array *tr = inode->i_private;
        int ret;
 
-       if (tracing_disabled)
-               return -ENODEV;
-
-       if (trace_array_get(tr))
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        ret = single_open(file, tracing_clock_show, inode->i_private);
        if (ret < 0)
@@ -6581,11 +6617,9 @@ static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
        struct trace_array *tr = inode->i_private;
        int ret;
 
-       if (tracing_disabled)
-               return -ENODEV;
-
-       if (trace_array_get(tr))
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
        if (ret < 0)
@@ -6638,10 +6672,11 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
        struct seq_file *m;
-       int ret = 0;
+       int ret;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        if (file->f_mode & FMODE_READ) {
                iter = __tracing_open(inode, file, true);
@@ -6786,6 +6821,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp)
        struct ftrace_buffer_info *info;
        int ret;
 
+       /* The following checks for tracefs lockdown */
        ret = tracing_buffers_open(inode, filp);
        if (ret < 0)
                return ret;
@@ -7105,8 +7141,9 @@ static int tracing_err_log_open(struct inode *inode, struct file *file)
        struct trace_array *tr = inode->i_private;
        int ret = 0;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        /* If this file was opened for write, then erase contents */
        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
@@ -7157,11 +7194,9 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
        struct ftrace_buffer_info *info;
        int ret;
 
-       if (tracing_disabled)
-               return -ENODEV;
-
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
index 26b0a08..d685c61 100644 (file)
@@ -338,6 +338,7 @@ extern struct mutex trace_types_lock;
 
 extern int trace_array_get(struct trace_array *tr);
 extern void trace_array_put(struct trace_array *tr);
+extern int tracing_check_open_get_tr(struct trace_array *tr);
 
 extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
 extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -365,11 +366,11 @@ static inline struct trace_array *top_trace_array(void)
        __builtin_types_compatible_p(typeof(var), type *)
 
 #undef IF_ASSIGN
-#define IF_ASSIGN(var, entry, etype, id)               \
-       if (FTRACE_CMP_TYPE(var, etype)) {              \
-               var = (typeof(var))(entry);             \
-               WARN_ON(id && (entry)->type != id);     \
-               break;                                  \
+#define IF_ASSIGN(var, entry, etype, id)                       \
+       if (FTRACE_CMP_TYPE(var, etype)) {                      \
+               var = (typeof(var))(entry);                     \
+               WARN_ON(id != 0 && (entry)->type != id);        \
+               break;                                          \
        }
 
 /* Will cause compile errors if type is not found. */
@@ -681,6 +682,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf);
 void tracing_reset_current(int cpu);
 void tracing_reset_all_online_cpus(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
+int tracing_open_generic_tr(struct inode *inode, struct file *filp);
 bool tracing_is_disabled(void);
 bool tracer_tracing_is_on(struct trace_array *tr);
 void tracer_tracing_on(struct trace_array *tr);
index a41fed4..89779eb 100644 (file)
@@ -174,6 +174,10 @@ static int dyn_event_open(struct inode *inode, struct file *file)
 {
        int ret;
 
+       ret = tracing_check_open_get_tr(NULL);
+       if (ret)
+               return ret;
+
        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
                ret = dyn_events_release_all(NULL);
                if (ret < 0)
index 0892e38..a9dfa04 100644 (file)
@@ -272,9 +272,11 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
                goto out;
        }
 
+       mutex_lock(&event_mutex);
        ret = perf_trace_event_init(tp_event, p_event);
        if (ret)
                destroy_local_trace_kprobe(tp_event);
+       mutex_unlock(&event_mutex);
 out:
        kfree(func);
        return ret;
@@ -282,8 +284,10 @@ out:
 
 void perf_kprobe_destroy(struct perf_event *p_event)
 {
+       mutex_lock(&event_mutex);
        perf_trace_event_close(p_event);
        perf_trace_event_unreg(p_event);
+       mutex_unlock(&event_mutex);
 
        destroy_local_trace_kprobe(p_event->tp_event);
 }
index b89cdfe..fba87d1 100644 (file)
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt) fmt
 
 #include <linux/workqueue.h>
+#include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/kthread.h>
 #include <linux/tracefs.h>
@@ -1294,6 +1295,8 @@ static int trace_format_open(struct inode *inode, struct file *file)
        struct seq_file *m;
        int ret;
 
+       /* Do we want to hide event format files on tracefs lockdown? */
+
        ret = seq_open(file, &trace_format_seq_ops);
        if (ret < 0)
                return ret;
@@ -1440,28 +1443,17 @@ static int system_tr_open(struct inode *inode, struct file *filp)
        struct trace_array *tr = inode->i_private;
        int ret;
 
-       if (tracing_is_disabled())
-               return -ENODEV;
-
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
-
        /* Make a temporary dir that has no system but points to tr */
        dir = kzalloc(sizeof(*dir), GFP_KERNEL);
-       if (!dir) {
-               trace_array_put(tr);
+       if (!dir)
                return -ENOMEM;
-       }
 
-       dir->tr = tr;
-
-       ret = tracing_open_generic(inode, filp);
+       ret = tracing_open_generic_tr(inode, filp);
        if (ret < 0) {
-               trace_array_put(tr);
                kfree(dir);
                return ret;
        }
-
+       dir->tr = tr;
        filp->private_data = dir;
 
        return 0;
@@ -1771,6 +1763,10 @@ ftrace_event_open(struct inode *inode, struct file *file,
        struct seq_file *m;
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        ret = seq_open(file, seq_ops);
        if (ret < 0)
                return ret;
@@ -1795,6 +1791,7 @@ ftrace_event_avail_open(struct inode *inode, struct file *file)
 {
        const struct seq_operations *seq_ops = &show_event_seq_ops;
 
+       /* Checks for tracefs lockdown */
        return ftrace_event_open(inode, file, seq_ops);
 }
 
@@ -1805,8 +1802,9 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
        struct trace_array *tr = inode->i_private;
        int ret;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
@@ -1825,8 +1823,9 @@ ftrace_event_set_pid_open(struct inode *inode, struct file *file)
        struct trace_array *tr = inode->i_private;
        int ret;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
 
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
index c773b8f..c9a74f8 100644 (file)
@@ -452,8 +452,10 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
 
                switch (*next) {
                case '(':                                       /* #2 */
-                       if (top - op_stack > nr_parens)
-                               return ERR_PTR(-EINVAL);
+                       if (top - op_stack > nr_parens) {
+                               ret = -EINVAL;
+                               goto out_free;
+                       }
                        *(++top) = invert;
                        continue;
                case '!':                                       /* #3 */
index 9468bd8..7482a14 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/module.h>
 #include <linux/kallsyms.h>
+#include <linux/security.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
@@ -678,6 +679,8 @@ static bool synth_field_signed(char *type)
 {
        if (str_has_prefix(type, "u"))
                return false;
+       if (strcmp(type, "gfp_t") == 0)
+               return false;
 
        return true;
 }
@@ -1448,6 +1451,10 @@ static int synth_events_open(struct inode *inode, struct file *file)
 {
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
                ret = dyn_events_release_all(&synth_event_ops);
                if (ret < 0)
@@ -1680,7 +1687,7 @@ static int save_hist_vars(struct hist_trigger_data *hist_data)
        if (var_data)
                return 0;
 
-       if (trace_array_get(tr) < 0)
+       if (tracing_check_open_get_tr(tr))
                return -ENODEV;
 
        var_data = kzalloc(sizeof(*var_data), GFP_KERNEL);
@@ -5515,6 +5522,12 @@ static int hist_show(struct seq_file *m, void *v)
 
 static int event_hist_open(struct inode *inode, struct file *file)
 {
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        return single_open(file, hist_show, file);
 }
 
index 2a2912c..2cd53ca 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2013 Tom Zanussi <tom.zanussi@linux.intel.com>
  */
 
+#include <linux/security.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/mutex.h>
@@ -173,7 +174,11 @@ static const struct seq_operations event_triggers_seq_ops = {
 
 static int event_trigger_regex_open(struct inode *inode, struct file *file)
 {
-       int ret = 0;
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
 
        mutex_lock(&event_mutex);
 
@@ -292,6 +297,7 @@ event_trigger_write(struct file *filp, const char __user *ubuf,
 static int
 event_trigger_open(struct inode *inode, struct file *filp)
 {
+       /* Checks for tracefs lockdown */
        return event_trigger_regex_open(inode, filp);
 }
 
index fa95139..862f4b0 100644 (file)
@@ -150,7 +150,7 @@ void trace_hwlat_callback(bool enter)
                if (enter)
                        nmi_ts_start = time_get();
                else
-                       nmi_total_ts = time_get() - nmi_ts_start;
+                       nmi_total_ts += time_get() - nmi_ts_start;
        }
 
        if (enter)
@@ -256,6 +256,8 @@ static int get_sample(void)
                /* Keep a running maximum ever recorded hardware latency */
                if (sample > tr->max_latency)
                        tr->max_latency = sample;
+               if (outer_sample > tr->max_latency)
+                       tr->max_latency = outer_sample;
        }
 
 out:
index a6697e2..1552a95 100644 (file)
@@ -7,6 +7,7 @@
  */
 #define pr_fmt(fmt)    "trace_kprobe: " fmt
 
+#include <linux/security.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
 #include <linux/rculist.h>
@@ -460,6 +461,10 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
        int i, ret;
 
+       ret = security_locked_down(LOCKDOWN_KPROBES);
+       if (ret)
+               return ret;
+
        if (trace_kprobe_is_registered(tk))
                return -EINVAL;
 
@@ -549,10 +554,11 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
                for (i = 0; i < orig->tp.nr_args; i++) {
                        if (strcmp(orig->tp.args[i].comm,
                                   comp->tp.args[i].comm))
-                               continue;
+                               break;
                }
 
-               return true;
+               if (i == orig->tp.nr_args)
+                       return true;
        }
 
        return false;
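The continue in the old loop was a bug: on a mismatch it merely moved to the next iteration, so the loop always ran to completion and the function returned true even when arguments differed. With break plus the i == orig->tp.nr_args test, true is returned only when every argument compared equal; the identical fix is applied to trace_uprobe.c further down. The idiom in isolation:

	/* "Do all n elements match?" -- break on the first mismatch, then
	 * check whether the index reached n (generic sketch). */
	for (i = 0; i < n; i++) {
		if (strcmp(a[i], b[i]))
			break;		/* mismatch leaves i < n */
	}
	if (i == n)
		return true;		/* loop completed: all elements equal */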
@@ -930,6 +936,10 @@ static int probes_open(struct inode *inode, struct file *file)
 {
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
                ret = dyn_events_release_all(&trace_kprobe_ops);
                if (ret < 0)
@@ -982,6 +992,12 @@ static const struct seq_operations profile_seq_op = {
 
 static int profile_open(struct inode *inode, struct file *file)
 {
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        return seq_open(file, &profile_seq_op);
 }
 
index c3fd849..d4e31e9 100644 (file)
@@ -6,6 +6,7 @@
  *
  */
 #include <linux/seq_file.h>
+#include <linux/security.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/ftrace.h>
@@ -348,6 +349,12 @@ static const struct seq_operations show_format_seq_ops = {
 static int
 ftrace_formats_open(struct inode *inode, struct file *file)
 {
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        return seq_open(file, &show_format_seq_ops);
 }
 
index baf58a3..905b10a 100644 (file)
@@ -178,6 +178,16 @@ void __trace_probe_log_err(int offset, int err_type)
        if (!command)
                return;
 
+       if (trace_probe_log.index >= trace_probe_log.argc) {
+               /*
+                * Set the error position next to the last arg + space.
+                * Note that len includes the terminating null and the
+                * cursor appears at pos + 1.
+                */
+               pos = len;
+               offset = 0;
+       }
+
        /* And make a command string from argv array */
        p = command;
        for (i = 0; i < trace_probe_log.argc; i++) {
@@ -1084,6 +1094,12 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b)
 {
        int i;
 
+       /* If one probe has more arguments, report the first extra argument */
+       if (a->nr_args < b->nr_args)
+               return a->nr_args + 1;
+       if (a->nr_args > b->nr_args)
+               return b->nr_args + 1;
+
        for (i = 0; i < a->nr_args; i++) {
                if ((b->nr_args <= i) ||
                    ((a->args[i].type != b->args[i].type) ||
index ec9a34a..4df9a20 100644 (file)
@@ -5,6 +5,7 @@
  */
 #include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
+#include <linux/security.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
@@ -470,6 +471,12 @@ static const struct seq_operations stack_trace_seq_ops = {
 
 static int stack_trace_open(struct inode *inode, struct file *file)
 {
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        return seq_open(file, &stack_trace_seq_ops);
 }
 
@@ -487,6 +494,7 @@ stack_trace_filter_open(struct inode *inode, struct file *file)
 {
        struct ftrace_ops *ops = inode->i_private;
 
+       /* Checks for tracefs lockdown */
        return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
                                 inode, file);
 }
index 75bf1bc..9ab0a1a 100644 (file)
@@ -9,7 +9,7 @@
  *
  */
 
-
+#include <linux/security.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
@@ -238,6 +238,10 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
        struct seq_file *m;
        struct stat_session *session = inode->i_private;
 
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        ret = stat_seq_init(session);
        if (ret)
                return ret;
index 34dd6d0..352073d 100644 (file)
@@ -7,6 +7,7 @@
  */
 #define pr_fmt(fmt)    "trace_uprobe: " fmt
 
+#include <linux/security.h>
 #include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
@@ -431,10 +432,11 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig,
                for (i = 0; i < orig->tp.nr_args; i++) {
                        if (strcmp(orig->tp.args[i].comm,
                                   comp->tp.args[i].comm))
-                               continue;
+                               break;
                }
 
-               return true;
+               if (i == orig->tp.nr_args)
+                       return true;
        }
 
        return false;
@@ -768,6 +770,10 @@ static int probes_open(struct inode *inode, struct file *file)
 {
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
                ret = dyn_events_release_all(&trace_uprobe_ops);
                if (ret)
@@ -817,6 +823,12 @@ static const struct seq_operations profile_seq_op = {
 
 static int profile_open(struct inode *inode, struct file *file)
 {
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
        return seq_open(file, &profile_seq_op);
 }
 
index 4e6b1c3..183f92a 100644 (file)
@@ -555,11 +555,10 @@ config SIGNATURE
          Implementation is done using GnuPG MPI library
 
 config DIMLIB
-       bool "DIM library"
-       default y
+       bool
        help
          Dynamic Interrupt Moderation library.
-         Implements an algorithm for dynamically change CQ modertion values
+         Implements an algorithm for dynamically changing CQ moderation values
          according to run time performance.
 
 #
index e0e1478..93d97f9 100644 (file)
@@ -311,7 +311,7 @@ config HEADERS_CHECK
          relevant for userspace, say 'Y'.
 
 config OPTIMIZE_INLINING
-       bool "Allow compiler to uninline functions marked 'inline'"
+       def_bool y
        help
          This option determines if the kernel forces gcc to inline the functions
          developers have marked 'inline'. Doing so takes away freedom from gcc to
@@ -322,8 +322,6 @@ config OPTIMIZE_INLINING
          decision will become the default in the future. Until then this option
          is there to test gcc for this.
 
-         If unsure, say N.
-
 config DEBUG_SECTION_MISMATCH
        bool "Enable full Section mismatch analysis"
        help
@@ -576,17 +574,18 @@ config DEBUG_KMEMLEAK
          In order to access the kmemleak file, debugfs needs to be
          mounted (usually at /sys/kernel/debug).
 
-config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
-       int "Maximum kmemleak early log entries"
+config DEBUG_KMEMLEAK_MEM_POOL_SIZE
+       int "Kmemleak memory pool size"
        depends on DEBUG_KMEMLEAK
-       range 200 40000
-       default 400
+       range 200 1000000
+       default 16000
        help
          Kmemleak must track all the memory allocations to avoid
          reporting false positives. Since memory may be allocated or
-         freed before kmemleak is initialised, an early log buffer is
-         used to store these actions. If kmemleak reports "early log
-         buffer exceeded", please increase this value.
+         freed before kmemleak is fully initialised, a static pool of
+         metadata objects is used to track these early allocations and
+         frees. After kmemleak is fully initialised, this memory pool
+         acts as an emergency reserve if slab allocations fail.
 
 config DEBUG_KMEMLEAK_TEST
        tristate "Simple test for the kernel memory leak detector"
index 7fa97a8..6c9682c 100644 (file)
@@ -134,6 +134,14 @@ config KASAN_S390_4_LEVEL_PAGING
          to 3TB of RAM with KASan enabled). This options allows to force
          4-level paging instead.
 
+config KASAN_SW_TAGS_IDENTIFY
+       bool "Enable memory corruption identification"
+       depends on KASAN_SW_TAGS
+       help
+         This option enables best-effort identification of bug type
+         (use-after-free or out-of-bounds) at the cost of increased
+         memory consumption.
+
 config TEST_KASAN
        tristate "Module for testing KASAN for bug detection"
        depends on m && KASAN
index 1077366..8c98af0 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -181,6 +181,15 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
                }
        }
 
+       /*
+        * BUG() and WARN_ON() families don't print a custom debug message
+        * before triggering the exception handler, so we must add the
+        * "cut here" line now. WARN() issues its own "cut here" before the
+        * extra debugging message it writes before triggering the handler.
+        */
+       if ((bug->flags & BUGFLAG_NO_CUT_HERE) == 0)
+               printk(KERN_DEFAULT CUT_HERE);
+
        if (warning) {
                /* this is a WARN_ON rather than BUG/BUG_ON */
                __warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
@@ -188,8 +197,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
                return BUG_TRAP_TYPE_WARN;
        }
 
-       printk(KERN_DEFAULT CUT_HERE);
-
        if (file)
                pr_crit("kernel BUG at %s:%u!\n", file, line);
        else
index 5cff72f..33ffbf3 100644 (file)
@@ -106,7 +106,12 @@ retry:
                was_locked = 1;
        } else {
                local_irq_restore(flags);
-               cpu_relax();
+               /*
+                * Wait for the lock to release before jumping to
+                * atomic_cmpxchg() in order to mitigate the thundering herd
+                * problem.
+                */
+               do { cpu_relax(); } while (atomic_read(&dump_lock) != -1);
                goto retry;
        }
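Replacing the bare cpu_relax() with a read-only spin is the classic test-and-test-and-set mitigation: instead of every waiter immediately retrying the atomic_cmpxchg() and bouncing the lock's cache line between CPUs, each waiter spins on a plain load until the lock looks free. The generic shape:

	/* Test-and-test-and-set (sketch): only attempt the expensive
	 * cmpxchg once a read suggests the lock is free. */
	while (atomic_cmpxchg(&lock, -1, me) != -1) {
		while (atomic_read(&lock) != -1)
			cpu_relax();	/* shared read: no write traffic */
	}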
 
index 25da407..c3e59ca 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/sort.h>
 #include <linux/uaccess.h>
+#include <linux/extable.h>
 
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define ex_to_insn(x)  ((x)->insn)
index a7bafc4..f25eb11 100644 (file)
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/generic-radix-tree.h>
 #include <linux/gfp.h>
+#include <linux/kmemleak.h>
 
 #define GENRADIX_ARY           (PAGE_SIZE / sizeof(struct genradix_node *))
 #define GENRADIX_ARY_SHIFT     ilog2(GENRADIX_ARY)
@@ -36,12 +37,12 @@ static inline size_t genradix_depth_size(unsigned depth)
 #define GENRADIX_DEPTH_MASK                            \
        ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
 
-unsigned genradix_root_to_depth(struct genradix_root *r)
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
 {
        return (unsigned long) r & GENRADIX_DEPTH_MASK;
 }
 
-struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
 {
        return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
 }
@@ -75,6 +76,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset)
 }
 EXPORT_SYMBOL(__genradix_ptr);
 
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+       struct genradix_node *node;
+
+       node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+       /*
+        * We're using pages (not slab allocations) directly for kernel data
+        * structures, so we need to explicitly inform kmemleak of them in order
+        * to avoid false positive memory leak reports.
+        */
+       kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+       return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+       kmemleak_free(node);
+       free_page((unsigned long)node);
+}
+
 /*
  * Returns pointer to the specified byte @offset within @radix, allocating it if
  * necessary - newly allocated slots are always zeroed out:
@@ -97,8 +119,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
                        break;
 
                if (!new_node) {
-                       new_node = (void *)
-                               __get_free_page(gfp_mask|__GFP_ZERO);
+                       new_node = genradix_alloc_node(gfp_mask);
                        if (!new_node)
                                return NULL;
                }
@@ -121,8 +142,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
                n = READ_ONCE(*p);
                if (!n) {
                        if (!new_node) {
-                               new_node = (void *)
-                                       __get_free_page(gfp_mask|__GFP_ZERO);
+                               new_node = genradix_alloc_node(gfp_mask);
                                if (!new_node)
                                        return NULL;
                        }
@@ -133,7 +153,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
        }
 
        if (new_node)
-               free_page((unsigned long) new_node);
+               genradix_free_node(new_node);
 
        return &n->data[offset];
 }
@@ -191,7 +211,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level)
                                genradix_free_recurse(n->children[i], level - 1);
        }
 
-       free_page((unsigned long) n);
+       genradix_free_node(n);
 }
 
 int __genradix_prealloc(struct __genradix *radix, size_t size,
index b1d55b6..147133f 100644 (file)
@@ -270,25 +270,4 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
 }
 EXPORT_SYMBOL(print_hex_dump);
 
-#if !defined(CONFIG_DYNAMIC_DEBUG)
-/**
- * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
- * @prefix_str: string to prefix each line with;
- *  caller supplies trailing spaces for alignment if desired
- * @prefix_type: controls whether prefix of an offset, address, or none
- *  is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
- * @buf: data blob to dump
- * @len: number of bytes in the @buf
- *
- * Calls print_hex_dump(), with log level of KERN_DEBUG,
- * rowsize of 16, groupsize of 1, and ASCII output included.
- */
-void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-                         const void *buf, size_t len)
-{
-       print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
-                      buf, len, true);
-}
-EXPORT_SYMBOL(print_hex_dump_bytes);
-#endif /* !defined(CONFIG_DYNAMIC_DEBUG) */
 #endif /* defined(CONFIG_PRINTK) */
index f1e0569..639d5e7 100644 (file)
@@ -878,7 +878,7 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
        head = compound_head(page);
        v += (page - head) << PAGE_SHIFT;
 
-       if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
+       if (likely(n <= v && v <= (page_size(head))))
                return true;
        WARN_ON(1);
        return false;
index ba16c08..717c940 100644 (file)
@@ -83,17 +83,19 @@ next:
                                        ALIGN((uintptr_t)ir, 4)) &&
                                        (ir < limit) && (*ir == 0))
                                ir++;
-                       for (; (ir + 4) <= limit; ir += 4) {
-                               dv = *((u32 *)ir);
-                               if (dv) {
+                       if (IS_ALIGNED((uintptr_t)ir, 4)) {
+                               for (; (ir + 4) <= limit; ir += 4) {
+                                       dv = *((u32 *)ir);
+                                       if (dv) {
 #  if defined(__LITTLE_ENDIAN)
-                                       ir += __builtin_ctz(dv) >> 3;
+                                               ir += __builtin_ctz(dv) >> 3;
 #  elif defined(__BIG_ENDIAN)
-                                       ir += __builtin_clz(dv) >> 3;
+                                               ir += __builtin_clz(dv) >> 3;
 #  else
 #    error "missing endian definition"
 #  endif
-                                       break;
+                                               break;
+                                       }
                                }
                        }
 #endif
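The old word-at-a-time loop could run with ir not 4-byte aligned (the preceding byte loop only advances ir while it points at zero bytes), and *(u32 *)ir through a misaligned pointer faults or is slow on some architectures. Guarding the fast path with IS_ALIGNED() keeps the optimization only where it is safe; the little-endian shape in isolation:

	/* Word-at-a-time zero scan, entered only when aligned (sketch). */
	if (IS_ALIGNED((uintptr_t)p, sizeof(u32))) {
		for (; p + 4 <= limit; p += 4) {
			u32 v = *(const u32 *)p;
			if (v) {
				p += __builtin_ctz(v) >> 3; /* first nonzero byte (LE) */
				break;
			}
		}
	}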
index 62b8ee9..41ae3c7 100644 (file)
@@ -77,26 +77,10 @@ static inline void erase_cached(struct test_node *node, struct rb_root_cached *r
 }
 
 
-static inline u32 augment_recompute(struct test_node *node)
-{
-       u32 max = node->val, child_augmented;
-       if (node->rb.rb_left) {
-               child_augmented = rb_entry(node->rb.rb_left, struct test_node,
-                                          rb)->augmented;
-               if (max < child_augmented)
-                       max = child_augmented;
-       }
-       if (node->rb.rb_right) {
-               child_augmented = rb_entry(node->rb.rb_right, struct test_node,
-                                          rb)->augmented;
-               if (max < child_augmented)
-                       max = child_augmented;
-       }
-       return max;
-}
+#define NODE_VAL(node) ((node)->val)
 
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct test_node, rb,
-                    u32, augmented, augment_recompute)
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+                        struct test_node, rb, u32, augmented, NODE_VAL)
 
 static void insert_augmented(struct test_node *node,
                             struct rb_root_cached *root)
@@ -238,7 +222,20 @@ static void check_augmented(int nr_nodes)
        check(nr_nodes);
        for (rb = rb_first(&root.rb_root); rb; rb = rb_next(rb)) {
                struct test_node *node = rb_entry(rb, struct test_node, rb);
-               WARN_ON_ONCE(node->augmented != augment_recompute(node));
+               u32 subtree, max = node->val;
+               if (node->rb.rb_left) {
+                       subtree = rb_entry(node->rb.rb_left, struct test_node,
+                                          rb)->augmented;
+                       if (max < subtree)
+                               max = subtree;
+               }
+               if (node->rb.rb_right) {
+                       subtree = rb_entry(node->rb.rb_right, struct test_node,
+                                          rb)->augmented;
+                       if (max < subtree)
+                               max = subtree;
+               }
+               WARN_ON_ONCE(node->augmented != max);
        }
 }
 
index 5c86ef4..1c26c14 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/mm.h>
-#include <linux/quicklist.h>
 #include <linux/cma.h>
 
 void show_mem(unsigned int filter, nodemask_t *nodemask)
@@ -39,10 +38,6 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_CMA
        printk("%lu pages cma reserved\n", totalcma_pages);
 #endif
-#ifdef CONFIG_QUICKLIST
-       printk("%lu pages in pagetable cache\n",
-               quicklist_total_size());
-#endif
 #ifdef CONFIG_MEMORY_FAILURE
        printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
 #endif
index 461fb62..08ec58c 100644 (file)
@@ -173,8 +173,9 @@ EXPORT_SYMBOL(strlcpy);
  * doesn't unnecessarily force the tail of the destination buffer to be
  * zeroed.  If zeroing is desired please use strscpy_pad().
  *
- * Return: The number of characters copied (not including the trailing
- *         %NUL) or -E2BIG if the destination buffer wasn't big enough.
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if count is 0 or @src was truncated.
  */
 ssize_t strscpy(char *dest, const char *src, size_t count)
 {
@@ -182,7 +183,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count)
        size_t max = count;
        long res = 0;
 
-       if (count == 0)
+       if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
                return -E2BIG;
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
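The new WARN_ON_ONCE() treats a count above INT_MAX as a caller bug (it almost certainly means a negative value was passed through a size_t) and fails the copy instead of scanning gigabytes. For reference, the typical caller pattern under the documented contract:

	/* Typical strscpy() caller: truncation is reported, not silent. */
	ssize_t len = strscpy(buf, name, sizeof(buf));
	if (len < 0)
		return len;	/* -E2BIG: name did not fit (or count was 0) */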
@@ -253,8 +254,9 @@ EXPORT_SYMBOL(strscpy);
  * For full explanation of why you may want to consider using the
  * 'strscpy' functions please see the function docstring for strscpy().
  *
- * Return: The number of characters copied (not including the trailing
- *         %NUL) or -E2BIG if the destination buffer wasn't big enough.
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if count is 0 or @src was truncated.
  */
 ssize_t strscpy_pad(char *dest, const char *src, size_t count)
 {
@@ -746,27 +748,6 @@ void *memset(void *s, int c, size_t count)
 EXPORT_SYMBOL(memset);
 #endif
 
-/**
- * memzero_explicit - Fill a region of memory (e.g. sensitive
- *                   keying data) with 0s.
- * @s: Pointer to the start of the area.
- * @count: The size of the area.
- *
- * Note: usually using memset() is just fine (!), but in cases
- * where clearing out _local_ data at the end of a scope is
- * necessary, memzero_explicit() should be used instead in
- * order to prevent the compiler from optimising away zeroing.
- *
- * memzero_explicit() doesn't need an arch-specific version as
- * it just invokes the one of memset() implicitly.
- */
-void memzero_explicit(void *s, size_t count)
-{
-       memset(s, 0, count);
-       barrier_data(s);
-}
-EXPORT_SYMBOL(memzero_explicit);
-
 #ifndef __HAVE_ARCH_MEMSET16
 /**
  * memset16() - Fill a memory area with a uint16_t
index 023ba9f..dccb95a 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/mm.h>
 
 #include <asm/byteorder.h>
 #include <asm/word-at-a-time.h>
@@ -108,7 +109,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
                return 0;
 
        max_addr = user_addr_max();
-       src_addr = (unsigned long)src;
+       src_addr = (unsigned long)untagged_addr(src);
        if (likely(src_addr < max_addr)) {
                unsigned long max = max_addr - src_addr;
                long retval;
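Both strncpy_from_user() and strnlen_user() (below) compare the source pointer against user_addr_max(); on arm64 with the tagged address ABI, user pointers may carry non-zero top-byte tag bits, which would make that comparison spuriously fail. The fix is to range-check the untagged value while still performing the access through the original pointer:

	/* Sketch: strip tag bits for the range check only. */
	unsigned long addr = (unsigned long)untagged_addr(ptr);
	if (likely(addr < user_addr_max())) {
		unsigned long max = user_addr_max() - addr;
		/* ... word-at-a-time access through the original ptr ... */
	}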
index 7f2db3f..6c0005d 100644 (file)
@@ -2,16 +2,11 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
 
 #include <asm/word-at-a-time.h>
 
-/* Set bits in the first 'n' bytes when loaded from memory */
-#ifdef __LITTLE_ENDIAN
-#  define aligned_byte_mask(n) ((1ul << 8*(n))-1)
-#else
-#  define aligned_byte_mask(n) (~0xfful << (BITS_PER_LONG - 8 - 8*(n)))
-#endif
-
 /*
  * Do a strnlen, return length of string *with* final '\0'.
  * 'count' is the user-supplied count, while 'max' is the
@@ -109,7 +104,7 @@ long strnlen_user(const char __user *str, long count)
                return 0;
 
        max_addr = user_addr_max();
-       src_addr = (unsigned long)str;
+       src_addr = (unsigned long)untagged_addr(str);
        if (likely(src_addr < max_addr)) {
                unsigned long max = max_addr - src_addr;
                long retval;
index b63b367..49cc4d5 100644 (file)
@@ -18,6 +18,9 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/io.h>
+
+#include <asm/page.h>
 
 /*
  * Note: test functions are marked noinline so that their names appear in
@@ -337,6 +340,42 @@ static noinline void __init kmalloc_uaf2(void)
        kfree(ptr2);
 }
 
+static noinline void __init kfree_via_page(void)
+{
+       char *ptr;
+       size_t size = 8;
+       struct page *page;
+       unsigned long offset;
+
+       pr_info("invalid-free false positive (via page)\n");
+       ptr = kmalloc(size, GFP_KERNEL);
+       if (!ptr) {
+               pr_err("Allocation failed\n");
+               return;
+       }
+
+       page = virt_to_page(ptr);
+       offset = offset_in_page(ptr);
+       kfree(page_address(page) + offset);
+}
+
+static noinline void __init kfree_via_phys(void)
+{
+       char *ptr;
+       size_t size = 8;
+       phys_addr_t phys;
+
+       pr_info("invalid-free false positive (via phys)\n");
+       ptr = kmalloc(size, GFP_KERNEL);
+       if (!ptr) {
+               pr_err("Allocation failed\n");
+               return;
+       }
+
+       phys = virt_to_phys(ptr);
+       kfree(phys_to_virt(phys));
+}
+
 static noinline void __init kmem_cache_oob(void)
 {
        char *p;
@@ -737,6 +776,8 @@ static int __init kmalloc_tests_init(void)
        kmalloc_uaf();
        kmalloc_uaf_memset();
        kmalloc_uaf2();
+       kfree_via_page();
+       kfree_via_phys();
        kmem_cache_oob();
        memcg_accounted_kmem_cache();
        kasan_stack_oob();
index 9729f27..9742e5c 100644 (file)
@@ -297,6 +297,32 @@ out:
        return 1;
 }
 
+static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
+{
+       struct kmem_cache *c;
+       int i, iter, maxiter = 1024;
+       int num, bytes;
+       bool fail = false;
+       void *objects[10];
+
+       c = kmem_cache_create("test_cache", size, size, 0, NULL);
+       for (iter = 0; (iter < maxiter) && !fail; iter++) {
+               num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
+                                           objects);
+               for (i = 0; i < num; i++) {
+                       bytes = count_nonzero_bytes(objects[i], size);
+                       if (bytes)
+                               fail = true;
+                       fill_with_garbage(objects[i], size);
+               }
+
+               if (num)
+                       kmem_cache_free_bulk(c, num, objects);
+       }
+       *total_failures += fail;
+       return 1;
+}
+
 /*
  * Test kmem_cache allocation by creating caches of different sizes, with and
  * without constructors, with and without SLAB_TYPESAFE_BY_RCU.
@@ -318,6 +344,7 @@ static int __init test_kmemcache(int *total_failures)
                        num_tests += do_kmem_cache_size(size, ctor, rcu, zero,
                                                        &failures);
                }
+               num_tests += do_kmem_cache_size_bulk(size, &failures);
        }
        REPORT_FAILURES_IN_FN();
        *total_failures += failures;
index 67bcd5d..5ff04d8 100644 (file)
 # define TEST_U64
 #endif
 
-#define test(condition, msg)           \
-({                                     \
-       int cond = (condition);         \
-       if (cond)                       \
-               pr_warn("%s\n", msg);   \
-       cond;                           \
+#define test(condition, msg, ...)                                      \
+({                                                                     \
+       int cond = (condition);                                         \
+       if (cond)                                                       \
+               pr_warn("[%d] " msg "\n", __LINE__, ##__VA_ARGS__);     \
+       cond;                                                           \
 })
 
+static bool is_zeroed(void *from, size_t size)
+{
+       return memchr_inv(from, 0x0, size) == NULL;
+}
+
+static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size)
+{
+       int ret = 0;
+       size_t start, end, i, zero_start, zero_end;
+
+       if (test(size < 2 * PAGE_SIZE, "buffer too small"))
+               return -EINVAL;
+
+       /*
+        * We want to cross a page boundary to exercise the code more
+        * effectively. We also don't want to make the size we scan too large,
+        * otherwise the test can take a long time and cause soft lockups. So
+        * scan a 1024 byte region across the page boundary.
+        */
+       size = 1024;
+       start = PAGE_SIZE - (size / 2);
+
+       kmem += start;
+       umem += start;
+
+       zero_start = size / 4;
+       zero_end = size - zero_start;
+
+       /*
+        * We conduct a series of check_nonzero_user() tests on a block of
+        * memory with the following byte-pattern (trying every possible
+        * [start,end] pair):
+        *
+        *   [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ]
+        *
+        * And we verify that check_nonzero_user() acts identically to
+        * memchr_inv().
+        */
+
+       memset(kmem, 0x0, size);
+       for (i = 1; i < zero_start; i += 2)
+               kmem[i] = 0xff;
+       for (i = zero_end; i < size; i += 2)
+               kmem[i] = 0xff;
+
+       ret |= test(copy_to_user(umem, kmem, size),
+                   "legitimate copy_to_user failed");
+
+       for (start = 0; start <= size; start++) {
+               for (end = start; end <= size; end++) {
+                       size_t len = end - start;
+                       int retval = check_zeroed_user(umem + start, len);
+                       int expected = is_zeroed(kmem + start, len);
+
+                       ret |= test(retval != expected,
+                                   "check_nonzero_user(=%d) != memchr_inv(=%d) mismatch (start=%zu, end=%zu)",
+                                   retval, expected, start, end);
+               }
+       }
+
+       return ret;
+}
+
+static int test_copy_struct_from_user(char *kmem, char __user *umem,
+                                     size_t size)
+{
+       int ret = 0;
+       char *umem_src = NULL, *expected = NULL;
+       size_t ksize, usize;
+
+       umem_src = kmalloc(size, GFP_KERNEL);
+       ret = test(umem_src == NULL, "kmalloc failed");
+       if (ret)
+               goto out_free;
+
+       expected = kmalloc(size, GFP_KERNEL);
+       ret = test(expected == NULL, "kmalloc failed");
+       if (ret)
+               goto out_free;
+
+       /* Fill umem with a fixed byte pattern. */
+       memset(umem_src, 0x3e, size);
+       ret |= test(copy_to_user(umem, umem_src, size),
+                   "legitimate copy_to_user failed");
+
+       /* Check basic case -- (usize == ksize). */
+       ksize = size;
+       usize = size;
+
+       memcpy(expected, umem_src, ksize);
+
+       memset(kmem, 0x0, size);
+       ret |= test(copy_struct_from_user(kmem, ksize, umem, usize),
+                   "copy_struct_from_user(usize == ksize) failed");
+       ret |= test(memcmp(kmem, expected, ksize),
+                   "copy_struct_from_user(usize == ksize) gives unexpected copy");
+
+       /* Old userspace case -- (usize < ksize). */
+       ksize = size;
+       usize = size / 2;
+
+       memcpy(expected, umem_src, usize);
+       memset(expected + usize, 0x0, ksize - usize);
+
+       memset(kmem, 0x0, size);
+       ret |= test(copy_struct_from_user(kmem, ksize, umem, usize),
+                   "copy_struct_from_user(usize < ksize) failed");
+       ret |= test(memcmp(kmem, expected, ksize),
+                   "copy_struct_from_user(usize < ksize) gives unexpected copy");
+
+       /* New userspace (-E2BIG) case -- (usize > ksize). */
+       ksize = size / 2;
+       usize = size;
+
+       memset(kmem, 0x0, size);
+       ret |= test(copy_struct_from_user(kmem, ksize, umem, usize) != -E2BIG,
+                   "copy_struct_from_user(usize > ksize) didn't give E2BIG");
+
+       /* New userspace (success) case -- (usize > ksize). */
+       ksize = size / 2;
+       usize = size;
+
+       memcpy(expected, umem_src, ksize);
+       ret |= test(clear_user(umem + ksize, usize - ksize),
+                   "legitimate clear_user failed");
+
+       memset(kmem, 0x0, size);
+       ret |= test(copy_struct_from_user(kmem, ksize, umem, usize),
+                   "copy_struct_from_user(usize > ksize) failed");
+       ret |= test(memcmp(kmem, expected, ksize),
+                   "copy_struct_from_user(usize > ksize) gives unexpected copy");
+
+out_free:
+       kfree(expected);
+       kfree(umem_src);
+       return ret;
+}
+
 static int __init test_user_copy_init(void)
 {
        int ret = 0;
@@ -106,6 +244,11 @@ static int __init test_user_copy_init(void)
 #endif
 #undef test_legit
 
+       /* Test usage of check_nonzero_user(). */
+       ret |= test_check_nonzero_user(kmem, usermem, 2 * PAGE_SIZE);
+       /* Test usage of copy_struct_from_user(). */
+       ret |= test_copy_struct_from_user(kmem, usermem, 2 * PAGE_SIZE);
+
        /*
         * Invalid usage: none of these copies should succeed.
         */
index 4f16eec..f68dea8 100644 (file)
@@ -89,9 +89,9 @@
  *       goto errout;
  *   }
  *
- *   pos = textsearch_find_continuous(conf, \&state, example, strlen(example));
+ *   pos = textsearch_find_continuous(conf, &state, example, strlen(example));
  *   if (pos != UINT_MAX)
- *       panic("Oh my god, dancing chickens at \%d\n", pos);
+ *       panic("Oh my god, dancing chickens at %d\n", pos);
  *
  *   textsearch_destroy(conf);
  */
index c2bfbca..cbb4d9e 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/uaccess.h>
+#include <linux/bitops.h>
 
 /* out-of-line parts */
 
@@ -31,3 +32,57 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 EXPORT_SYMBOL(_copy_to_user);
 #endif
+
+/**
+ * check_zeroed_user: check if a userspace buffer only contains zero bytes
+ * @from: Source address, in userspace.
+ * @size: Size of buffer.
+ *
+ * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for
+ * userspace addresses (and is more efficient because we don't care where the
+ * first non-zero byte is).
+ *
+ * Returns:
+ *  * 0: There were non-zero bytes present in the buffer.
+ *  * 1: The buffer was full of zero bytes.
+ *  * -EFAULT: access to userspace failed.
+ */
+int check_zeroed_user(const void __user *from, size_t size)
+{
+       unsigned long val;
+       uintptr_t align = (uintptr_t) from % sizeof(unsigned long);
+
+       if (unlikely(size == 0))
+               return 1;
+
+       from -= align;
+       size += align;
+
+       if (!user_access_begin(from, size))
+               return -EFAULT;
+
+       unsafe_get_user(val, (unsigned long __user *) from, err_fault);
+       if (align)
+               val &= ~aligned_byte_mask(align);
+
+       while (size > sizeof(unsigned long)) {
+               if (unlikely(val))
+                       goto done;
+
+               from += sizeof(unsigned long);
+               size -= sizeof(unsigned long);
+
+               unsafe_get_user(val, (unsigned long __user *) from, err_fault);
+       }
+
+       if (size < sizeof(unsigned long))
+               val &= aligned_byte_mask(size);
+
+done:
+       user_access_end();
+       return (val == 0);
+err_fault:
+       user_access_end();
+       return -EFAULT;
+}
+EXPORT_SYMBOL(check_zeroed_user);
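check_zeroed_user() exists primarily to support extensible syscall structs, which the copy_struct_from_user() tests earlier in this series exercise: when newer userspace passes a struct larger than the kernel knows about (usize > ksize), the unknown tail must be all zeroes. Roughly what the caller-side helper does, sketched:

	/* Extensible-struct pattern (sketch, not the exact helper). */
	if (usize > ksize) {
		int rv = check_zeroed_user(ubuf + ksize, usize - ksize);
		if (rv < 0)
			return rv;		/* -EFAULT */
		if (rv == 0)
			return -E2BIG;		/* unknown trailing fields set */
	}
	if (usize < ksize)
		memset(karg + usize, 0, ksize - usize);	/* zero the kernel tail */
	if (copy_from_user(karg, ubuf, min(usize, ksize)))
		return -EFAULT;
	return 0;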
index cc00364..9fe698f 100644 (file)
@@ -24,13 +24,4 @@ config GENERIC_COMPAT_VDSO
        help
          This config option enables the compat VDSO layer.
 
-config CROSS_COMPILE_COMPAT_VDSO
-       string "32 bit Toolchain prefix for compat vDSO"
-       default ""
-       depends on GENERIC_COMPAT_VDSO
-       help
-         Defines the cross-compiler prefix for compiling compat vDSO.
-         If a 64 bit compiler (i.e. x86_64) can compile the VDSO for
-         32 bit, it does not need to define this parameter.
-
 endif
index e630e7f..45f57fd 100644
@@ -214,9 +214,10 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
                return -1;
        }
 
-       res->tv_sec = 0;
-       res->tv_nsec = ns;
-
+       if (likely(res)) {
+               res->tv_sec = 0;
+               res->tv_nsec = ns;
+       }
        return 0;
 }
 
@@ -245,7 +246,7 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
                ret = clock_getres_fallback(clock, &ts);
 #endif
 
-       if (likely(!ret)) {
+       if (likely(!ret && res)) {
                res->tv_sec = ts.tv_sec;
                res->tv_nsec = ts.tv_nsec;
        }
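
The two hunks above make the vDSO honour a NULL resolution pointer, which
POSIX permits and which the real syscall already tolerated. From
userspace the now-safe probe is simply:

    #include <time.h>

    /* res == NULL is valid per POSIX; a zero return means the clock exists. */
    static int clock_is_supported(clockid_t clk)
    {
            return clock_getres(clk, NULL) == 0;
    }
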
index 2fe4902..a5dae9a 100644
@@ -273,11 +273,6 @@ config BOUNCE
          by default when ZONE_DMA or HIGHMEM is selected, but you
          may say n to override this.
 
-config NR_QUICK
-       int
-       depends on QUICKLIST
-       default "1"
-
 config VIRT_TO_BUS
        bool
        help
@@ -717,6 +712,17 @@ config GUP_BENCHMARK
 config GUP_GET_PTE_LOW_HIGH
        bool
 
+config READ_ONLY_THP_FOR_FS
+       bool "Read-only THP for filesystems (EXPERIMENTAL)"
+       depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
+
+       help
+         Allow khugepaged to put read-only file-backed pages in THP.
+
+         This is marked experimental because it is a new feature. Write
+         support for file THPs will be developed in the next few release
+         cycles.
+
 config ARCH_HAS_PTE_SPECIAL
        bool
 
index 82b6a20..327b3eb 100644
@@ -21,7 +21,9 @@ config DEBUG_PAGEALLOC
          Also, the state of page tracking structures is checked more often as
          pages are being allocated and freed, as unexpected state changes
          often happen for same reasons as memory corruption (e.g. double free,
-         use-after-free).
+         use-after-free). The error reports for these checks can be augmented
+         with stack traces of last allocation and freeing of the page, when
+         PAGE_OWNER is also selected and enabled on boot.
 
          For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC,
          fill the pages with poison patterns after free_pages() and verify
index d0b295c..d996846 100644
@@ -21,6 +21,9 @@ KCOV_INSTRUMENT_memcontrol.o := n
 KCOV_INSTRUMENT_mmzone.o := n
 KCOV_INSTRUMENT_vmstat.o := n
 
+CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
+CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
+
 mmu-y                  := nommu.o
 mmu-$(CONFIG_MMU)      := highmem.o memory.o mincore.o \
                           mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
@@ -72,7 +75,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_MEMTEST)          += memtest.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
 obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
 obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
index d9daa3e..c360f6a 100644
@@ -239,8 +239,8 @@ static int __init default_bdi_init(void)
 {
        int err;
 
-       bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
-                                             WQ_UNBOUND | WQ_SYSFS, 0);
+       bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
+                                WQ_SYSFS, 0);
        if (!bdi_wq)
                return -ENOMEM;
 
index 952dc2f..672d3c7 100644
@@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 
        /* Ensure the start of the pageblock or zone is online and valid */
        block_pfn = pageblock_start_pfn(pfn);
-       block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+       block_pfn = max(block_pfn, zone->zone_start_pfn);
+       block_page = pfn_to_online_page(block_pfn);
        if (block_page) {
                page = block_page;
                pfn = block_pfn;
        }
 
        /* Ensure the end of the pageblock or zone is online and valid */
-       block_pfn += pageblock_nr_pages;
+       block_pfn = pageblock_end_pfn(pfn) - 1;
        block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
        end_page = pfn_to_online_page(block_pfn);
        if (!end_page)
@@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 
                page += (1 << PAGE_ALLOC_COSTLY_ORDER);
                pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
-       } while (page < end_page);
+       } while (page <= end_page);
 
        return false;
 }
@@ -969,7 +970,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                         * is safe to read and it's 0 for tail pages.
                         */
                        if (unlikely(PageCompound(page))) {
-                               low_pfn += (1UL << compound_order(page)) - 1;
+                               low_pfn += compound_nr(page) - 1;
                                goto isolate_fail;
                        }
                }
@@ -1737,8 +1738,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
  * starting at the block pointed to by the migrate scanner pfn within
  * compact_control.
  */
-static isolate_migrate_t isolate_migratepages(struct zone *zone,
-                                       struct compact_control *cc)
+static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 {
        unsigned long block_start_pfn;
        unsigned long block_end_pfn;
@@ -1756,8 +1756,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
         */
        low_pfn = fast_find_migrateblock(cc);
        block_start_pfn = pageblock_start_pfn(low_pfn);
-       if (block_start_pfn < zone->zone_start_pfn)
-               block_start_pfn = zone->zone_start_pfn;
+       if (block_start_pfn < cc->zone->zone_start_pfn)
+               block_start_pfn = cc->zone->zone_start_pfn;
 
        /*
         * fast_find_migrateblock marks a pageblock skipped so to avoid
@@ -1787,8 +1787,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
                if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
                        cond_resched();
 
-               page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
-                                                                       zone);
+               page = pageblock_pfn_to_page(block_start_pfn,
+                                               block_end_pfn, cc->zone);
                if (!page)
                        continue;
 
@@ -2078,6 +2078,17 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
        const bool sync = cc->mode != MIGRATE_ASYNC;
        bool update_cached;
 
+       /*
+        * These counters track activities during zone compaction.  Initialize
+        * them before compacting a new zone.
+        */
+       cc->total_migrate_scanned = 0;
+       cc->total_free_scanned = 0;
+       cc->nr_migratepages = 0;
+       cc->nr_freepages = 0;
+       INIT_LIST_HEAD(&cc->freepages);
+       INIT_LIST_HEAD(&cc->migratepages);
+
        cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
        ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
                                                        cc->classzone_idx);
@@ -2158,7 +2169,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
                        cc->rescan = true;
                }
 
-               switch (isolate_migratepages(cc->zone, cc)) {
+               switch (isolate_migratepages(cc)) {
                case ISOLATE_ABORT:
                        ret = COMPACT_CONTENDED;
                        putback_movable_pages(&cc->migratepages);
@@ -2281,10 +2292,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 {
        enum compact_result ret;
        struct compact_control cc = {
-               .nr_freepages = 0,
-               .nr_migratepages = 0,
-               .total_migrate_scanned = 0,
-               .total_free_scanned = 0,
                .order = order,
                .search_order = order,
                .gfp_mask = gfp_mask,
@@ -2305,8 +2312,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 
        if (capture)
                current->capture_control = &capc;
-       INIT_LIST_HEAD(&cc.freepages);
-       INIT_LIST_HEAD(&cc.migratepages);
 
        ret = compact_zone(&cc, &capc);
 
@@ -2408,8 +2413,6 @@ static void compact_node(int nid)
        struct zone *zone;
        struct compact_control cc = {
                .order = -1,
-               .total_migrate_scanned = 0,
-               .total_free_scanned = 0,
                .mode = MIGRATE_SYNC,
                .ignore_skip_hint = true,
                .whole_zone = true,
@@ -2423,11 +2426,7 @@ static void compact_node(int nid)
                if (!populated_zone(zone))
                        continue;
 
-               cc.nr_freepages = 0;
-               cc.nr_migratepages = 0;
                cc.zone = zone;
-               INIT_LIST_HEAD(&cc.freepages);
-               INIT_LIST_HEAD(&cc.migratepages);
 
                compact_zone(&cc, NULL);
 
@@ -2529,8 +2528,6 @@ static void kcompactd_do_work(pg_data_t *pgdat)
        struct compact_control cc = {
                .order = pgdat->kcompactd_max_order,
                .search_order = pgdat->kcompactd_max_order,
-               .total_migrate_scanned = 0,
-               .total_free_scanned = 0,
                .classzone_idx = pgdat->kcompactd_classzone_idx,
                .mode = MIGRATE_SYNC_LIGHT,
                .ignore_skip_hint = false,
@@ -2554,16 +2551,10 @@ static void kcompactd_do_work(pg_data_t *pgdat)
                                                        COMPACT_CONTINUE)
                        continue;
 
-               cc.nr_freepages = 0;
-               cc.nr_migratepages = 0;
-               cc.total_migrate_scanned = 0;
-               cc.total_free_scanned = 0;
-               cc.zone = zone;
-               INIT_LIST_HEAD(&cc.freepages);
-               INIT_LIST_HEAD(&cc.migratepages);
-
                if (kthread_should_stop())
                        return;
+
+               cc.zone = zone;
                status = compact_zone(&cc, NULL);
 
                if (status == COMPACT_SUCCESS) {
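
All of the compact_control hunks above implement one refactor: per-run
counters and lists are now reset once inside compact_zone() rather than
at every call site. The shape of the pattern, as a generic sketch
(struct work_state and do_work are hypothetical):

    #include <linux/list.h>

    struct work_state {
            unsigned long scanned;          /* per-run counter */
            struct list_head pending;       /* per-run list */
            int order;                      /* per-call parameter */
    };

    static void do_work(struct work_state *ws)
    {
            /* Reset per-run state here, once, instead of at each caller. */
            ws->scanned = 0;
            INIT_LIST_HEAD(&ws->pending);

            /* ... the actual work, driven by ws->order ... */
    }
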
index 40667c2..85b7d08 100644
@@ -40,6 +40,7 @@
 #include <linux/rmap.h>
 #include <linux/delayacct.h>
 #include <linux/psi.h>
+#include <linux/ramfs.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -126,7 +127,7 @@ static void page_cache_delete(struct address_space *mapping,
        /* hugetlb pages are represented by a single entry in the xarray */
        if (!PageHuge(page)) {
                xas_set_order(&xas, page->index, compound_order(page));
-               nr = 1U << compound_order(page);
+               nr = compound_nr(page);
        }
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -203,8 +204,9 @@ static void unaccount_page_cache_page(struct address_space *mapping,
                __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
                if (PageTransHuge(page))
                        __dec_node_page_state(page, NR_SHMEM_THPS);
-       } else {
-               VM_BUG_ON_PAGE(PageTransHuge(page), page);
+       } else if (PageTransHuge(page)) {
+               __dec_node_page_state(page, NR_FILE_THPS);
+               filemap_nr_thps_dec(mapping);
        }
 
        /*
@@ -281,11 +283,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
  * @pvec: pagevec with pages to delete
  *
  * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index.
+ * from the mapping. The function expects @pvec to be sorted by page index
+ * and is optimised for it to be dense.
  * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the
- * mapping as well.
+ * @pvec.
  *
  * The function expects the i_pages lock to be held.
  */
@@ -294,40 +296,43 @@ static void page_cache_delete_batch(struct address_space *mapping,
 {
        XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
        int total_pages = 0;
-       int i = 0, tail_pages = 0;
+       int i = 0;
        struct page *page;
 
        mapping_set_update(&xas, mapping);
        xas_for_each(&xas, page, ULONG_MAX) {
-               if (i >= pagevec_count(pvec) && !tail_pages)
+               if (i >= pagevec_count(pvec))
                        break;
+
+               /* A swap/dax/shadow entry got inserted? Skip it. */
                if (xa_is_value(page))
                        continue;
-               if (!tail_pages) {
-                       /*
-                        * Some page got inserted in our range? Skip it. We
-                        * have our pages locked so they are protected from
-                        * being removed.
-                        */
-                       if (page != pvec->pages[i]) {
-                               VM_BUG_ON_PAGE(page->index >
-                                               pvec->pages[i]->index, page);
-                               continue;
-                       }
-                       WARN_ON_ONCE(!PageLocked(page));
-                       if (PageTransHuge(page) && !PageHuge(page))
-                               tail_pages = HPAGE_PMD_NR - 1;
+               /*
+                * A page got inserted in our range? Skip it. We have our
+                * pages locked so they are protected from being removed.
+                * If we see a page whose index is higher than ours, it
+                * means our page has been removed, which shouldn't be
+                * possible because we're holding the PageLock.
+                */
+               if (page != pvec->pages[i]) {
+                       VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
+                                       page);
+                       continue;
+               }
+
+               WARN_ON_ONCE(!PageLocked(page));
+
+               if (page->index == xas.xa_index)
                        page->mapping = NULL;
-                       /*
-                        * Leave page->index set: truncation lookup relies
-                        * upon it
-                        */
+               /* Leave page->index set: truncation lookup relies on it */
+
+               /*
+                * Move to the next page in the vector if this is a regular
+                * page, or if the index is that of the last sub-page of this
+                * compound page.
+                */
+               if (page->index + compound_nr(page) - 1 == xas.xa_index)
                        i++;
-               } else {
-                       VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
-                                       != pvec->pages[i]->index, page);
-                       tail_pages--;
-               }
                xas_store(&xas, NULL);
                total_pages++;
        }
@@ -408,7 +413,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
                .range_end = end,
        };
 
-       if (!mapping_cap_writeback_dirty(mapping))
+       if (!mapping_cap_writeback_dirty(mapping) ||
+           !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                return 0;
 
        wbc_attach_fdatawrite_inode(&wbc, mapping->host);
@@ -617,10 +623,13 @@ int filemap_fdatawait_keep_errors(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
 
+/* Returns true if writeback might be needed or already in progress. */
 static bool mapping_needs_writeback(struct address_space *mapping)
 {
-       return (!dax_mapping(mapping) && mapping->nrpages) ||
-           (dax_mapping(mapping) && mapping->nrexceptional);
+       if (dax_mapping(mapping))
+               return mapping->nrexceptional;
+
+       return mapping->nrpages;
 }
 
 int filemap_write_and_wait(struct address_space *mapping)
@@ -1516,7 +1525,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
        XA_STATE(xas, &mapping->i_pages, offset);
-       struct page *head, *page;
+       struct page *page;
 
        rcu_read_lock();
 repeat:
@@ -1531,25 +1540,19 @@ repeat:
        if (!page || xa_is_value(page))
                goto out;
 
-       head = compound_head(page);
-       if (!page_cache_get_speculative(head))
+       if (!page_cache_get_speculative(page))
                goto repeat;
 
-       /* The page was split under us? */
-       if (compound_head(page) != head) {
-               put_page(head);
-               goto repeat;
-       }
-
        /*
-        * Has the page moved?
+        * Has the page moved or been split?
         * This is part of the lockless pagecache protocol. See
         * include/linux/pagemap.h for details.
         */
        if (unlikely(page != xas_reload(&xas))) {
-               put_page(head);
+               put_page(page);
                goto repeat;
        }
+       page = find_subpage(page, offset);
 out:
        rcu_read_unlock();
 
@@ -1646,7 +1649,7 @@ repeat:
                }
 
                /* Has the page been truncated? */
-               if (unlikely(page->mapping != mapping)) {
+               if (unlikely(compound_head(page)->mapping != mapping)) {
                        unlock_page(page);
                        put_page(page);
                        goto repeat;
@@ -1731,7 +1734,6 @@ unsigned find_get_entries(struct address_space *mapping,
 
        rcu_read_lock();
        xas_for_each(&xas, page, ULONG_MAX) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /*
@@ -1742,17 +1744,13 @@ unsigned find_get_entries(struct address_space *mapping,
                if (xa_is_value(page))
                        goto export;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
+               page = find_subpage(page, xas.xa_index);
 
 export:
                indices[ret] = xas.xa_index;
@@ -1761,7 +1759,7 @@ export:
                        break;
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -1803,33 +1801,27 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 
        rcu_read_lock();
        xas_for_each(&xas, page, end) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /* Skip over shadow, swap and DAX entries */
                if (xa_is_value(page))
                        continue;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
 
-               pages[ret] = page;
+               pages[ret] = find_subpage(page, xas.xa_index);
                if (++ret == nr_pages) {
                        *start = xas.xa_index + 1;
                        goto out;
                }
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -1874,7 +1866,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 
        rcu_read_lock();
        for (page = xas_load(&xas); page; page = xas_next(&xas)) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /*
@@ -1884,24 +1875,19 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                if (xa_is_value(page))
                        break;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
 
-               pages[ret] = page;
+               pages[ret] = find_subpage(page, xas.xa_index);
                if (++ret == nr_pages)
                        break;
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -1937,7 +1923,6 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 
        rcu_read_lock();
        xas_for_each_marked(&xas, page, end, tag) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /*
@@ -1948,26 +1933,21 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
                if (xa_is_value(page))
                        continue;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
 
-               pages[ret] = page;
+               pages[ret] = find_subpage(page, xas.xa_index);
                if (++ret == nr_pages) {
                        *index = xas.xa_index + 1;
                        goto out;
                }
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -2562,12 +2542,12 @@ retry_find:
                goto out_retry;
 
        /* Did it get truncated? */
-       if (unlikely(page->mapping != mapping)) {
+       if (unlikely(compound_head(page)->mapping != mapping)) {
                unlock_page(page);
                put_page(page);
                goto retry_find;
        }
-       VM_BUG_ON_PAGE(page->index != offset, page);
+       VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
 
        /*
         * We have a locked page in the page cache, now we need to check
@@ -2648,7 +2628,7 @@ void filemap_map_pages(struct vm_fault *vmf,
        pgoff_t last_pgoff = start_pgoff;
        unsigned long max_idx;
        XA_STATE(xas, &mapping->i_pages, start_pgoff);
-       struct page *head, *page;
+       struct page *page;
 
        rcu_read_lock();
        xas_for_each(&xas, page, end_pgoff) {
@@ -2657,24 +2637,19 @@ void filemap_map_pages(struct vm_fault *vmf,
                if (xa_is_value(page))
                        goto next;
 
-               head = compound_head(page);
-
                /*
                 * Check for a locked page first, as a speculative
                 * reference may adversely influence page migration.
                 */
-               if (PageLocked(head))
+               if (PageLocked(page))
                        goto next;
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto next;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto skip;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto skip;
+               page = find_subpage(page, xas.xa_index);
 
                if (!PageUptodate(page) ||
                                PageReadahead(page) ||
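
The filemap.c hunks above all converge on the same protocol, condensed
here as a restatement of the code itself (no new API): take a speculative
reference on whatever the xarray returns, re-check it under that
reference, and only then resolve the subpage:

    page = xas_load(&xas);
    if (!page_cache_get_speculative(page))
            goto repeat;                    /* lost a reference race: retry */

    /* Has the page moved or been split? Re-check now that it is pinned. */
    if (unlikely(page != xas_reload(&xas))) {
            put_page(page);
            goto repeat;
    }
    page = find_subpage(page, offset);      /* head page -> wanted subpage */
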
index c64dca6..c431ca8 100644
@@ -46,6 +46,8 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
        if (WARN_ON_ONCE(nr_frames > vec->nr_allocated))
                nr_frames = vec->nr_allocated;
 
+       start = untagged_addr(start);
+
        down_read(&mm->mmap_sem);
        locked = 1;
        vma = find_vma_intersection(mm, start, start + 1);
index 98f13ab..8f236a3 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
        unsigned int page_mask;
 };
 
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
-                                  unsigned long npages,
-                                  set_dirty_func_t sdf)
-{
-       unsigned long index;
-
-       for (index = 0; index < npages; index++) {
-               struct page *page = compound_head(pages[index]);
-
-               /*
-                * Checking PageDirty at this point may race with
-                * clear_page_dirty_for_io(), but that's OK. Two key cases:
-                *
-                * 1) This code sees the page as already dirty, so it skips
-                * the call to sdf(). That could happen because
-                * clear_page_dirty_for_io() called page_mkclean(),
-                * followed by set_page_dirty(). However, now the page is
-                * going to get written back, which meets the original
-                * intention of setting it dirty, so all is well:
-                * clear_page_dirty_for_io() goes on to call
-                * TestClearPageDirty(), and write the page back.
-                *
-                * 2) This code sees the page as clean, so it calls sdf().
-                * The page stays dirty, despite being written back, so it
-                * gets written back again in the next writeback cycle.
-                * This is harmless.
-                */
-               if (!PageDirty(page))
-                       sdf(page);
-
-               put_user_page(page);
-       }
-}
-
 /**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages:  array of pages to be maybe marked dirty, and definitely released.
  * @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
  *
  * "gup-pinned page" refers to a page that has had one of the get_user_pages()
  * variants called on that page.
  *
  * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
  *
  * Please see the put_user_page() documentation for details.
  *
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty(), put_user_page().
  *
  */
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+                              bool make_dirty)
 {
-       __put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
+       unsigned long index;
 
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
- *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
- */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
-{
-       __put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+       /*
+        * TODO: this can be optimized for huge pages: if a series of pages is
+        * physically contiguous and part of the same compound page, then a
+        * single operation to the head page should suffice.
+        */
+
+       if (!make_dirty) {
+               put_user_pages(pages, npages);
+               return;
+       }
+
+       for (index = 0; index < npages; index++) {
+               struct page *page = compound_head(pages[index]);
+               /*
+                * Checking PageDirty at this point may race with
+                * clear_page_dirty_for_io(), but that's OK. Two key
+                * cases:
+                *
+                * 1) This code sees the page as already dirty, so it
+                * skips the call to set_page_dirty(). That could happen
+                * because clear_page_dirty_for_io() called
+                * page_mkclean(), followed by set_page_dirty().
+                * However, now the page is going to get written back,
+                * which meets the original intention of setting it
+                * dirty, so all is well: clear_page_dirty_for_io() goes
+                * on to call TestClearPageDirty(), and write the page
+                * back.
+                *
+                * 2) This code sees the page as clean, so it calls
+                * set_page_dirty(). The page stays dirty, despite being
+                * written back, so it gets written back again in the
+                * next writeback cycle. This is harmless.
+                */
+               if (!PageDirty(page))
+                       set_page_dirty_lock(page);
+               put_user_page(page);
+       }
 }
 EXPORT_SYMBOL(put_user_pages_dirty_lock);
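
A hedged sketch of the intended caller pattern (start, nr_pages, pages
and writing are assumed to come from the surrounding driver code):

    long pinned;

    pinned = get_user_pages(start, nr_pages,
                            writing ? FOLL_WRITE : 0, pages, NULL);
    if (pinned <= 0)
            return pinned ? pinned : -EFAULT;

    /* ... DMA to or from the pinned pages ... */

    /* Release in one call; dirty only if the device wrote the buffer. */
    put_user_pages_dirty_lock(pages, pinned, writing);
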
 
@@ -399,7 +384,7 @@ retry_locked:
                spin_unlock(ptl);
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
        }
-       if (flags & FOLL_SPLIT) {
+       if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
                int ret;
                page = pmd_page(*pmd);
                if (is_huge_zero_page(page)) {
@@ -408,7 +393,7 @@ retry_locked:
                        split_huge_pmd(vma, pmd, address);
                        if (pmd_trans_unstable(pmd))
                                ret = -EBUSY;
-               } else {
+               } else if (flags & FOLL_SPLIT) {
                        if (unlikely(!try_get_page(page))) {
                                spin_unlock(ptl);
                                return ERR_PTR(-ENOMEM);
@@ -420,6 +405,10 @@ retry_locked:
                        put_page(page);
                        if (pmd_none(*pmd))
                                return no_page_table(vma, flags);
+               } else {  /* flags & FOLL_SPLIT_PMD */
+                       spin_unlock(ptl);
+                       split_huge_pmd(vma, pmd, address);
+                       ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
                }
 
                return ret ? ERR_PTR(ret) :
@@ -799,6 +788,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        if (!nr_pages)
                return 0;
 
+       start = untagged_addr(start);
+
        VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
 
        /*
@@ -961,6 +952,8 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
        struct vm_area_struct *vma;
        vm_fault_t ret, major = 0;
 
+       address = untagged_addr(address);
+
        if (unlocked)
                fault_flags |= FAULT_FLAG_ALLOW_RETRY;
 
@@ -1460,7 +1453,7 @@ check_again:
                 * gup may start from a tail page. Advance step by the left
                 * part.
                 */
-               step = (1 << compound_order(head)) - (pages[i] - head);
+               step = compound_nr(head) - (pages[i] - head);
                /*
                 * If we get a page from the CMA zone, since we are going to
                 * be pinning these entries, we might as well move them out
@@ -1980,7 +1973,8 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 }
 
 static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
-                      unsigned long end, int write, struct page **pages, int *nr)
+                      unsigned long end, unsigned int flags,
+                      struct page **pages, int *nr)
 {
        unsigned long pte_end;
        struct page *head, *page;
@@ -1993,7 +1987,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 
        pte = READ_ONCE(*ptep);
 
-       if (!pte_access_permitted(pte, write))
+       if (!pte_access_permitted(pte, flags & FOLL_WRITE))
                return 0;
 
        /* hugepages are never "special" */
@@ -2030,7 +2024,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 }
 
 static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
-               unsigned int pdshift, unsigned long end, int write,
+               unsigned int pdshift, unsigned long end, unsigned int flags,
                struct page **pages, int *nr)
 {
        pte_t *ptep;
@@ -2040,7 +2034,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
        ptep = hugepte_offset(hugepd, addr, pdshift);
        do {
                next = hugepte_addr_end(addr, end, sz);
-               if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+               if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
                        return 0;
        } while (ptep++, addr = next, addr != end);
 
@@ -2048,7 +2042,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
 }
 #else
 static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
-               unsigned pdshift, unsigned long end, int write,
+               unsigned int pdshift, unsigned long end, unsigned int flags,
                struct page **pages, int *nr)
 {
        return 0;
@@ -2056,7 +2050,8 @@ static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
 #endif /* CONFIG_ARCH_HAS_HUGEPD */
 
 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-               unsigned long end, unsigned int flags, struct page **pages, int *nr)
+                       unsigned long end, unsigned int flags,
+                       struct page **pages, int *nr)
 {
        struct page *head, *page;
        int refs;
index de1f159..13cc937 100644
@@ -496,11 +496,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
        return pmd;
 }
 
-static inline struct list_head *page_deferred_list(struct page *page)
+#ifdef CONFIG_MEMCG
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 {
-       /* ->lru in the tail pages is occupied by compound_head. */
-       return &page[2].deferred_list;
+       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+       if (memcg)
+               return &memcg->deferred_split_queue;
+       else
+               return &pgdat->deferred_split_queue;
 }
+#else
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+{
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+       return &pgdat->deferred_split_queue;
+}
+#endif
 
 void prep_transhuge_page(struct page *page)
 {
@@ -645,40 +659,30 @@ release:
  *         available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
        const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-       gfp_t this_node = 0;
-
-#ifdef CONFIG_NUMA
-       struct mempolicy *pol;
-       /*
-        * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
-        * specified, to express a general desire to stay on the current
-        * node for optimistic allocation attempts. If the defrag mode
-        * and/or madvise hint requires the direct reclaim then we prefer
-        * to fallback to other node rather than node reclaim because that
-        * can lead to excessive reclaim even though there is free memory
-        * on other nodes. We expect that NUMA preferences are specified
-        * by memory policies.
-        */
-       pol = get_vma_policy(vma, addr);
-       if (pol->mode != MPOL_BIND)
-               this_node = __GFP_THISNODE;
-       mpol_cond_put(pol);
-#endif
 
+       /* Always do synchronous compaction */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+
+       /* Kick kcompactd and fail quickly */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
+               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+
+       /* Synchronous compaction if madvised, otherwise kick kcompactd */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            __GFP_KSWAPD_RECLAIM | this_node);
+               return GFP_TRANSHUGE_LIGHT |
+                       (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                       __GFP_KSWAPD_RECLAIM);
+
+       /* Only do synchronous compaction if madvised */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            this_node);
-       return GFP_TRANSHUGE_LIGHT | this_node;
+               return GFP_TRANSHUGE_LIGHT |
+                      (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+
+       return GFP_TRANSHUGE_LIGHT;
 }
 
 /* Caller must hold page table lock. */
@@ -750,8 +754,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                        pte_free(vma->vm_mm, pgtable);
                return ret;
        }
-       gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+       gfp = alloc_hugepage_direct_gfpmask(vma);
+       page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
        if (unlikely(!page)) {
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1358,9 +1362,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
        if (__transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow()) {
-               huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-               new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-                               haddr, numa_node_id());
+               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+               new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
        } else
                new_page = NULL;
 
@@ -2497,6 +2500,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        struct page *head = compound_head(page);
        pg_data_t *pgdat = page_pgdat(head);
        struct lruvec *lruvec;
+       struct address_space *swap_cache = NULL;
+       unsigned long offset = 0;
        int i;
 
        lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2504,6 +2509,14 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        /* complete memcg works before add pages to LRU */
        mem_cgroup_split_huge_fixup(head);
 
+       if (PageAnon(head) && PageSwapCache(head)) {
+               swp_entry_t entry = { .val = page_private(head) };
+
+               offset = swp_offset(entry);
+               swap_cache = swap_address_space(entry);
+               xa_lock(&swap_cache->i_pages);
+       }
+
        for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
                /* Some pages can be beyond i_size: drop them from page cache */
@@ -2513,6 +2526,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                        if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
                                shmem_uncharge(head->mapping->host, 1);
                        put_page(head + i);
+               } else if (!PageAnon(page)) {
+                       __xa_store(&head->mapping->i_pages, head[i].index,
+                                       head + i, 0);
+               } else if (swap_cache) {
+                       __xa_store(&swap_cache->i_pages, offset + i,
+                                       head + i, 0);
                }
        }
 
@@ -2523,10 +2542,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
                /* Additional pin to swap cache */
-               if (PageSwapCache(head))
+               if (PageSwapCache(head)) {
                        page_ref_add(head, 2);
-               else
+                       xa_unlock(&swap_cache->i_pages);
+               } else {
                        page_ref_inc(head);
+               }
        } else {
                /* Additional pin to page cache */
                page_ref_add(head, 2);
@@ -2673,6 +2694,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct page *head = compound_head(page);
        struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
        int count, mapcount, extra_pins, ret;
@@ -2759,17 +2781,22 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        }
 
        /* Prevent deferred_split_scan() touching ->_refcount */
-       spin_lock(&pgdata->split_queue_lock);
+       spin_lock(&ds_queue->split_queue_lock);
        count = page_count(head);
        mapcount = total_mapcount(head);
        if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
                if (!list_empty(page_deferred_list(head))) {
-                       pgdata->split_queue_len--;
+                       ds_queue->split_queue_len--;
                        list_del(page_deferred_list(head));
                }
-               if (mapping)
-                       __dec_node_page_state(page, NR_SHMEM_THPS);
-               spin_unlock(&pgdata->split_queue_lock);
+               if (mapping) {
+                       if (PageSwapBacked(page))
+                               __dec_node_page_state(page, NR_SHMEM_THPS);
+                       else
+                               __dec_node_page_state(page, NR_FILE_THPS);
+               }
+
+               spin_unlock(&ds_queue->split_queue_lock);
                __split_huge_page(page, list, end, flags);
                if (PageSwapCache(head)) {
                        swp_entry_t entry = { .val = page_private(head) };
@@ -2786,7 +2813,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        dump_page(page, "total_mapcount(head) > 0");
                        BUG();
                }
-               spin_unlock(&pgdata->split_queue_lock);
+               spin_unlock(&ds_queue->split_queue_lock);
 fail:          if (mapping)
                        xa_unlock(&mapping->i_pages);
                spin_unlock_irqrestore(&pgdata->lru_lock, flags);
@@ -2808,53 +2835,86 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
-       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
        unsigned long flags;
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (!list_empty(page_deferred_list(page))) {
-               pgdata->split_queue_len--;
+               ds_queue->split_queue_len--;
                list_del(page_deferred_list(page));
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
        free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
-       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
+#ifdef CONFIG_MEMCG
+       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+#endif
        unsigned long flags;
 
        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       /*
+        * The try_to_unmap() in the page reclaim path might reach here too;
+        * this may cause a race that corrupts the deferred split queue.
+        * And, if page reclaim is already handling the same page, it is
+        * unnecessary to handle it again in shrinker.
+        *
+        * Check PageSwapCache to determine if the page is being
+        * handled by page reclaim since THP swap would add the page into
+        * swap cache before calling try_to_unmap().
+        */
+       if (PageSwapCache(page))
+               return;
+
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
                count_vm_event(THP_DEFERRED_SPLIT_PAGE);
-               list_add_tail(page_deferred_list(page), &pgdata->split_queue);
-               pgdata->split_queue_len++;
+               list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+               ds_queue->split_queue_len++;
+#ifdef CONFIG_MEMCG
+               if (memcg)
+                       memcg_set_shrinker_bit(memcg, page_to_nid(page),
+                                              deferred_split_shrinker.id);
+#endif
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
                struct shrink_control *sc)
 {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
-       return READ_ONCE(pgdata->split_queue_len);
+       struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+
+#ifdef CONFIG_MEMCG
+       if (sc->memcg)
+               ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+       return READ_ONCE(ds_queue->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
                struct shrink_control *sc)
 {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
+       struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
        unsigned long flags;
        LIST_HEAD(list), *pos, *next;
        struct page *page;
        int split = 0;
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+#ifdef CONFIG_MEMCG
+       if (sc->memcg)
+               ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
-       list_for_each_safe(pos, next, &pgdata->split_queue) {
+       list_for_each_safe(pos, next, &ds_queue->split_queue) {
                page = list_entry((void *)pos, struct page, mapping);
                page = compound_head(page);
                if (get_page_unless_zero(page)) {
@@ -2862,12 +2922,12 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
                } else {
                        /* We lost race with put_compound_page() */
                        list_del_init(page_deferred_list(page));
-                       pgdata->split_queue_len--;
+                       ds_queue->split_queue_len--;
                }
                if (!--sc->nr_to_scan)
                        break;
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
@@ -2881,15 +2941,15 @@ next:
                put_page(page);
        }
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
-       list_splice_tail(&list, &pgdata->split_queue);
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+       list_splice_tail(&list, &ds_queue->split_queue);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
        /*
         * Stop shrinker if we didn't split any page, but the queue is empty.
         * This can happen if pages were freed under us.
         */
-       if (!split && list_empty(&pgdata->split_queue))
+       if (!split && list_empty(&ds_queue->split_queue))
                return SHRINK_STOP;
        return split;
 }
@@ -2898,7 +2958,8 @@ static struct shrinker deferred_split_shrinker = {
        .count_objects = deferred_split_count,
        .scan_objects = deferred_split_scan,
        .seeks = DEFAULT_SEEKS,
-       .flags = SHRINKER_NUMA_AWARE,
+       .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE |
+                SHRINKER_NONSLAB,
 };
 
 #ifdef CONFIG_DEBUG_FS
index 6d7296d..b45a953 100644
@@ -1084,11 +1084,10 @@ static bool pfn_range_valid_gigantic(struct zone *z,
        struct page *page;
 
        for (i = start_pfn; i < end_pfn; i++) {
-               if (!pfn_valid(i))
+               page = pfn_to_online_page(i);
+               if (!page)
                        return false;
 
-               page = pfn_to_page(i);
-
                if (page_zone(page) != z)
                        return false;
 
@@ -1405,12 +1404,25 @@ pgoff_t __basepage_index(struct page *page)
 }
 
 static struct page *alloc_buddy_huge_page(struct hstate *h,
-               gfp_t gfp_mask, int nid, nodemask_t *nmask)
+               gfp_t gfp_mask, int nid, nodemask_t *nmask,
+               nodemask_t *node_alloc_noretry)
 {
        int order = huge_page_order(h);
        struct page *page;
+       bool alloc_try_hard = true;
 
-       gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
+       /*
+        * By default we always try hard to allocate the page with
+        * __GFP_RETRY_MAYFAIL flag.  However, if we are allocating pages in
+        * a loop (to adjust global huge page counts) and previous allocation
+        * failed, do not continue to try hard on the same node.  Use the
+        * node_alloc_noretry bitmap to manage this state information.
+        */
+       if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))
+               alloc_try_hard = false;
+       gfp_mask |= __GFP_COMP|__GFP_NOWARN;
+       if (alloc_try_hard)
+               gfp_mask |= __GFP_RETRY_MAYFAIL;
        if (nid == NUMA_NO_NODE)
                nid = numa_mem_id();
        page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
@@ -1419,6 +1431,22 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
        else
                __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
 
+       /*
+        * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page, this
+        * indicates an overall state change.  Clear bit so that we resume
+        * normal 'try hard' allocations.
+        */
+       if (node_alloc_noretry && page && !alloc_try_hard)
+               node_clear(nid, *node_alloc_noretry);
+
+       /*
+        * If we tried hard to get a page but failed, set bit so that
+        * subsequent attempts will not try as hard until there is an
+        * overall state change.
+        */
+       if (node_alloc_noretry && !page && alloc_try_hard)
+               node_set(nid, *node_alloc_noretry);
+
        return page;
 }
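
The comment blocks above describe a small per-node state machine; in
isolation (alloc_from_node is a hypothetical allocator, noretry a
nodemask_t pointer) it reads:

    bool try_hard = !node_isset(nid, *noretry);
    struct page *page = alloc_from_node(nid, try_hard);    /* hypothetical */

    if (page && !try_hard)
            node_clear(nid, *noretry);      /* pressure eased: try hard again */
    else if (!page && try_hard)
            node_set(nid, *noretry);        /* hard try failed: back off */
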
 
@@ -1427,7 +1455,8 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
  * should use this function to get new hugetlb pages
  */
 static struct page *alloc_fresh_huge_page(struct hstate *h,
-               gfp_t gfp_mask, int nid, nodemask_t *nmask)
+               gfp_t gfp_mask, int nid, nodemask_t *nmask,
+               nodemask_t *node_alloc_noretry)
 {
        struct page *page;
 
@@ -1435,7 +1464,7 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
                page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
        else
                page = alloc_buddy_huge_page(h, gfp_mask,
-                               nid, nmask);
+                               nid, nmask, node_alloc_noretry);
        if (!page)
                return NULL;
 
@@ -1450,14 +1479,16 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
  * Allocates a fresh page to the hugetlb allocator pool in the node interleaved
  * manner.
  */
-static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
+                               nodemask_t *node_alloc_noretry)
 {
        struct page *page;
        int nr_nodes, node;
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
        for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-               page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed);
+               page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
+                                               node_alloc_noretry);
                if (page)
                        break;
        }
@@ -1601,7 +1632,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                goto out_unlock;
        spin_unlock(&hugetlb_lock);
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
        if (!page)
                return NULL;
 
@@ -1637,7 +1668,7 @@ struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
        if (hstate_is_gigantic(h))
                return NULL;
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
        if (!page)
                return NULL;
 
@@ -2207,13 +2238,33 @@ static void __init gather_bootmem_prealloc(void)
 static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
        unsigned long i;
+       nodemask_t *node_alloc_noretry;
+
+       if (!hstate_is_gigantic(h)) {
+               /*
+                * Bit mask controlling how hard we retry per-node allocations.
+                * Ignore errors as lower level routines can deal with
+                * node_alloc_noretry == NULL.  If this kmalloc fails at boot
+                * time, we are likely in bigger trouble.
+                */
+               node_alloc_noretry = kmalloc(sizeof(*node_alloc_noretry),
+                                               GFP_KERNEL);
+       } else {
+               /* gigantic pages are allocated at boot, so no retry mask is needed */
+               node_alloc_noretry = NULL;
+       }
+
+       /* start with a clear mask so the first allocations try hard everywhere */
+       if (node_alloc_noretry)
+               nodes_clear(*node_alloc_noretry);
 
        for (i = 0; i < h->max_huge_pages; ++i) {
                if (hstate_is_gigantic(h)) {
                        if (!alloc_bootmem_huge_page(h))
                                break;
                } else if (!alloc_pool_huge_page(h,
-                                        &node_states[N_MEMORY]))
+                                        &node_states[N_MEMORY],
+                                        node_alloc_noretry))
                        break;
                cond_resched();
        }
@@ -2225,6 +2276,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
                        h->max_huge_pages, buf, i);
                h->max_huge_pages = i;
        }
+
+       kfree(node_alloc_noretry);
 }
 
 static void __init hugetlb_init_hstates(void)
@@ -2323,6 +2376,17 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
                              nodemask_t *nodes_allowed)
 {
        unsigned long min_count, ret;
+       NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
+
+       /*
+        * Bit mask controlling how hard we retry per-node allocations.
+        * If we cannot allocate the bit mask, do not attempt to allocate
+        * the requested huge pages.
+        */
+       if (node_alloc_noretry)
+               nodes_clear(*node_alloc_noretry);
+       else
+               return -ENOMEM;
 
        spin_lock(&hugetlb_lock);
 
@@ -2356,6 +2420,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
        if (hstate_is_gigantic(h) && !IS_ENABLED(CONFIG_CONTIG_ALLOC)) {
                if (count > persistent_huge_pages(h)) {
                        spin_unlock(&hugetlb_lock);
+                       NODEMASK_FREE(node_alloc_noretry);
                        return -EINVAL;
                }
                /* Fall through to decrease pool */
@@ -2388,7 +2453,8 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
                /* yield cpu to avoid soft lockup */
                cond_resched();
 
-               ret = alloc_pool_huge_page(h, nodes_allowed);
+               ret = alloc_pool_huge_page(h, nodes_allowed,
+                                               node_alloc_noretry);
                spin_lock(&hugetlb_lock);
                if (!ret)
                        goto out;
@@ -2429,6 +2495,8 @@ out:
        h->max_huge_pages = persistent_huge_pages(h);
        spin_unlock(&hugetlb_lock);
 
+       NODEMASK_FREE(node_alloc_noretry);
+
        return 0;
 }
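The NODEMASK_ALLOC()/NODEMASK_FREE() pair above is why the explicit NULL check
is needed at all. Paraphrasing the include/linux/nodemask.h definitions (from
memory; verify against the tree):

    #if NODES_SHIFT > 8 /* nodemask_t is large: allocate it dynamically */
    #define NODEMASK_ALLOC(type, name, gfp_flags)   \
                    type *name = kmalloc(sizeof(*name), gfp_flags)
    #define NODEMASK_FREE(m)                        kfree(m)
    #else
    #define NODEMASK_ALLOC(type, name, gfp_flags)   type _##name, *name = &_##name
    #define NODEMASK_FREE(m)                        do {} while (0)
    #endif

On small-NODES_SHIFT configs the mask lives on the stack, NODEMASK_FREE() is a
no-op, and the -ENOMEM path above cannot trigger.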
 
index 68c2f2f..f1930fa 100644 (file)
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -139,7 +139,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
        if (!page_hcg || page_hcg != h_cg)
                goto out;
 
-       nr_pages = 1 << compound_order(page);
+       nr_pages = compound_nr(page);
        if (!parent) {
                parent = root_h_cgroup;
                /* root has no limit */
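compound_nr() and page_size(), used in this hunk and again in the kasan hunks
below, are recent mm.h helpers; paraphrasing their definitions (assuming a
5.4-era include/linux/mm.h):

    static inline unsigned long compound_nr(struct page *page)
    {
            return 1UL << compound_order(page);       /* number of pages */
    }

    static inline unsigned long page_size(struct page *page)
    {
            return PAGE_SIZE << compound_order(page); /* size in bytes */
    }

So the conversions are mechanical and do not change behavior.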
index a787a31..1960330 100644 (file)
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -5,6 +5,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/cpumask.h>
+#include <linux/mman.h>
 
 #include <linux/atomic.h>
 #include <linux/user_namespace.h>
@@ -35,6 +36,6 @@ struct mm_struct init_mm = {
        .arg_lock       =  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
        .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
        .user_ns        = &init_user_ns,
-       .cpu_bitmap     = { [BITS_TO_LONGS(NR_CPUS)] = 0},
+       .cpu_bitmap     = CPU_BITS_NONE,
        INIT_MM_CONTEXT(init_mm)
 };
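CPU_BITS_NONE is the canonical all-zero cpumask initializer; paraphrasing
include/linux/cpumask.h (assumption: the mainline definition of that era):

    #define CPU_BITS_NONE                            \
    {                                                \
            [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL   \
    }

The old designated initializer wrote index BITS_TO_LONGS(NR_CPUS) itself,
sizing the flexible cpu_bitmap[] one word longer than necessary; the
replacement initializes exactly the words the bitmap needs.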
index e323908..0d5f720 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,7 +39,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf);
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                unsigned long floor, unsigned long ceiling);
 
-static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
+static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
 {
        return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
 }
index 95d16a4..6814d6d 100644 (file)
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -304,7 +304,6 @@ size_t kasan_metadata_size(struct kmem_cache *cache)
 struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
                                        const void *object)
 {
-       BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
        return (void *)object + cache->kasan_info.alloc_meta_offset;
 }
 
@@ -315,14 +314,31 @@ struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
        return (void *)object + cache->kasan_info.free_meta_offset;
 }
 
+
+static void kasan_set_free_info(struct kmem_cache *cache,
+               void *object, u8 tag)
+{
+       struct kasan_alloc_meta *alloc_meta;
+       u8 idx = 0;
+
+       alloc_meta = get_alloc_info(cache, object);
+
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       idx = alloc_meta->free_track_idx;
+       alloc_meta->free_pointer_tag[idx] = tag;
+       alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
+#endif
+
+       set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
+}
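The modular increment above turns the free-track slots into a small ring. A
throwaway userspace check of the wraparound arithmetic (illustrative only, not
kernel code):

    #include <assert.h>

    #define KASAN_NR_FREE_STACKS 5  /* the CONFIG_KASAN_SW_TAGS_IDENTIFY size */

    int main(void)
    {
            unsigned char idx = 0;
            int i;

            /* six frees: the sixth reuses slot 0, evicting the oldest record */
            for (i = 0; i < 6; i++)
                    idx = (idx + 1) % KASAN_NR_FREE_STACKS;
            assert(idx == 1);       /* idx sequence: 1, 2, 3, 4, 0, 1 */
            return 0;
    }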
+
 void kasan_poison_slab(struct page *page)
 {
        unsigned long i;
 
-       for (i = 0; i < (1 << compound_order(page)); i++)
+       for (i = 0; i < compound_nr(page); i++)
                page_kasan_tag_reset(page + i);
-       kasan_poison_shadow(page_address(page),
-                       PAGE_SIZE << compound_order(page),
+       kasan_poison_shadow(page_address(page), page_size(page),
                        KASAN_KMALLOC_REDZONE);
 }
 
@@ -452,7 +468,8 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
                        unlikely(!(cache->flags & SLAB_KASAN)))
                return false;
 
-       set_track(&get_alloc_info(cache, object)->free_track, GFP_NOWAIT);
+       kasan_set_free_info(cache, object, tag);
+
        quarantine_put(get_free_info(cache, object), cache);
 
        return IS_ENABLED(CONFIG_KASAN_GENERIC);
@@ -524,7 +541,7 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
        page = virt_to_page(ptr);
        redzone_start = round_up((unsigned long)(ptr + size),
                                KASAN_SHADOW_SCALE_SIZE);
-       redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page));
+       redzone_end = (unsigned long)ptr + page_size(page);
 
        kasan_unpoison_shadow(ptr, size);
        kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
@@ -560,8 +577,7 @@ void kasan_poison_kfree(void *ptr, unsigned long ip)
                        kasan_report_invalid_free(ptr, ip);
                        return;
                }
-               kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
-                               KASAN_FREE_PAGE);
+               kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE);
        } else {
                __kasan_slab_free(page->slab_cache, ptr, ip, false);
        }
index 014f19e..35cff6b 100644 (file)
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -95,9 +95,19 @@ struct kasan_track {
        depot_stack_handle_t stack;
 };
 
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#define KASAN_NR_FREE_STACKS 5
+#else
+#define KASAN_NR_FREE_STACKS 1
+#endif
+
 struct kasan_alloc_meta {
        struct kasan_track alloc_track;
-       struct kasan_track free_track;
+       struct kasan_track free_track[KASAN_NR_FREE_STACKS];
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       u8 free_pointer_tag[KASAN_NR_FREE_STACKS];
+       u8 free_track_idx;
+#endif
 };
 
 struct qlist_node {
@@ -146,6 +156,8 @@ void kasan_report(unsigned long addr, size_t size,
                bool is_write, unsigned long ip);
 void kasan_report_invalid_free(void *object, unsigned long ip);
 
+struct page *kasan_addr_to_page(const void *addr);
+
 #if defined(CONFIG_KASAN_GENERIC) && \
        (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
 void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
index 0e5f965..6217821 100644 (file)
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -111,7 +111,7 @@ static void print_track(struct kasan_track *track, const char *prefix)
        }
 }
 
-static struct page *addr_to_page(const void *addr)
+struct page *kasan_addr_to_page(const void *addr)
 {
        if ((addr >= (void *)PAGE_OFFSET) &&
                        (addr < high_memory))
@@ -151,15 +151,38 @@ static void describe_object_addr(struct kmem_cache *cache, void *object,
                (void *)(object_addr + cache->object_size));
 }
 
+static struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
+               void *object, u8 tag)
+{
+       struct kasan_alloc_meta *alloc_meta;
+       int i = 0;
+
+       alloc_meta = get_alloc_info(cache, object);
+
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
+               if (alloc_meta->free_pointer_tag[i] == tag)
+                       break;
+       }
+       if (i == KASAN_NR_FREE_STACKS)
+               i = alloc_meta->free_track_idx;
+#endif
+
+       return &alloc_meta->free_track[i];
+}
+
 static void describe_object(struct kmem_cache *cache, void *object,
-                               const void *addr)
+                               const void *addr, u8 tag)
 {
        struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
 
        if (cache->flags & SLAB_KASAN) {
+               struct kasan_track *free_track;
+
                print_track(&alloc_info->alloc_track, "Allocated");
                pr_err("\n");
-               print_track(&alloc_info->free_track, "Freed");
+               free_track = kasan_get_free_track(cache, object, tag);
+               print_track(free_track, "Freed");
                pr_err("\n");
        }
 
@@ -344,9 +367,9 @@ static void print_address_stack_frame(const void *addr)
        print_decoded_frame_descr(frame_descr);
 }
 
-static void print_address_description(void *addr)
+static void print_address_description(void *addr, u8 tag)
 {
-       struct page *page = addr_to_page(addr);
+       struct page *page = kasan_addr_to_page(addr);
 
        dump_stack();
        pr_err("\n");
@@ -355,7 +378,7 @@ static void print_address_description(void *addr)
                struct kmem_cache *cache = page->slab_cache;
                void *object = nearest_obj(cache, page, addr);
 
-               describe_object(cache, object, addr);
+               describe_object(cache, object, addr, tag);
        }
 
        if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) {
@@ -435,13 +458,14 @@ static bool report_enabled(void)
 void kasan_report_invalid_free(void *object, unsigned long ip)
 {
        unsigned long flags;
+       u8 tag = get_tag(object);
 
+       object = reset_tag(object);
        start_report(&flags);
        pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip);
-       print_tags(get_tag(object), reset_tag(object));
-       object = reset_tag(object);
+       print_tags(tag, object);
        pr_err("\n");
-       print_address_description(object);
+       print_address_description(object, tag);
        pr_err("\n");
        print_shadow_for_address(object);
        end_report(&flags);
@@ -479,7 +503,7 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
        pr_err("\n");
 
        if (addr_has_shadow(untagged_addr)) {
-               print_address_description(untagged_addr);
+               print_address_description(untagged_addr, get_tag(tagged_addr));
                pr_err("\n");
                print_shadow_for_address(info.first_bad_addr);
        } else {
index 8eaf5f7..969ae08 100644 (file)
--- a/mm/kasan/tags_report.c
+++ b/mm/kasan/tags_report.c
 
 const char *get_bug_type(struct kasan_access_info *info)
 {
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       struct kasan_alloc_meta *alloc_meta;
+       struct kmem_cache *cache;
+       struct page *page;
+       const void *addr;
+       void *object;
+       u8 tag;
+       int i;
+
+       tag = get_tag(info->access_addr);
+       addr = reset_tag(info->access_addr);
+       page = kasan_addr_to_page(addr);
+       if (page && PageSlab(page)) {
+               cache = page->slab_cache;
+               object = nearest_obj(cache, page, (void *)addr);
+               alloc_meta = get_alloc_info(cache, object);
+
+               for (i = 0; i < KASAN_NR_FREE_STACKS; i++)
+                       if (alloc_meta->free_pointer_tag[i] == tag)
+                               return "use-after-free";
+               return "out-of-bounds";
+       }
+
+#endif
        return "invalid-access";
 }
 
index ccede24..f05d27b 100644 (file)
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -48,6 +48,7 @@ enum scan_result {
        SCAN_CGROUP_CHARGE_FAIL,
        SCAN_EXCEED_SWAP_PTE,
        SCAN_TRUNCATED,
+       SCAN_PAGE_HAS_PRIVATE,
 };
 
 #define CREATE_TRACE_POINTS
@@ -76,6 +77,8 @@ static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
+#define MAX_PTE_MAPPED_THP 8
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -86,6 +89,10 @@ struct mm_slot {
        struct hlist_node hash;
        struct list_head mm_node;
        struct mm_struct *mm;
+
+       /* pte-mapped THP in this mm */
+       int nr_pte_mapped_thp;
+       unsigned long pte_mapped_thp[MAX_PTE_MAPPED_THP];
 };
 
 /**
@@ -404,7 +411,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
            (vm_flags & VM_NOHUGEPAGE) ||
            test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                return false;
-       if (shmem_file(vma->vm_file)) {
+
+       if (shmem_file(vma->vm_file) ||
+           (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+            vma->vm_file &&
+            (vm_flags & VM_DENYWRITE))) {
                if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
                        return false;
                return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +467,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
        unsigned long hstart, hend;
 
        /*
-        * khugepaged does not yet work on non-shmem files or special
-        * mappings. And file-private shmem THP is not supported.
+        * For non-shmem files, khugepaged only supports read-only mappings.
+        * khugepaged does not yet work on special mappings, and
+        * file-private shmem THP is not supported.
         */
        if (!hugepage_vma_check(vma, vm_flags))
                return 0;
@@ -1016,12 +1028,13 @@ static void collapse_huge_page(struct mm_struct *mm,
 
        anon_vma_lock_write(vma->anon_vma);
 
-       pte = pte_offset_map(pmd, address);
-       pte_ptl = pte_lockptr(mm, pmd);
-
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
                                address, address + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
+
+       pte = pte_offset_map(pmd, address);
+       pte_ptl = pte_lockptr(mm, pmd);
+
        pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
        /*
         * After this gup_fast can't run anymore. This also removes
@@ -1248,6 +1261,159 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 }
 
 #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
+/*
+ * Notify khugepaged that the given addr of the mm is a pte-mapped THP. Then
+ * khugepaged should try to collapse the page table.
+ */
+static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
+                                        unsigned long addr)
+{
+       struct mm_slot *mm_slot;
+
+       VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+
+       spin_lock(&khugepaged_mm_lock);
+       mm_slot = get_mm_slot(mm);
+       if (likely(mm_slot && mm_slot->nr_pte_mapped_thp < MAX_PTE_MAPPED_THP))
+               mm_slot->pte_mapped_thp[mm_slot->nr_pte_mapped_thp++] = addr;
+       spin_unlock(&khugepaged_mm_lock);
+       return 0;
+}
+
+/**
+ * Try to collapse a pte-mapped THP for mm at address haddr.
+ *
+ * This function checks whether all the PTEs in the PMD are pointing to the
+ * right THP. If so, retract the page table so that the THP can be
+ * refaulted in as pmd-mapped.
+ */
+void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+{
+       unsigned long haddr = addr & HPAGE_PMD_MASK;
+       struct vm_area_struct *vma = find_vma(mm, haddr);
+       struct page *hpage = NULL;
+       pte_t *start_pte, *pte;
+       pmd_t *pmd, _pmd;
+       spinlock_t *ptl;
+       int count = 0;
+       int i;
+
+       if (!vma || !vma->vm_file ||
+           vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+               return;
+
+       /*
+        * This vm_flags may not have VM_HUGEPAGE if the page was not
+        * collapsed by this mm. But we can still collapse if the page is
+        * a valid THP. Add the extra VM_HUGEPAGE so hugepage_vma_check()
+        * will not fail the vma for missing VM_HUGEPAGE.
+        */
+       if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+               return;
+
+       pmd = mm_find_pmd(mm, haddr);
+       if (!pmd)
+               return;
+
+       start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+       /* step 1: check all mapped PTEs are to the right huge page */
+       for (i = 0, addr = haddr, pte = start_pte;
+            i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+               struct page *page;
+
+               /* empty pte, skip */
+               if (pte_none(*pte))
+                       continue;
+
+               /* page swapped out, abort */
+               if (!pte_present(*pte))
+                       goto abort;
+
+               page = vm_normal_page(vma, addr, *pte);
+
+               if (!page || !PageCompound(page))
+                       goto abort;
+
+               if (!hpage) {
+                       hpage = compound_head(page);
+                       /*
+                        * The mapping of the THP should not change.
+                        *
+                        * Note that uprobe, debugger, or MAP_PRIVATE may
+                        * change the page table, but the new page will
+                        * not pass PageCompound() check.
+                        */
+                       if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
+                               goto abort;
+               }
+
+               /*
+                * Confirm the page maps to the correct subpage.
+                *
+                * Note that uprobe, debugger, or MAP_PRIVATE may change
+                * the page table, but the new page will not pass
+                * PageCompound() check.
+                */
+               if (WARN_ON(hpage + i != page))
+                       goto abort;
+               count++;
+       }
+
+       /* step 2: adjust rmap */
+       for (i = 0, addr = haddr, pte = start_pte;
+            i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+               struct page *page;
+
+               if (pte_none(*pte))
+                       continue;
+               page = vm_normal_page(vma, addr, *pte);
+               page_remove_rmap(page, false);
+       }
+
+       pte_unmap_unlock(start_pte, ptl);
+
+       /* step 3: set proper refcount and mm_counters. */
+       if (hpage) {
+               page_ref_sub(hpage, count);
+               add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
+       }
+
+       /* step 4: collapse pmd */
+       ptl = pmd_lock(vma->vm_mm, pmd);
+       _pmd = pmdp_collapse_flush(vma, addr, pmd);
+       spin_unlock(ptl);
+       mm_dec_nr_ptes(mm);
+       pte_free(mm, pmd_pgtable(_pmd));
+       return;
+
+abort:
+       pte_unmap_unlock(start_pte, ptl);
+}
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+       struct mm_struct *mm = mm_slot->mm;
+       int i;
+
+       if (likely(mm_slot->nr_pte_mapped_thp == 0))
+               return 0;
+
+       if (!down_write_trylock(&mm->mmap_sem))
+               return -EBUSY;
+
+       if (unlikely(khugepaged_test_exit(mm)))
+               goto out;
+
+       for (i = 0; i < mm_slot->nr_pte_mapped_thp; i++)
+               collapse_pte_mapped_thp(mm, mm_slot->pte_mapped_thp[i]);
+
+out:
+       mm_slot->nr_pte_mapped_thp = 0;
+       up_write(&mm->mmap_sem);
+       return 0;
+}
+
 static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 {
        struct vm_area_struct *vma;
@@ -1256,7 +1422,22 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 
        i_mmap_lock_write(mapping);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               /* probably overkill */
+               /*
+                * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+                * got written to. These VMAs are likely not worth the cost of
+                * down_write(mmap_sem), as the PMD mapping is likely to be
+                * split later anyway.
+                *
+                * Note that the vma->anon_vma check is racy: it can be set up
+                * by the fault path after the check but before we take
+                * mmap_sem. But the page lock would prevent establishing any
+                * new ptes of the page, so we are safe.
+                *
+                * An alternative would be to drop the check, but to check that
+                * the page table is clear before calling pmdp_collapse_flush()
+                * under the ptl. That has a higher chance of recovering the
+                * THP for the VMA, but also a higher cost.
+                */
                if (vma->anon_vma)
                        continue;
                addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
@@ -1269,9 +1450,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                        continue;
                /*
                 * We need exclusive mmap_sem to retract page table.
-                * If trylock fails we would end up with pte-mapped THP after
-                * re-fault. Not ideal, but it's more important to not disturb
-                * the system too much.
+                *
+                * We use trylock due to lock inversion: we need to acquire
+                * mmap_sem while holding page lock. Fault path does it in
+                * reverse order. Trylock is a way to avoid deadlock.
                 */
                if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
                        spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
@@ -1281,18 +1463,21 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                        up_write(&vma->vm_mm->mmap_sem);
                        mm_dec_nr_ptes(vma->vm_mm);
                        pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+               } else {
+                       /* Try again later */
+                       khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
                }
        }
        i_mmap_unlock_write(mapping);
 }
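Together with khugepaged_add_pte_mapped_thp() above, the trylock-failure path
now has a recovery route instead of leaving a pte-mapped THP behind forever. A
condensed sketch of the intended flow (illustrative, based on the functions in
this patch):

    /*
     * retract_page_tables()                    down_write_trylock() fails
     *   -> khugepaged_add_pte_mapped_thp()     record haddr in the mm_slot
     * ...on the next khugepaged pass over this mm...
     * khugepaged_scan_mm_slot()
     *   -> khugepaged_collapse_pte_mapped_thps()
     *        -> collapse_pte_mapped_thp()      free the PTE table so the THP
     *                                          can refault as pmd-mapped
     */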
 
 /**
- * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into a huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
- *    + swap in pages if necessary;
+ *    + swap/gup in pages if necessary;
  *    + fill in gaps;
  *    + keep old pages around in case rollback is required;
  *  - if replacing succeeds:
@@ -1304,10 +1489,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *    + restore gaps in the page cache;
  *    + unlock and free huge page;
  */
-static void collapse_shmem(struct mm_struct *mm,
-               struct address_space *mapping, pgoff_t start,
+static void collapse_file(struct mm_struct *mm,
+               struct file *file, pgoff_t start,
                struct page **hpage, int node)
 {
+       struct address_space *mapping = file->f_mapping;
        gfp_t gfp;
        struct page *new_page;
        struct mem_cgroup *memcg;
@@ -1315,7 +1501,9 @@ static void collapse_shmem(struct mm_struct *mm,
        LIST_HEAD(pagelist);
        XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
        int nr_none = 0, result = SCAN_SUCCEED;
+       bool is_shmem = shmem_file(file);
 
+       VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
        VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
        /* Only allocate from the target node */
@@ -1347,7 +1535,8 @@ static void collapse_shmem(struct mm_struct *mm,
        } while (1);
 
        __SetPageLocked(new_page);
-       __SetPageSwapBacked(new_page);
+       if (is_shmem)
+               __SetPageSwapBacked(new_page);
        new_page->index = start;
        new_page->mapping = mapping;
 
@@ -1362,41 +1551,75 @@ static void collapse_shmem(struct mm_struct *mm,
                struct page *page = xas_next(&xas);
 
                VM_BUG_ON(index != xas.xa_index);
-               if (!page) {
-                       /*
-                        * Stop if extent has been truncated or hole-punched,
-                        * and is now completely empty.
-                        */
-                       if (index == start) {
-                               if (!xas_next_entry(&xas, end - 1)) {
-                                       result = SCAN_TRUNCATED;
+               if (is_shmem) {
+                       if (!page) {
+                               /*
+                                * Stop if extent has been truncated or
+                                * hole-punched, and is now completely
+                                * empty.
+                                */
+                               if (index == start) {
+                                       if (!xas_next_entry(&xas, end - 1)) {
+                                               result = SCAN_TRUNCATED;
+                                               goto xa_locked;
+                                       }
+                                       xas_set(&xas, index);
+                               }
+                               if (!shmem_charge(mapping->host, 1)) {
+                                       result = SCAN_FAIL;
                                        goto xa_locked;
                                }
-                               xas_set(&xas, index);
+                               xas_store(&xas, new_page);
+                               nr_none++;
+                               continue;
                        }
-                       if (!shmem_charge(mapping->host, 1)) {
-                               result = SCAN_FAIL;
+
+                       if (xa_is_value(page) || !PageUptodate(page)) {
+                               xas_unlock_irq(&xas);
+                               /* swap in or instantiate fallocated page */
+                               if (shmem_getpage(mapping->host, index, &page,
+                                                 SGP_NOHUGE)) {
+                                       result = SCAN_FAIL;
+                                       goto xa_unlocked;
+                               }
+                       } else if (trylock_page(page)) {
+                               get_page(page);
+                               xas_unlock_irq(&xas);
+                       } else {
+                               result = SCAN_PAGE_LOCK;
                                goto xa_locked;
                        }
-                       xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
-                       nr_none++;
-                       continue;
-               }
-
-               if (xa_is_value(page) || !PageUptodate(page)) {
-                       xas_unlock_irq(&xas);
-                       /* swap in or instantiate fallocated page */
-                       if (shmem_getpage(mapping->host, index, &page,
-                                               SGP_NOHUGE)) {
+               } else {        /* !is_shmem */
+                       if (!page || xa_is_value(page)) {
+                               xas_unlock_irq(&xas);
+                               page_cache_sync_readahead(mapping, &file->f_ra,
+                                                         file, index,
+                                                         PAGE_SIZE);
+                               /* drain pagevecs to help isolate_lru_page() */
+                               lru_add_drain();
+                               page = find_lock_page(mapping, index);
+                               if (unlikely(page == NULL)) {
+                                       result = SCAN_FAIL;
+                                       goto xa_unlocked;
+                               }
+                       } else if (!PageUptodate(page)) {
+                               xas_unlock_irq(&xas);
+                               wait_on_page_locked(page);
+                               if (!trylock_page(page)) {
+                                       result = SCAN_PAGE_LOCK;
+                                       goto xa_unlocked;
+                               }
+                               get_page(page);
+                       } else if (PageDirty(page)) {
                                result = SCAN_FAIL;
-                               goto xa_unlocked;
+                               goto xa_locked;
+                       } else if (trylock_page(page)) {
+                               get_page(page);
+                               xas_unlock_irq(&xas);
+                       } else {
+                               result = SCAN_PAGE_LOCK;
+                               goto xa_locked;
                        }
-               } else if (trylock_page(page)) {
-                       get_page(page);
-                       xas_unlock_irq(&xas);
-               } else {
-                       result = SCAN_PAGE_LOCK;
-                       goto xa_locked;
                }
 
                /*
@@ -1425,6 +1648,12 @@ static void collapse_shmem(struct mm_struct *mm,
                        goto out_unlock;
                }
 
+               if (page_has_private(page) &&
+                   !try_to_release_page(page, GFP_KERNEL)) {
+                       result = SCAN_PAGE_HAS_PRIVATE;
+                       goto out_unlock;
+               }
+
                if (page_mapped(page))
                        unmap_mapping_pages(mapping, index, 1, false);
 
@@ -1454,7 +1683,7 @@ static void collapse_shmem(struct mm_struct *mm,
                list_add_tail(&page->lru, &pagelist);
 
                /* Finally, replace with the new page. */
-               xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+               xas_store(&xas, new_page);
                continue;
 out_unlock:
                unlock_page(page);
@@ -1462,12 +1691,20 @@ out_unlock:
                goto xa_unlocked;
        }
 
-       __inc_node_page_state(new_page, NR_SHMEM_THPS);
+       if (is_shmem) {
+               __inc_node_page_state(new_page, NR_SHMEM_THPS);
+       } else {
+               __inc_node_page_state(new_page, NR_FILE_THPS);
+               filemap_nr_thps_inc(mapping);
+       }
+
        if (nr_none) {
                struct zone *zone = page_zone(new_page);
 
                __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-               __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+               if (is_shmem)
+                       __mod_node_page_state(zone->zone_pgdat,
+                                             NR_SHMEM, nr_none);
        }
 
 xa_locked:
@@ -1505,10 +1742,15 @@ xa_unlocked:
 
                SetPageUptodate(new_page);
                page_ref_add(new_page, HPAGE_PMD_NR - 1);
-               set_page_dirty(new_page);
                mem_cgroup_commit_charge(new_page, memcg, false, true);
+
+               if (is_shmem) {
+                       set_page_dirty(new_page);
+                       lru_cache_add_anon(new_page);
+               } else {
+                       lru_cache_add_file(new_page);
+               }
                count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
-               lru_cache_add_anon(new_page);
 
                /*
                 * Remove pte page tables, so we can re-fault the page as huge.
@@ -1523,7 +1765,9 @@ xa_unlocked:
                /* Something went wrong: roll back page cache changes */
                xas_lock_irq(&xas);
                mapping->nrpages -= nr_none;
-               shmem_uncharge(mapping->host, nr_none);
+
+               if (is_shmem)
+                       shmem_uncharge(mapping->host, nr_none);
 
                xas_set(&xas, start);
                xas_for_each(&xas, page, end - 1) {
@@ -1563,11 +1807,11 @@ out:
        /* TODO: tracepoints */
 }
 
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-               struct address_space *mapping,
-               pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+               struct file *file, pgoff_t start, struct page **hpage)
 {
        struct page *page = NULL;
+       struct address_space *mapping = file->f_mapping;
        XA_STATE(xas, &mapping->i_pages, start);
        int present, swap;
        int node = NUMA_NO_NODE;
@@ -1606,7 +1850,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
                        break;
                }
 
-               if (page_count(page) != 1 + page_mapcount(page)) {
+               if (page_count(page) !=
+                   1 + page_mapcount(page) + page_has_private(page)) {
                        result = SCAN_PAGE_COUNT;
                        break;
                }
@@ -1631,19 +1876,23 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
                        result = SCAN_EXCEED_NONE_PTE;
                } else {
                        node = khugepaged_find_target_node();
-                       collapse_shmem(mm, mapping, start, hpage, node);
+                       collapse_file(mm, file, start, hpage, node);
                }
        }
 
        /* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-               struct address_space *mapping,
-               pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+               struct file *file, pgoff_t start, struct page **hpage)
 {
        BUILD_BUG();
 }
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+       return 0;
+}
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
@@ -1668,6 +1917,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
                khugepaged_scan.mm_slot = mm_slot;
        }
        spin_unlock(&khugepaged_mm_lock);
+       khugepaged_collapse_pte_mapped_thps(mm_slot);
 
        mm = mm_slot->mm;
        /*
@@ -1713,17 +1963,18 @@ skip:
                        VM_BUG_ON(khugepaged_scan.address < hstart ||
                                  khugepaged_scan.address + HPAGE_PMD_SIZE >
                                  hend);
-                       if (shmem_file(vma->vm_file)) {
+                       if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
                                struct file *file;
                                pgoff_t pgoff = linear_page_index(vma,
                                                khugepaged_scan.address);
-                               if (!shmem_huge_enabled(vma))
+
+                               if (shmem_file(vma->vm_file) &&
+                                   !shmem_huge_enabled(vma))
                                        goto skip;
                                file = get_file(vma->vm_file);
                                up_read(&mm->mmap_sem);
                                ret = 1;
-                               khugepaged_scan_shmem(mm, file->f_mapping,
-                                               pgoff, hpage);
+                               khugepaged_scan_file(mm, file, pgoff, hpage);
                                fput(file);
                        } else {
                                ret = khugepaged_scan_pmd(mm, vma,
index f6e6029..2446076 100644 (file)
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -168,6 +168,8 @@ struct kmemleak_object {
 #define OBJECT_REPORTED                (1 << 1)
 /* flag set to not scan the object */
 #define OBJECT_NO_SCAN         (1 << 2)
+/* flag set to fully scan the object when scan_area allocation failed */
+#define OBJECT_FULL_SCAN       (1 << 3)
 
 #define HEX_PREFIX             "    "
 /* number of bytes to print per line; must be 16 or 32 */
@@ -183,6 +185,10 @@ struct kmemleak_object {
 static LIST_HEAD(object_list);
 /* the list of gray-colored objects (see color_gray comment below) */
 static LIST_HEAD(gray_list);
+/* static memory pool used when object_cache is unavailable or fails */
+static struct kmemleak_object mem_pool[CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE];
+static int mem_pool_free_count = ARRAY_SIZE(mem_pool);
+static LIST_HEAD(mem_pool_free_list);
 /* search tree for object boundaries */
 static struct rb_root object_tree_root = RB_ROOT;
 /* rw_lock protecting the access to object_list and object_tree_root */
@@ -193,13 +199,11 @@ static struct kmem_cache *object_cache;
 static struct kmem_cache *scan_area_cache;
 
 /* set if tracing memory operations is enabled */
-static int kmemleak_enabled;
+static int kmemleak_enabled = 1;
 /* same as above but only for the kmemleak_free() callback */
-static int kmemleak_free_enabled;
+static int kmemleak_free_enabled = 1;
 /* set in the late_initcall if there were no errors */
 static int kmemleak_initialized;
-/* enables or disables early logging of the memory operations */
-static int kmemleak_early_log = 1;
 /* set if a kmemleak warning was issued */
 static int kmemleak_warning;
 /* set if a fatal kmemleak error has occurred */
@@ -227,49 +231,6 @@ static bool kmemleak_found_leaks;
 static bool kmemleak_verbose;
 module_param_named(verbose, kmemleak_verbose, bool, 0600);
 
-/*
- * Early object allocation/freeing logging. Kmemleak is initialized after the
- * kernel allocator. However, both the kernel allocator and kmemleak may
- * allocate memory blocks which need to be tracked. Kmemleak defines an
- * arbitrary buffer to hold the allocation/freeing information before it is
- * fully initialized.
- */
-
-/* kmemleak operation type for early logging */
-enum {
-       KMEMLEAK_ALLOC,
-       KMEMLEAK_ALLOC_PERCPU,
-       KMEMLEAK_FREE,
-       KMEMLEAK_FREE_PART,
-       KMEMLEAK_FREE_PERCPU,
-       KMEMLEAK_NOT_LEAK,
-       KMEMLEAK_IGNORE,
-       KMEMLEAK_SCAN_AREA,
-       KMEMLEAK_NO_SCAN,
-       KMEMLEAK_SET_EXCESS_REF
-};
-
-/*
- * Structure holding the information passed to kmemleak callbacks during the
- * early logging.
- */
-struct early_log {
-       int op_type;                    /* kmemleak operation type */
-       int min_count;                  /* minimum reference count */
-       const void *ptr;                /* allocated/freed memory block */
-       union {
-               size_t size;            /* memory block size */
-               unsigned long excess_ref; /* surplus reference passing */
-       };
-       unsigned long trace[MAX_TRACE]; /* stack trace */
-       unsigned int trace_len;         /* stack trace length */
-};
-
-/* early logging buffer and current position */
-static struct early_log
-       early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
-static int crt_early_log __initdata;
-
 static void kmemleak_disable(void);
 
 /*
@@ -449,6 +410,54 @@ static int get_object(struct kmemleak_object *object)
        return atomic_inc_not_zero(&object->use_count);
 }
 
+/*
+ * Memory pool allocation and freeing. kmemleak_lock must not be held.
+ */
+static struct kmemleak_object *mem_pool_alloc(gfp_t gfp)
+{
+       unsigned long flags;
+       struct kmemleak_object *object;
+
+       /* try the slab allocator first */
+       if (object_cache) {
+               object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
+               if (object)
+                       return object;
+       }
+
+       /* slab allocation failed, try the memory pool */
+       write_lock_irqsave(&kmemleak_lock, flags);
+       object = list_first_entry_or_null(&mem_pool_free_list,
+                                         typeof(*object), object_list);
+       if (object)
+               list_del(&object->object_list);
+       else if (mem_pool_free_count)
+               object = &mem_pool[--mem_pool_free_count];
+       else
+               pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n");
+       write_unlock_irqrestore(&kmemleak_lock, flags);
+
+       return object;
+}
+
+/*
+ * Return the object to either the slab allocator or the memory pool.
+ */
+static void mem_pool_free(struct kmemleak_object *object)
+{
+       unsigned long flags;
+
+       if (object < mem_pool || object >= mem_pool + ARRAY_SIZE(mem_pool)) {
+               kmem_cache_free(object_cache, object);
+               return;
+       }
+
+       /* add the object to the memory pool free list */
+       write_lock_irqsave(&kmemleak_lock, flags);
+       list_add(&object->object_list, &mem_pool_free_list);
+       write_unlock_irqrestore(&kmemleak_lock, flags);
+}
+
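The pool replaces the early-log replay deleted further down. A sketch of the
resulting boot-time object lifecycle, assuming the usual init ordering
(illustrative):

    /*
     * kmemleak_alloc()               before kmemleak_init(): object_cache is
     *   -> create_object()           still NULL, so...
     *        -> mem_pool_alloc()     ...objects come from the static mem_pool[]
     * kmemleak_init()                creates object_cache and scan_area_cache
     * kmemleak_alloc()               later calls are served by
     *   -> mem_pool_alloc()          kmem_cache_alloc(); freed pool slots are
     *                                recycled via mem_pool_free_list
     */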
 /*
  * RCU callback to free a kmemleak_object.
  */
@@ -467,7 +476,7 @@ static void free_object_rcu(struct rcu_head *rcu)
                hlist_del(&area->node);
                kmem_cache_free(scan_area_cache, area);
        }
-       kmem_cache_free(object_cache, object);
+       mem_pool_free(object);
 }
 
 /*
@@ -485,7 +494,15 @@ static void put_object(struct kmemleak_object *object)
        /* should only get here after delete_object was called */
        WARN_ON(object->flags & OBJECT_ALLOCATED);
 
-       call_rcu(&object->rcu, free_object_rcu);
+       /*
+        * It may be too early for the RCU callbacks; however, there is no
+        * concurrent object_list traversal when !object_cache, and all objects
+        * came from the memory pool. Free the object directly.
+        */
+       if (object_cache)
+               call_rcu(&object->rcu, free_object_rcu);
+       else
+               free_object_rcu(&object->rcu);
 }
 
 /*
@@ -509,6 +526,16 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
        return object;
 }
 
+/*
+ * Remove an object from the object_tree_root and object_list. Must be called
+ * with the kmemleak_lock held _if_ kmemleak is still enabled.
+ */
+static void __remove_object(struct kmemleak_object *object)
+{
+       rb_erase(&object->rb_node, &object_tree_root);
+       list_del_rcu(&object->object_list);
+}
+
 /*
  * Look up an object in the object search tree and remove it from both
  * object_tree_root and object_list. The returned object's use_count should be
@@ -521,10 +548,8 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
 
        write_lock_irqsave(&kmemleak_lock, flags);
        object = lookup_object(ptr, alias);
-       if (object) {
-               rb_erase(&object->rb_node, &object_tree_root);
-               list_del_rcu(&object->object_list);
-       }
+       if (object)
+               __remove_object(object);
        write_unlock_irqrestore(&kmemleak_lock, flags);
 
        return object;
@@ -550,7 +575,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
        struct rb_node **link, *rb_parent;
        unsigned long untagged_ptr;
 
-       object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
+       object = mem_pool_alloc(gfp);
        if (!object) {
                pr_warn("Cannot allocate a kmemleak_object structure\n");
                kmemleak_disable();
@@ -689,9 +714,7 @@ static void delete_object_part(unsigned long ptr, size_t size)
        /*
         * Create one or two objects that may result from the memory block
         * split. Note that partial freeing is only done by free_bootmem() and
-        * this happens before kmemleak_init() is called. The path below is
-        * only executed during early log recording in kmemleak_init(), so
-        * GFP_KERNEL is enough.
+        * this happens before kmemleak_init() is called.
         */
        start = object->pointer;
        end = object->pointer + object->size;
@@ -763,7 +786,7 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 {
        unsigned long flags;
        struct kmemleak_object *object;
-       struct kmemleak_scan_area *area;
+       struct kmemleak_scan_area *area = NULL;
 
        object = find_and_get_object(ptr, 1);
        if (!object) {
@@ -772,13 +795,16 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
                return;
        }
 
-       area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
-       if (!area) {
-               pr_warn("Cannot allocate a scan area\n");
-               goto out;
-       }
+       if (scan_area_cache)
+               area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
 
        spin_lock_irqsave(&object->lock, flags);
+       if (!area) {
+               pr_warn_once("Cannot allocate a scan area, scanning the full object\n");
+               /* mark the object for full scan to avoid false positives */
+               object->flags |= OBJECT_FULL_SCAN;
+               goto out_unlock;
+       }
        if (size == SIZE_MAX) {
                size = object->pointer + object->size - ptr;
        } else if (ptr + size > object->pointer + object->size) {
@@ -795,7 +821,6 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
        hlist_add_head(&area->node, &object->area_list);
 out_unlock:
        spin_unlock_irqrestore(&object->lock, flags);
-out:
        put_object(object);
 }
 
@@ -845,86 +870,6 @@ static void object_no_scan(unsigned long ptr)
        put_object(object);
 }
 
-/*
- * Log an early kmemleak_* call to the early_log buffer. These calls will be
- * processed later once kmemleak is fully initialized.
- */
-static void __init log_early(int op_type, const void *ptr, size_t size,
-                            int min_count)
-{
-       unsigned long flags;
-       struct early_log *log;
-
-       if (kmemleak_error) {
-               /* kmemleak stopped recording, just count the requests */
-               crt_early_log++;
-               return;
-       }
-
-       if (crt_early_log >= ARRAY_SIZE(early_log)) {
-               crt_early_log++;
-               kmemleak_disable();
-               return;
-       }
-
-       /*
-        * There is no need for locking since the kernel is still in UP mode
-        * at this stage. Disabling the IRQs is enough.
-        */
-       local_irq_save(flags);
-       log = &early_log[crt_early_log];
-       log->op_type = op_type;
-       log->ptr = ptr;
-       log->size = size;
-       log->min_count = min_count;
-       log->trace_len = __save_stack_trace(log->trace);
-       crt_early_log++;
-       local_irq_restore(flags);
-}
-
-/*
- * Log an early allocated block and populate the stack trace.
- */
-static void early_alloc(struct early_log *log)
-{
-       struct kmemleak_object *object;
-       unsigned long flags;
-       int i;
-
-       if (!kmemleak_enabled || !log->ptr || IS_ERR(log->ptr))
-               return;
-
-       /*
-        * RCU locking needed to ensure object is not freed via put_object().
-        */
-       rcu_read_lock();
-       object = create_object((unsigned long)log->ptr, log->size,
-                              log->min_count, GFP_ATOMIC);
-       if (!object)
-               goto out;
-       spin_lock_irqsave(&object->lock, flags);
-       for (i = 0; i < log->trace_len; i++)
-               object->trace[i] = log->trace[i];
-       object->trace_len = log->trace_len;
-       spin_unlock_irqrestore(&object->lock, flags);
-out:
-       rcu_read_unlock();
-}
-
-/*
- * Log an early allocated block and populate the stack trace.
- */
-static void early_alloc_percpu(struct early_log *log)
-{
-       unsigned int cpu;
-       const void __percpu *ptr = log->ptr;
-
-       for_each_possible_cpu(cpu) {
-               log->ptr = per_cpu_ptr(ptr, cpu);
-               early_alloc(log);
-       }
-}
-
 /**
  * kmemleak_alloc - register a newly allocated object
  * @ptr:       pointer to beginning of the object
@@ -946,8 +891,6 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                create_object((unsigned long)ptr, size, min_count, gfp);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
@@ -975,8 +918,6 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
                for_each_possible_cpu(cpu)
                        create_object((unsigned long)per_cpu_ptr(ptr, cpu),
                                      size, 0, gfp);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
 
@@ -1001,11 +942,6 @@ void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp
                create_object((unsigned long)area->addr, size, 2, gfp);
                object_set_excess_ref((unsigned long)area,
                                      (unsigned long)area->addr);
-       } else if (kmemleak_early_log) {
-               log_early(KMEMLEAK_ALLOC, area->addr, size, 2);
-               /* reusing early_log.size for storing area->addr */
-               log_early(KMEMLEAK_SET_EXCESS_REF,
-                         area, (unsigned long)area->addr, 0);
        }
 }
 EXPORT_SYMBOL_GPL(kmemleak_vmalloc);
@@ -1023,8 +959,6 @@ void __ref kmemleak_free(const void *ptr)
 
        if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
                delete_object_full((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_FREE, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free);
 
@@ -1043,8 +977,6 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                delete_object_part((unsigned long)ptr, size);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
@@ -1065,8 +997,6 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr)
                for_each_possible_cpu(cpu)
                        delete_object_full((unsigned long)per_cpu_ptr(ptr,
                                                                      cpu));
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_FREE_PERCPU, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
 
@@ -1117,8 +1047,6 @@ void __ref kmemleak_not_leak(const void *ptr)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                make_gray_object((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_not_leak);
 
@@ -1137,8 +1065,6 @@ void __ref kmemleak_ignore(const void *ptr)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                make_black_object((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_IGNORE, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_ignore);
 
@@ -1159,8 +1085,6 @@ void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 
        if (kmemleak_enabled && ptr && size && !IS_ERR(ptr))
                add_scan_area((unsigned long)ptr, size, gfp);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0);
 }
 EXPORT_SYMBOL(kmemleak_scan_area);
 
@@ -1179,8 +1103,6 @@ void __ref kmemleak_no_scan(const void *ptr)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                object_no_scan((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_no_scan);
 
@@ -1408,7 +1330,8 @@ static void scan_object(struct kmemleak_object *object)
        if (!(object->flags & OBJECT_ALLOCATED))
                /* already freed object */
                goto out;
-       if (hlist_empty(&object->area_list)) {
+       if (hlist_empty(&object->area_list) ||
+           object->flags & OBJECT_FULL_SCAN) {
                void *start = (void *)object->pointer;
                void *end = (void *)(object->pointer + object->size);
                void *next;
@@ -1919,12 +1842,16 @@ static const struct file_operations kmemleak_fops = {
 
 static void __kmemleak_do_cleanup(void)
 {
-       struct kmemleak_object *object;
+       struct kmemleak_object *object, *tmp;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(object, &object_list, object_list)
-               delete_object_full(object->pointer);
-       rcu_read_unlock();
+       /*
+        * Kmemleak has already been disabled; there is no need for RCU list
+        * traversal or for holding kmemleak_lock.
+        */
+       list_for_each_entry_safe(object, tmp, &object_list, object_list) {
+               __remove_object(object);
+               __delete_object(object);
+       }
 }
 
 /*
@@ -1966,7 +1893,6 @@ static void kmemleak_disable(void)
 
        /* stop any memory operation tracing */
        kmemleak_enabled = 0;
-       kmemleak_early_log = 0;
 
        /* check whether it is too early for a kernel thread */
        if (kmemleak_initialized)
@@ -1994,20 +1920,11 @@ static int __init kmemleak_boot_config(char *str)
 }
 early_param("kmemleak", kmemleak_boot_config);
 
-static void __init print_log_trace(struct early_log *log)
-{
-       pr_notice("Early log backtrace:\n");
-       stack_trace_print(log->trace, log->trace_len, 2);
-}
-
 /*
  * Kmemleak initialization.
  */
 void __init kmemleak_init(void)
 {
-       int i;
-       unsigned long flags;
-
 #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
        if (!kmemleak_skip_disable) {
                kmemleak_disable();
@@ -2015,28 +1932,15 @@ void __init kmemleak_init(void)
        }
 #endif
 
+       if (kmemleak_error)
+               return;
+
        jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
        jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
 
        object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
        scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
 
-       if (crt_early_log > ARRAY_SIZE(early_log))
-               pr_warn("Early log buffer exceeded (%d), please increase DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n",
-                       crt_early_log);
-
-       /* the kernel is still in UP mode, so disabling the IRQs is enough */
-       local_irq_save(flags);
-       kmemleak_early_log = 0;
-       if (kmemleak_error) {
-               local_irq_restore(flags);
-               return;
-       } else {
-               kmemleak_enabled = 1;
-               kmemleak_free_enabled = 1;
-       }
-       local_irq_restore(flags);
-
        /* register the data/bss sections */
        create_object((unsigned long)_sdata, _edata - _sdata,
                      KMEMLEAK_GREY, GFP_ATOMIC);
@@ -2047,57 +1951,6 @@ void __init kmemleak_init(void)
                create_object((unsigned long)__start_ro_after_init,
                              __end_ro_after_init - __start_ro_after_init,
                              KMEMLEAK_GREY, GFP_ATOMIC);
-
-       /*
-        * This is the point where tracking allocations is safe. Automatic
-        * scanning is started during the late initcall. Add the early logged
-        * callbacks to the kmemleak infrastructure.
-        */
-       for (i = 0; i < crt_early_log; i++) {
-               struct early_log *log = &early_log[i];
-
-               switch (log->op_type) {
-               case KMEMLEAK_ALLOC:
-                       early_alloc(log);
-                       break;
-               case KMEMLEAK_ALLOC_PERCPU:
-                       early_alloc_percpu(log);
-                       break;
-               case KMEMLEAK_FREE:
-                       kmemleak_free(log->ptr);
-                       break;
-               case KMEMLEAK_FREE_PART:
-                       kmemleak_free_part(log->ptr, log->size);
-                       break;
-               case KMEMLEAK_FREE_PERCPU:
-                       kmemleak_free_percpu(log->ptr);
-                       break;
-               case KMEMLEAK_NOT_LEAK:
-                       kmemleak_not_leak(log->ptr);
-                       break;
-               case KMEMLEAK_IGNORE:
-                       kmemleak_ignore(log->ptr);
-                       break;
-               case KMEMLEAK_SCAN_AREA:
-                       kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
-                       break;
-               case KMEMLEAK_NO_SCAN:
-                       kmemleak_no_scan(log->ptr);
-                       break;
-               case KMEMLEAK_SET_EXCESS_REF:
-                       object_set_excess_ref((unsigned long)log->ptr,
-                                             log->excess_ref);
-                       break;
-               default:
-                       kmemleak_warn("Unknown early log operation: %d\n",
-                                     log->op_type);
-               }
-
-               if (kmemleak_warning) {
-                       print_log_trace(log);
-                       kmemleak_warning = 0;
-               }
-       }
 }
 
 /*
@@ -2126,7 +1979,8 @@ static int __init kmemleak_late_init(void)
                mutex_unlock(&scan_mutex);
        }
 
-       pr_info("Kernel memory leak detector initialized\n");
+       pr_info("Kernel memory leak detector initialized (mem pool available: %d)\n",
+               mem_pool_free_count);
 
        return 0;
 }
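
The replay switch removed above enumerates kmemleak's public annotation
entry points (kmemleak_not_leak(), kmemleak_ignore(), kmemleak_scan_area(),
...); with the early log gone they now take effect directly, however early
they are called, via the object mem pool. A minimal sketch of the most
common annotation follows; the kmemleak calls are the real
<linux/kmemleak.h> API, while the surrounding driver helper is hypothetical:

#include <linux/slab.h>
#include <linux/kmemleak.h>

/* Hypothetical driver helper: the only reference to the buffer will
 * live in a device register, which the scanner cannot see. */
static void *stash_dma_buffer(size_t len)
{
	void *buf = kmalloc(len, GFP_KERNEL);

	if (!buf)
		return NULL;
	kmemleak_not_leak(buf);	/* suppress the false positive */
	return buf;
}
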
index 3dc4346..dbee2eb 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1029,24 +1029,6 @@ static u32 calc_checksum(struct page *page)
        return checksum;
 }
 
-static int memcmp_pages(struct page *page1, struct page *page2)
-{
-       char *addr1, *addr2;
-       int ret;
-
-       addr1 = kmap_atomic(page1);
-       addr2 = kmap_atomic(page2);
-       ret = memcmp(addr1, addr2, PAGE_SIZE);
-       kunmap_atomic(addr2);
-       kunmap_atomic(addr1);
-       return ret;
-}
-
-static inline int pages_identical(struct page *page1, struct page *page2)
-{
-       return !memcmp_pages(page1, page2);
-}
-
 static int write_protect_page(struct vm_area_struct *vma, struct page *page,
                              pte_t *orig_pte)
 {
index 88babcc..2be9f3f 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/syscalls.h>
 #include <linux/mempolicy.h>
 #include <linux/page-isolation.h>
+#include <linux/page_idle.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/hugetlb.h>
 #include <linux/falloc.h>
 
 #include "internal.h"
 
+struct madvise_walk_private {
+       struct mmu_gather *tlb;
+       bool pageout;
+};
+
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
  * take mmap_sem for writing. Others, which simply traverse vmas, need
@@ -42,6 +48,8 @@ static int madvise_need_mmap_write(int behavior)
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
+       case MADV_COLD:
+       case MADV_PAGEOUT:
        case MADV_FREE:
                return 0;
        default:
@@ -107,28 +115,14 @@ static long madvise_behavior(struct vm_area_struct *vma,
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
                error = ksm_madvise(vma, start, end, behavior, &new_flags);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
                break;
        case MADV_HUGEPAGE:
        case MADV_NOHUGEPAGE:
                error = hugepage_madvise(vma, &new_flags, behavior);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
                break;
        }
 
@@ -154,15 +148,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
                        goto out;
                }
                error = __split_vma(mm, vma, start, 1);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
        }
 
        if (end != vma->vm_end) {
@@ -171,15 +158,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
                        goto out;
                }
                error = __split_vma(mm, vma, end, 0);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
        }
 
 success:
@@ -187,6 +167,14 @@ success:
         * vm_flags is protected by the mmap_sem held in write mode.
         */
        vma->vm_flags = new_flags;
+
+out_convert_errno:
+       /*
+        * madvise() returns EAGAIN if kernel resources, such as
+        * slab, are temporarily unavailable.
+        */
+       if (error == -ENOMEM)
+               error = -EAGAIN;
 out:
        return error;
 }
@@ -309,6 +297,254 @@ static long madvise_willneed(struct vm_area_struct *vma,
        return 0;
 }
 
+static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+                               struct mm_walk *walk)
+{
+       struct madvise_walk_private *private = walk->private;
+       struct mmu_gather *tlb = private->tlb;
+       bool pageout = private->pageout;
+       struct mm_struct *mm = tlb->mm;
+       struct vm_area_struct *vma = walk->vma;
+       pte_t *orig_pte, *pte, ptent;
+       spinlock_t *ptl;
+       struct page *page = NULL;
+       LIST_HEAD(page_list);
+
+       if (fatal_signal_pending(current))
+               return -EINTR;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (pmd_trans_huge(*pmd)) {
+               pmd_t orig_pmd;
+               unsigned long next = pmd_addr_end(addr, end);
+
+               tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
+               ptl = pmd_trans_huge_lock(pmd, vma);
+               if (!ptl)
+                       return 0;
+
+               orig_pmd = *pmd;
+               if (is_huge_zero_pmd(orig_pmd))
+                       goto huge_unlock;
+
+               if (unlikely(!pmd_present(orig_pmd))) {
+                       VM_BUG_ON(thp_migration_supported() &&
+                                       !is_pmd_migration_entry(orig_pmd));
+                       goto huge_unlock;
+               }
+
+               page = pmd_page(orig_pmd);
+               if (next - addr != HPAGE_PMD_SIZE) {
+                       int err;
+
+                       if (page_mapcount(page) != 1)
+                               goto huge_unlock;
+
+                       get_page(page);
+                       spin_unlock(ptl);
+                       lock_page(page);
+                       err = split_huge_page(page);
+                       unlock_page(page);
+                       put_page(page);
+                       if (!err)
+                               goto regular_page;
+                       return 0;
+               }
+
+               if (pmd_young(orig_pmd)) {
+                       pmdp_invalidate(vma, addr, pmd);
+                       orig_pmd = pmd_mkold(orig_pmd);
+
+                       set_pmd_at(mm, addr, pmd, orig_pmd);
+                       tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+               }
+
+               ClearPageReferenced(page);
+               test_and_clear_page_young(page);
+               if (pageout) {
+                       if (!isolate_lru_page(page))
+                               list_add(&page->lru, &page_list);
+               } else
+                       deactivate_page(page);
+huge_unlock:
+               spin_unlock(ptl);
+               if (pageout)
+                       reclaim_pages(&page_list);
+               return 0;
+       }
+
+       if (pmd_trans_unstable(pmd))
+               return 0;
+regular_page:
+#endif
+       tlb_change_page_size(tlb, PAGE_SIZE);
+       orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+       flush_tlb_batched_pending(mm);
+       arch_enter_lazy_mmu_mode();
+       for (; addr < end; pte++, addr += PAGE_SIZE) {
+               ptent = *pte;
+
+               if (pte_none(ptent))
+                       continue;
+
+               if (!pte_present(ptent))
+                       continue;
+
+               page = vm_normal_page(vma, addr, ptent);
+               if (!page)
+                       continue;
+
+               /*
+                * Creating a THP page is expensive, so split it only if we
+                * are sure it's worth it, i.e. if we are the only owner.
+                */
+               if (PageTransCompound(page)) {
+                       if (page_mapcount(page) != 1)
+                               break;
+                       get_page(page);
+                       if (!trylock_page(page)) {
+                               put_page(page);
+                               break;
+                       }
+                       pte_unmap_unlock(orig_pte, ptl);
+                       if (split_huge_page(page)) {
+                               unlock_page(page);
+                               put_page(page);
+                               pte_offset_map_lock(mm, pmd, addr, &ptl);
+                               break;
+                       }
+                       unlock_page(page);
+                       put_page(page);
+                       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+                       pte--;
+                       addr -= PAGE_SIZE;
+                       continue;
+               }
+
+               VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+               if (pte_young(ptent)) {
+                       ptent = ptep_get_and_clear_full(mm, addr, pte,
+                                                       tlb->fullmm);
+                       ptent = pte_mkold(ptent);
+                       set_pte_at(mm, addr, pte, ptent);
+                       tlb_remove_tlb_entry(tlb, pte, addr);
+               }
+
+               /*
+                * We are deactivating a page to accelerate its reclaim.
+                * The VM cannot reclaim the page unless we clear PG_young.
+                * As a side effect, this confuses idle-page tracking,
+                * which will miss the recent reference history.
+                */
+               ClearPageReferenced(page);
+               test_and_clear_page_young(page);
+               if (pageout) {
+                       if (!isolate_lru_page(page))
+                               list_add(&page->lru, &page_list);
+               } else
+                       deactivate_page(page);
+       }
+
+       arch_leave_lazy_mmu_mode();
+       pte_unmap_unlock(orig_pte, ptl);
+       if (pageout)
+               reclaim_pages(&page_list);
+       cond_resched();
+
+       return 0;
+}
+
+static const struct mm_walk_ops cold_walk_ops = {
+       .pmd_entry = madvise_cold_or_pageout_pte_range,
+};
+
+static void madvise_cold_page_range(struct mmu_gather *tlb,
+                            struct vm_area_struct *vma,
+                            unsigned long addr, unsigned long end)
+{
+       struct madvise_walk_private walk_private = {
+               .pageout = false,
+               .tlb = tlb,
+       };
+
+       tlb_start_vma(tlb, vma);
+       walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
+       tlb_end_vma(tlb, vma);
+}
+
+static long madvise_cold(struct vm_area_struct *vma,
+                       struct vm_area_struct **prev,
+                       unsigned long start_addr, unsigned long end_addr)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_gather tlb;
+
+       *prev = vma;
+       if (!can_madv_lru_vma(vma))
+               return -EINVAL;
+
+       lru_add_drain();
+       tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+       madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
+       tlb_finish_mmu(&tlb, start_addr, end_addr);
+
+       return 0;
+}
+
+static void madvise_pageout_page_range(struct mmu_gather *tlb,
+                            struct vm_area_struct *vma,
+                            unsigned long addr, unsigned long end)
+{
+       struct madvise_walk_private walk_private = {
+               .pageout = true,
+               .tlb = tlb,
+       };
+
+       tlb_start_vma(tlb, vma);
+       walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
+       tlb_end_vma(tlb, vma);
+}
+
+static inline bool can_do_pageout(struct vm_area_struct *vma)
+{
+       if (vma_is_anonymous(vma))
+               return true;
+       if (!vma->vm_file)
+               return false;
+       /*
+        * paging out pagecache only for non-anonymous mappings that correspond
+        * to the files the calling process could (if it tried) open for writing;
+        * otherwise we'd be including shared non-exclusive mappings, which
+        * opens a side channel.
+        */
+       return inode_owner_or_capable(file_inode(vma->vm_file)) ||
+               inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
+}
+
+static long madvise_pageout(struct vm_area_struct *vma,
+                       struct vm_area_struct **prev,
+                       unsigned long start_addr, unsigned long end_addr)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_gather tlb;
+
+       *prev = vma;
+       if (!can_madv_lru_vma(vma))
+               return -EINVAL;
+
+       if (!can_do_pageout(vma))
+               return 0;
+
+       lru_add_drain();
+       tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+       madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
+       tlb_finish_mmu(&tlb, start_addr, end_addr);
+
+       return 0;
+}
+
 static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 {
@@ -513,7 +749,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
                                  int behavior)
 {
        *prev = vma;
-       if (!can_madv_dontneed_vma(vma))
+       if (!can_madv_lru_vma(vma))
                return -EINVAL;
 
        if (!userfaultfd_remove(vma, start, end)) {
@@ -535,7 +771,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
                         */
                        return -ENOMEM;
                }
-               if (!can_madv_dontneed_vma(vma))
+               if (!can_madv_lru_vma(vma))
                        return -EINVAL;
                if (end > vma->vm_end) {
                        /*
@@ -689,6 +925,10 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
                return madvise_remove(vma, prev, start, end);
        case MADV_WILLNEED:
                return madvise_willneed(vma, prev, start, end);
+       case MADV_COLD:
+               return madvise_cold(vma, prev, start, end);
+       case MADV_PAGEOUT:
+               return madvise_pageout(vma, prev, start, end);
        case MADV_FREE:
        case MADV_DONTNEED:
                return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -710,6 +950,8 @@ madvise_behavior_valid(int behavior)
        case MADV_WILLNEED:
        case MADV_DONTNEED:
        case MADV_FREE:
+       case MADV_COLD:
+       case MADV_PAGEOUT:
 #ifdef CONFIG_KSM
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
@@ -804,6 +1046,8 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
        size_t len;
        struct blk_plug plug;
 
+       start = untagged_addr(start);
+
        if (!madvise_behavior_valid(behavior))
                return error;
 
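A short userspace sketch exercising the two new hints end to end; the
fallback #defines mirror the uapi values added by this series and are only
needed with older libc headers. Both calls are advisory, and per
can_do_pageout() above MADV_PAGEOUT quietly skips file mappings the caller
could not open for writing:

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#ifndef MADV_COLD
#define MADV_COLD	20	/* deactivate these pages */
#endif
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT	21	/* reclaim these pages */
#endif

int main(void)
{
	size_t i, len = 64 << 20;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;
	for (i = 0; i < len; i += 4096)
		buf[i] = 1;			/* fault the pages in */

	if (madvise(buf, len, MADV_COLD))	/* move to the inactive LRU */
		perror("MADV_COLD");
	if (madvise(buf, len, MADV_PAGEOUT))	/* reclaim immediately */
		perror("MADV_PAGEOUT");
	return 0;
}
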
index 7d4f61a..c4b16ca 100644 (file)
@@ -1356,9 +1356,6 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
                align = SMP_CACHE_BYTES;
        }
 
-       if (end > memblock.current_limit)
-               end = memblock.current_limit;
-
 again:
        found = memblock_find_in_range_node(size, align, start, end, nid,
                                            flags);
@@ -1469,6 +1466,9 @@ static void * __init memblock_alloc_internal(
        if (WARN_ON_ONCE(slab_is_available()))
                return kzalloc_node(size, GFP_NOWAIT, nid);
 
+       if (max_addr > memblock.current_limit)
+               max_addr = memblock.current_limit;
+
        alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
 
        /* retry allocation without lower limit */
index f3c15bb..37592dd 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/lockdep.h>
 #include <linux/file.h>
 #include <linux/tracehook.h>
+#include <linux/psi.h>
 #include <linux/seq_buf.h>
 #include "internal.h"
 #include <net/sock.h>
@@ -317,6 +318,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
+#endif
 
 static int memcg_shrinker_map_size;
 static DEFINE_MUTEX(memcg_shrinker_map_mutex);
@@ -440,14 +442,6 @@ void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
        }
 }
 
-#else /* CONFIG_MEMCG_KMEM */
-static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
-{
-       return 0;
-}
-static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
-#endif /* CONFIG_MEMCG_KMEM */
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
@@ -490,7 +484,7 @@ ino_t page_cgroup_ino(struct page *page)
        unsigned long ino = 0;
 
        rcu_read_lock();
-       if (PageHead(page) && PageSlab(page))
+       if (PageSlab(page) && !PageTail(page))
                memcg = memcg_from_slab_page(page);
        else
                memcg = READ_ONCE(page->mem_cgroup);
@@ -1573,6 +1567,11 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
        return max;
 }
 
+unsigned long mem_cgroup_size(struct mem_cgroup *memcg)
+{
+       return page_counter_read(&memcg->memory);
+}
+
 static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                                     int order)
 {
@@ -2270,21 +2269,22 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
        for_each_online_cpu(cpu) {
                struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                struct mem_cgroup *memcg;
+               bool flush = false;
 
+               rcu_read_lock();
                memcg = stock->cached;
-               if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
-                       continue;
-               if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
-                       css_put(&memcg->css);
-                       continue;
-               }
-               if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+               if (memcg && stock->nr_pages &&
+                   mem_cgroup_is_descendant(memcg, root_memcg))
+                       flush = true;
+               rcu_read_unlock();
+
+               if (flush &&
+                   !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
                        if (cpu == curcpu)
                                drain_local_stock(&stock->work);
                        else
                                schedule_work_on(cpu, &stock->work);
                }
-               css_put(&memcg->css);
        }
        put_cpu();
        mutex_unlock(&percpu_charge_mutex);
@@ -2358,12 +2358,68 @@ static void high_work_func(struct work_struct *work)
        reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
 }
 
+/*
+ * Clamp the maximum sleep time per allocation batch to 2 seconds. This is
+ * enough to cause a significant slowdown in most cases, while still
+ * allowing diagnostics and tracing to proceed without becoming stuck.
+ */
+#define MEMCG_MAX_HIGH_DELAY_JIFFIES (2UL*HZ)
+
+/*
+ * When calculating the delay, we use these on either side of the
+ * exponentiation to maintain precision and scale to a reasonable number of
+ * jiffies (see the table below, and the userspace sketch following the
+ * mem_cgroup_handle_over_high() hunk).
+ *
+ * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the
+ *   overage ratio to a delay.
+ * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down the
+ *   proposed penalty in order to reduce it to a reasonable number of jiffies,
+ *   and to produce a reasonable delay curve.
+ *
+ * MEMCG_DELAY_SCALING_SHIFT just happens to be a number that produces a
+ * reasonable delay curve compared to precision-adjusted overage, not
+ * penalising heavily at first, but still making sure that growth beyond the
+ * limit penalises misbehaving cgroups by slowing them down exponentially. For
+ * example, with a high of 100 megabytes:
+ *
+ *  +-------+------------------------+
+ *  | usage | time to allocate in ms |
+ *  +-------+------------------------+
+ *  | 100M  |                      0 |
+ *  | 101M  |                      6 |
+ *  | 102M  |                     25 |
+ *  | 103M  |                     57 |
+ *  | 104M  |                    102 |
+ *  | 105M  |                    159 |
+ *  | 106M  |                    230 |
+ *  | 107M  |                    313 |
+ *  | 108M  |                    409 |
+ *  | 109M  |                    518 |
+ *  | 110M  |                    639 |
+ *  | 111M  |                    774 |
+ *  | 112M  |                    921 |
+ *  | 113M  |                   1081 |
+ *  | 114M  |                   1254 |
+ *  | 115M  |                   1439 |
+ *  | 116M  |                   1638 |
+ *  | 117M  |                   1849 |
+ *  | 118M  |                   2000 |
+ *  | 119M  |                   2000 |
+ *  | 120M  |                   2000 |
+ *  +-------+------------------------+
+ */
+#define MEMCG_DELAY_PRECISION_SHIFT 20
+#define MEMCG_DELAY_SCALING_SHIFT 14
+
 /*
  * Scheduled by try_charge() to be executed from the userland return path
  * and reclaims memory over the high limit.
  */
 void mem_cgroup_handle_over_high(void)
 {
+       unsigned long usage, high, clamped_high;
+       unsigned long pflags;
+       unsigned long penalty_jiffies, overage;
        unsigned int nr_pages = current->memcg_nr_pages_over_high;
        struct mem_cgroup *memcg;
 
@@ -2372,8 +2428,75 @@ void mem_cgroup_handle_over_high(void)
 
        memcg = get_mem_cgroup_from_mm(current->mm);
        reclaim_high(memcg, nr_pages, GFP_KERNEL);
-       css_put(&memcg->css);
        current->memcg_nr_pages_over_high = 0;
+
+       /*
+        * memory.high is breached and reclaim is unable to keep up. Throttle
+        * allocators proactively to slow down excessive growth.
+        *
+        * We use overage compared to memory.high to calculate the number of
+        * jiffies to sleep (penalty_jiffies). Ideally this value should be
+        * fairly lenient on small overages, and increasingly harsh when the
+        * memcg in question makes it clear that it has no intention of stopping
+        * its crazy behaviour, so we exponentially increase the delay based on
+        * overage amount.
+        */
+
+       usage = page_counter_read(&memcg->memory);
+       high = READ_ONCE(memcg->high);
+
+       if (usage <= high)
+               goto out;
+
+       /*
+        * Prevent division by 0 in overage calculation by acting as if it was a
+        * threshold of 1 page
+        */
+       clamped_high = max(high, 1UL);
+
+       overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+                         clamped_high);
+
+       penalty_jiffies = ((u64)overage * overage * HZ)
+               >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+
+       /*
+        * Factor in the task's own contribution to the overage, such that four
+        * N-sized allocations are throttled approximately the same as one
+        * 4N-sized allocation.
+        *
+        * MEMCG_CHARGE_BATCH pages is nominal, so work out how much smaller or
+        * larger the current charge batch is than that.
+        */
+       penalty_jiffies = penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
+
+       /*
+        * Clamp the max delay per usermode return so as to still keep the
+        * application moving forwards and also permit diagnostics, albeit
+        * extremely slowly.
+        */
+       penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+
+       /*
+        * Don't sleep if the amount of jiffies this memcg owes us is so low
+        * that it's not even worth doing, in an attempt to be nice to those who
+        * go only a small amount over their memory.high value and maybe haven't
+        * been aggressively reclaimed enough yet.
+        */
+       if (penalty_jiffies <= HZ / 100)
+               goto out;
+
+       /*
+        * If we exit early, we're guaranteed to die (since
+        * schedule_timeout_killable sets TASK_KILLABLE). This means we don't
+        * need to account for any ill-begotten jiffies to pay them off later.
+        */
+       psi_memstall_enter(&pflags);
+       schedule_timeout_killable(penalty_jiffies);
+       psi_memstall_leave(&pflags);
+
+out:
+       css_put(&memcg->css);
 }
 
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
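
As a cross-check of the table above, a standalone userspace sketch of the
penalty curve, assuming HZ=1000 and leaving out the nr_pages /
MEMCG_CHARGE_BATCH scaling and the HZ/100 "not worth sleeping" floor applied
later in the function:

#include <stdio.h>
#include <stdint.h>

#define PRECISION_SHIFT	20		/* MEMCG_DELAY_PRECISION_SHIFT */
#define SCALING_SHIFT	14		/* MEMCG_DELAY_SCALING_SHIFT */
#define HZ		1000
#define MAX_DELAY	(2 * HZ)	/* MEMCG_MAX_HIGH_DELAY_JIFFIES */

int main(void)
{
	uint64_t usage, high = 100;	/* units of 1M, as in the table */

	for (usage = high; usage <= high + 20; usage++) {
		uint64_t overage = ((usage - high) << PRECISION_SHIFT) / high;
		uint64_t delay = (overage * overage * HZ)
				 >> (PRECISION_SHIFT + SCALING_SHIFT);

		if (delay > MAX_DELAY)
			delay = MAX_DELAY;
		/* at HZ=1000 one jiffy is one millisecond */
		printf("%3lluM | %4llu ms\n", (unsigned long long)usage,
		       (unsigned long long)delay);
	}
	return 0;
}
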
@@ -2411,6 +2534,15 @@ retry:
                goto retry;
        }
 
+       /*
+        * Memcg doesn't have a dedicated reserve for atomic
+        * allocations. But like the global atomic pool, we need to
+        * put the burden of reclaim on regular allocation requests
+        * and let these go through as privileged allocations.
+        */
+       if (gfp_mask & __GFP_ATOMIC)
+               goto force;
+
        /*
         * Unlike in global OOM situations, memcg is not in a physical
         * memory shortage.  Allow dying and OOM-killed tasks to
@@ -2825,6 +2957,16 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
            !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+
+               /*
+                * Enforce __GFP_NOFAIL allocation because callers are not
+                * prepared to see failures and likely do not have any failure
+                * handling code.
+                */
+               if (gfp & __GFP_NOFAIL) {
+                       page_counter_charge(&memcg->kmem, nr_pages);
+                       return 0;
+               }
                cancel_charge(memcg, nr_pages);
                return -ENOMEM;
        }
@@ -3512,6 +3654,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
                        ret = mem_cgroup_resize_max(memcg, nr_pages, true);
                        break;
                case _KMEM:
+                       pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+                                    "Please report your usecase to linux-mm@kvack.org if you "
+                                    "depend on this functionality.\n");
                        ret = memcg_update_kmem_max(memcg, nr_pages);
                        break;
                case _TCP:
@@ -4805,11 +4950,6 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
        }
 }
 
-static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
-{
-       mem_cgroup_id_get_many(memcg, 1);
-}
-
 static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
 {
        mem_cgroup_id_put_many(memcg, 1);
@@ -4883,12 +5023,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
        int node;
 
-       /*
-        * Flush percpu vmstats and vmevents to guarantee the value correctness
-        * on parent's and all ancestor levels.
-        */
-       memcg_flush_percpu_vmstats(memcg, false);
-       memcg_flush_percpu_vmevents(memcg);
        for_each_node(node)
                free_mem_cgroup_per_node_info(memcg, node);
        free_percpu(memcg->vmstats_percpu);
@@ -4899,6 +5033,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 static void mem_cgroup_free(struct mem_cgroup *memcg)
 {
        memcg_wb_domain_exit(memcg);
+       /*
+        * Flush percpu vmstats and vmevents to guarantee the value correctness
+        * on parent's and all ancestor levels.
+        */
+       memcg_flush_percpu_vmstats(memcg, false);
+       memcg_flush_percpu_vmevents(memcg);
        __mem_cgroup_free(memcg);
 }
 
@@ -4954,6 +5094,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
                memcg->cgwb_frn[i].done =
                        __WB_COMPLETION_INIT(&memcg_cgwb_frn_waitq);
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spin_lock_init(&memcg->deferred_split_queue.split_queue_lock);
+       INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
+       memcg->deferred_split_queue.split_queue_len = 0;
 #endif
        idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
        return memcg;
@@ -5284,6 +5429,8 @@ static int mem_cgroup_move_account(struct page *page,
                                   struct mem_cgroup *from,
                                   struct mem_cgroup *to)
 {
+       struct lruvec *from_vec, *to_vec;
+       struct pglist_data *pgdat;
        unsigned long flags;
        unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
        int ret;
@@ -5307,11 +5454,15 @@ static int mem_cgroup_move_account(struct page *page,
 
        anon = PageAnon(page);
 
+       pgdat = page_pgdat(page);
+       from_vec = mem_cgroup_lruvec(pgdat, from);
+       to_vec = mem_cgroup_lruvec(pgdat, to);
+
        spin_lock_irqsave(&from->move_lock, flags);
 
        if (!anon && page_mapped(page)) {
-               __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
-               __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
+               __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
+               __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
        }
 
        /*
@@ -5323,16 +5474,24 @@ static int mem_cgroup_move_account(struct page *page,
                struct address_space *mapping = page_mapping(page);
 
                if (mapping_cap_account_dirty(mapping)) {
-                       __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
-                       __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
+                       __mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
+                       __mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
                }
        }
 
        if (PageWriteback(page)) {
-               __mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
-               __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
+               __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
+               __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
        }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (compound && !list_empty(page_deferred_list(page))) {
+               spin_lock(&from->deferred_split_queue.split_queue_lock);
+               list_del_init(page_deferred_list(page));
+               from->deferred_split_queue.split_queue_len--;
+               spin_unlock(&from->deferred_split_queue.split_queue_lock);
+       }
+#endif
        /*
         * It is safe to change page->mem_cgroup here because the page
         * is referenced, charged, and isolated - we can't race with
@@ -5341,6 +5500,17 @@ static int mem_cgroup_move_account(struct page *page,
 
        /* caller should have done css_get */
        page->mem_cgroup = to;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (compound && list_empty(page_deferred_list(page))) {
+               spin_lock(&to->deferred_split_queue.split_queue_lock);
+               list_add_tail(page_deferred_list(page),
+                             &to->deferred_split_queue.split_queue);
+               to->deferred_split_queue.split_queue_len++;
+               spin_unlock(&to->deferred_split_queue.split_queue_lock);
+       }
+#endif
+
        spin_unlock_irqrestore(&from->move_lock, flags);
 
        ret = 0;
@@ -6511,7 +6681,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                unsigned int nr_pages = 1;
 
                if (PageTransHuge(page)) {
-                       nr_pages <<= compound_order(page);
+                       nr_pages = compound_nr(page);
                        ug->nr_huge += nr_pages;
                }
                if (PageAnon(page))
@@ -6523,7 +6693,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                }
                ug->pgpgout++;
        } else {
-               ug->nr_kmem += 1 << compound_order(page);
+               ug->nr_kmem += compound_nr(page);
                __ClearPageKmemcg(page);
        }
 
index 650e65a..2647c89 100644 (file)
@@ -39,6 +39,7 @@ static void memfd_tag_pins(struct xa_state *xas)
        xas_for_each(xas, page, ULONG_MAX) {
                if (xa_is_value(page))
                        continue;
+               page = find_subpage(page, xas->xa_index);
                if (page_count(page) - page_mapcount(page) > 1)
                        xas_set_mark(xas, MEMFD_TAG_PINNED);
 
@@ -88,6 +89,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
                        bool clear = true;
                        if (xa_is_value(page))
                                continue;
+                       page = find_subpage(page, xas.xa_index);
                        if (page_count(page) - page_mapcount(page) != 1) {
                                /*
                                 * On the last scan, we clean up all those tags
index 7ef849d..3151c87 100644 (file)
@@ -199,7 +199,6 @@ struct to_kill {
        struct task_struct *tsk;
        unsigned long addr;
        short size_shift;
-       char addr_valid;
 };
 
 /*
@@ -324,22 +323,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
                }
        }
        tk->addr = page_address_in_vma(p, vma);
-       tk->addr_valid = 1;
        if (is_zone_device_page(p))
                tk->size_shift = dev_pagemap_mapping_shift(p, vma);
        else
                tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
 
        /*
-        * In theory we don't have to kill when the page was
-        * munmaped. But it could be also a mremap. Since that's
-        * likely very rare kill anyways just out of paranoia, but use
-        * a SIGKILL because the error is not contained anymore.
+        * Send SIGKILL if "tk->addr == -EFAULT". Since
+        * "tk->size_shift" is always non-zero for !is_zone_device_page(),
+        * "tk->size_shift == 0" effectively checks for no mapping on
+        * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
+        * to a process' address space, it's possible not all N VMAs
+        * contain mappings for the page, but at least one VMA does.
+        * Only deliver SIGBUS with payload derived from the VMA that
+        * has a mapping for the page.
         */
-       if (tk->addr == -EFAULT || tk->size_shift == 0) {
+       if (tk->addr == -EFAULT) {
                pr_info("Memory failure: Unable to find user space address %lx in %s\n",
                        page_to_pfn(p), tsk->comm);
-               tk->addr_valid = 0;
+       } else if (tk->size_shift == 0) {
+               kfree(tk);
+               return;
        }
        get_task_struct(tsk);
        tk->tsk = tsk;
@@ -366,7 +370,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
                         * make sure the process doesn't catch the
                         * signal and then access the memory. Just kill it.
                         */
-                       if (fail || tk->addr_valid == 0) {
+                       if (fail || tk->addr == -EFAULT) {
                                pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
                                       pfn, tk->tsk->comm, tk->tsk->pid);
                                do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
@@ -1253,17 +1257,19 @@ int memory_failure(unsigned long pfn, int flags)
        if (!sysctl_memory_failure_recovery)
                panic("Memory failure on page %lx", pfn);
 
-       if (!pfn_valid(pfn)) {
+       p = pfn_to_online_page(pfn);
+       if (!p) {
+               if (pfn_valid(pfn)) {
+                       pgmap = get_dev_pagemap(pfn, NULL);
+                       if (pgmap)
+                               return memory_failure_dev_pagemap(pfn, flags,
+                                                                 pgmap);
+               }
                pr_err("Memory failure: %#lx: memory outside kernel control\n",
                        pfn);
                return -ENXIO;
        }
 
-       pgmap = get_dev_pagemap(pfn, NULL);
-       if (pgmap)
-               return memory_failure_dev_pagemap(pfn, flags, pgmap);
-
-       p = pfn_to_page(pfn);
        if (PageHuge(p))
                return memory_failure_hugetlb(pfn, flags);
        if (TestSetPageHWPoison(p)) {
index b1dff75..b1ca51a 100644 (file)
@@ -518,7 +518,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
                 (long long)pte_val(pte), (long long)pmd_val(*pmd));
        if (page)
                dump_page(page, "bad pte");
-       pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
+       pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n",
                 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
        pr_alert("file:%pD fault:%ps mmap:%ps readpage:%ps\n",
                 vma->vm_file,
@@ -1026,6 +1026,9 @@ again:
                if (pte_none(ptent))
                        continue;
 
+               if (need_resched())
+                       break;
+
                if (pte_present(ptent)) {
                        struct page *page;
 
@@ -1093,7 +1096,6 @@ again:
                if (unlikely(details))
                        continue;
 
-               entry = pte_to_swp_entry(ptent);
                if (!non_swap_entry(entry))
                        rss[MM_SWAPENTS]--;
                else if (is_migration_entry(entry)) {
@@ -1124,8 +1126,11 @@ again:
        if (force_flush) {
                force_flush = 0;
                tlb_flush_mmu(tlb);
-               if (addr != end)
-                       goto again;
+       }
+
+       if (addr != end) {
+               cond_resched();
+               goto again;
        }
 
        return addr;
index c73f099..07e5c67 100644 (file)
@@ -436,67 +436,33 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
        zone_span_writeunlock(zone);
 }
 
-static void shrink_pgdat_span(struct pglist_data *pgdat,
-                             unsigned long start_pfn, unsigned long end_pfn)
+static void update_pgdat_span(struct pglist_data *pgdat)
 {
-       unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
-       unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
-       unsigned long pgdat_end_pfn = p;
-       unsigned long pfn;
-       int nid = pgdat->node_id;
-
-       if (pgdat_start_pfn == start_pfn) {
-               /*
-                * If the section is smallest section in the pgdat, it need
-                * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
-                * In this case, we find second smallest valid mem_section
-                * for shrinking zone.
-                */
-               pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
-                                               pgdat_end_pfn);
-               if (pfn) {
-                       pgdat->node_start_pfn = pfn;
-                       pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
-               }
-       } else if (pgdat_end_pfn == end_pfn) {
-               /*
-                * If the section is biggest section in the pgdat, it need
-                * shrink pgdat->node_spanned_pages.
-                * In this case, we find second biggest valid mem_section for
-                * shrinking zone.
-                */
-               pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
-                                              start_pfn);
-               if (pfn)
-                       pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
-       }
+       unsigned long node_start_pfn = 0, node_end_pfn = 0;
+       struct zone *zone;
 
-       /*
-        * If the section is not biggest or smallest mem_section in the pgdat,
-        * it only creates a hole in the pgdat. So in this case, we need not
-        * change the pgdat.
-        * But perhaps, the pgdat has only hole data. Thus it check the pgdat
-        * has only hole or not.
-        */
-       pfn = pgdat_start_pfn;
-       for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SUBSECTION) {
-               if (unlikely(!pfn_valid(pfn)))
-                       continue;
+       for (zone = pgdat->node_zones;
+            zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
+               unsigned long zone_end_pfn = zone->zone_start_pfn +
+                                            zone->spanned_pages;
 
-               if (pfn_to_nid(pfn) != nid)
+               /* No need to lock the zones, they can't change. */
+               if (!zone->spanned_pages)
                        continue;
-
-               /* Skip range to be removed */
-               if (pfn >= start_pfn && pfn < end_pfn)
+               if (!node_end_pfn) {
+                       node_start_pfn = zone->zone_start_pfn;
+                       node_end_pfn = zone_end_pfn;
                        continue;
+               }
 
-               /* If we find valid section, we have nothing to do */
-               return;
+               if (zone_end_pfn > node_end_pfn)
+                       node_end_pfn = zone_end_pfn;
+               if (zone->zone_start_pfn < node_start_pfn)
+                       node_start_pfn = zone->zone_start_pfn;
        }
 
-       /* The pgdat has no valid section */
-       pgdat->node_start_pfn = 0;
-       pgdat->node_spanned_pages = 0;
+       pgdat->node_start_pfn = node_start_pfn;
+       pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
 }
 
 static void __remove_zone(struct zone *zone, unsigned long start_pfn,
@@ -507,7 +473,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
 
        pgdat_resize_lock(zone->zone_pgdat, &flags);
        shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
-       shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
+       update_pgdat_span(pgdat);
        pgdat_resize_unlock(zone->zone_pgdat, &flags);
 }
 
@@ -632,33 +598,30 @@ static void generic_online_page(struct page *page, unsigned int order)
 #endif
 }
 
-static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
-{
-       unsigned long end = start + nr_pages;
-       int order, onlined_pages = 0;
-
-       while (start < end) {
-               order = min(MAX_ORDER - 1,
-                       get_order(PFN_PHYS(end) - PFN_PHYS(start)));
-               (*online_page_callback)(pfn_to_page(start), order);
-
-               onlined_pages += (1UL << order);
-               start += (1UL << order);
-       }
-       return onlined_pages;
-}
-
 static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
                        void *arg)
 {
-       unsigned long onlined_pages = *(unsigned long *)arg;
+       const unsigned long end_pfn = start_pfn + nr_pages;
+       unsigned long pfn;
+       int order;
 
-       if (PageReserved(pfn_to_page(start_pfn)))
-               onlined_pages += online_pages_blocks(start_pfn, nr_pages);
+       /*
+        * Online the pages. The callback might decide to keep some pages
+        * PG_reserved (to add them to the buddy later), but we still account
+        * them as being online/belonging to this zone ("present").
+        */
+       for (pfn = start_pfn; pfn < end_pfn; pfn += 1ul << order) {
+               order = min(MAX_ORDER - 1, get_order(PFN_PHYS(end_pfn - pfn)));
+               /* __free_pages_core() wants pfns to be aligned to the order */
+               if (WARN_ON_ONCE(!IS_ALIGNED(pfn, 1ul << order)))
+                       order = 0;
+               (*online_page_callback)(pfn_to_page(pfn), order);
+       }
 
-       online_mem_sections(start_pfn, start_pfn + nr_pages);
+       /* mark all involved sections as online */
+       online_mem_sections(start_pfn, end_pfn);
 
-       *(unsigned long *)arg = onlined_pages;
+       *(unsigned long *)arg += nr_pages;
        return 0;
 }
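
To make the chunking concrete, a userspace sketch of the loop above,
assuming MAX_ORDER 11; real onlined ranges are memory-block aligned, so the
order-0 fallback behind the WARN_ON_ONCE should not normally trigger:

#include <stdio.h>

#define MAX_ORDER 11

/* smallest order such that (1UL << order) covers nr_pages, like
 * get_order(PFN_PHYS(nr_pages)) in the kernel */
static int get_order_pages(unsigned long nr_pages)
{
	int order = 0;

	while ((1UL << order) < nr_pages)
		order++;
	return order;
}

int main(void)
{
	const unsigned long start_pfn = 0x1000, end_pfn = 0x1600;
	unsigned long pfn = start_pfn;

	while (pfn < end_pfn) {
		int order = get_order_pages(end_pfn - pfn);

		if (order > MAX_ORDER - 1)
			order = MAX_ORDER - 1;
		/* __free_pages_core() wants pfns aligned to the order */
		if (pfn & ((1UL << order) - 1))
			order = 0;
		printf("pfn %#lx: order %2d (%4lu pages)\n",
		       pfn, order, 1UL << order);
		pfn += 1UL << order;
	}
	return 0;
}
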
 
@@ -714,8 +677,13 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
                pgdat->node_start_pfn = start_pfn;
 
        pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
-}
 
+}
+/*
+ * Associate the pfn range with the given zone, initializing the memmaps
+ * and resizing the pgdat/zone data to span the added pages. After this
+ * call, all affected pages are PG_reserved.
+ */
 void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
                unsigned long nr_pages, struct vmem_altmap *altmap)
 {
@@ -804,20 +772,6 @@ struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
        return default_zone_for_pfn(nid, start_pfn, nr_pages);
 }
 
-/*
- * Associates the given pfn range with the given node and the zone appropriate
- * for the given online type.
- */
-static struct zone * __meminit move_pfn_range(int online_type, int nid,
-               unsigned long start_pfn, unsigned long nr_pages)
-{
-       struct zone *zone;
-
-       zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
-       move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
-       return zone;
-}
-
 int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
 {
        unsigned long flags;
@@ -840,7 +794,8 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        put_device(&mem->dev);
 
        /* associate pfn range with the zone */
-       zone = move_pfn_range(online_type, nid, pfn, nr_pages);
+       zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
+       move_pfn_range_to_zone(zone, pfn, nr_pages, NULL);
 
        arg.start_pfn = pfn;
        arg.nr_pages = nr_pages;
@@ -864,6 +819,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
                online_pages_range);
        if (ret) {
+               /* not a single memory resource was applicable */
                if (need_zonelists_rebuild)
                        zone_pcp_reset(zone);
                goto failed_addition;
@@ -877,27 +833,22 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 
        shuffle_zone(zone);
 
-       if (onlined_pages) {
-               node_states_set_node(nid, &arg);
-               if (need_zonelists_rebuild)
-                       build_all_zonelists(NULL);
-               else
-                       zone_pcp_update(zone);
-       }
+       node_states_set_node(nid, &arg);
+       if (need_zonelists_rebuild)
+               build_all_zonelists(NULL);
+       else
+               zone_pcp_update(zone);
 
        init_per_zone_wmark_min();
 
-       if (onlined_pages) {
-               kswapd_run(nid);
-               kcompactd_run(nid);
-       }
+       kswapd_run(nid);
+       kcompactd_run(nid);
 
        vm_total_pages = nr_free_pagecache_pages();
 
        writeback_set_ratelimit();
 
-       if (onlined_pages)
-               memory_notify(MEM_ONLINE, &arg);
+       memory_notify(MEM_ONLINE, &arg);
        mem_hotplug_done();
        return 0;
 
@@ -933,8 +884,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
                if (!pgdat)
                        return NULL;
 
+               pgdat->per_cpu_nodestats =
+                       alloc_percpu(struct per_cpu_nodestat);
                arch_refresh_nodedata(nid, pgdat);
        } else {
+               int cpu;
                /*
                 * Reset the nr_zones, order and classzone_idx before reuse.
                 * Note that kswapd will init kswapd_classzone_idx properly
@@ -943,6 +897,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
                pgdat->nr_zones = 0;
                pgdat->kswapd_order = 0;
                pgdat->kswapd_classzone_idx = 0;
+               for_each_online_cpu(cpu) {
+                       struct per_cpu_nodestat *p;
+
+                       p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
+                       memset(p, 0, sizeof(*p));
+               }
        }
 
        /* we can use NODE_DATA(nid) from here */
@@ -952,7 +912,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 
        /* init node's zones as empty zones, we don't have any present pages.*/
        free_area_init_core_hotplug(nid);
-       pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
        /*
         * The node we allocated has no zone fallback lists. For avoiding
@@ -1309,7 +1268,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
                head = compound_head(page);
                if (page_huge_active(head))
                        return pfn;
-               skip = (1 << compound_order(head)) - (page - head);
+               skip = compound_nr(head) - (page - head);
                pfn += skip - 1;
        }
        return 0;
@@ -1347,7 +1306,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 
                if (PageHuge(page)) {
                        struct page *head = compound_head(page);
-                       pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+                       pfn = page_to_pfn(head) + compound_nr(head) - 1;
                        isolate_huge_page(head, &source);
                        continue;
                } else if (PageTransHuge(page))
@@ -1662,7 +1621,7 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
                phys_addr_t beginpa, endpa;
 
                beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
-               endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
+               endpa = beginpa + memory_block_size_bytes() - 1;
                pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
                        &beginpa, &endpa);
 
@@ -1800,7 +1759,7 @@ void __remove_memory(int nid, u64 start, u64 size)
 {
 
        /*
-        * trigger BUG() is some memory is not offlined prior to calling this
+        * trigger BUG() if some memory is not offlined prior to calling this
         * function
         */
        if (try_remove_memory(nid, start, size))
index f000771..4ae967b 100644 (file)
@@ -1179,8 +1179,8 @@ static struct page *new_page(struct page *page, unsigned long start)
        } else if (PageTransHuge(page)) {
                struct page *thp;
 
-               thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-                               address, numa_node_id());
+               thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+                                        HPAGE_PMD_ORDER);
                if (!thp)
                        return NULL;
                prep_transhuge_page(thp);
@@ -1405,6 +1405,7 @@ static long kernel_mbind(unsigned long start, unsigned long len,
        int err;
        unsigned short mode_flags;
 
+       start = untagged_addr(start);
        mode_flags = mode & MPOL_MODE_FLAGS;
        mode &= ~MPOL_MODE_FLAGS;
        if (mode >= MPOL_MAX)
@@ -1512,10 +1513,6 @@ static int kernel_migrate_pages(pid_t pid, unsigned long maxnode,
        if (nodes_empty(*new))
                goto out_put;
 
-       nodes_and(*new, *new, node_states[N_MEMORY]);
-       if (nodes_empty(*new))
-               goto out_put;
-
        err = security_task_movememory(task);
        if (err)
                goto out_put;
@@ -1562,6 +1559,8 @@ static int kernel_get_mempolicy(int __user *policy,
        int uninitialized_var(pval);
        nodemask_t nodes;
 
+       addr = untagged_addr(addr);
+
        if (nmask != NULL && maxnode < nr_node_ids)
                return -EINVAL;
 
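The untagged_addr() calls added here and in madvise(), mbind() and
move_pages() strip the arm64 tagged-address-ABI top byte before the address
is looked up in the VMA tree. A userspace sketch of the masking itself
(sign-extension from bit 55, matching the arm64 definition); the tag value
is made up:

#include <stdio.h>
#include <stdint.h>

/* clear bits 63:56 of a user pointer by sign-extending from bit 55 */
static uint64_t untag(uint64_t addr)
{
	return (uint64_t)(((int64_t)(addr << 8)) >> 8);
}

int main(void)
{
	uint64_t tagged = 0x2a00ffff12345678ULL;	/* tag 0x2a on top */

	printf("%#llx -> %#llx\n", (unsigned long long)tagged,
	       (unsigned long long)untag(tagged));
	return 0;
}
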
@@ -1733,7 +1732,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
                                                unsigned long addr)
 {
        struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2082,6 +2081,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *     @vma:  Pointer to VMA or NULL if not available.
  *     @addr: Virtual Address of the allocation. Must be inside the VMA.
  *     @node: Which node to prefer for allocation (modulo policy).
+ *     @hugepage: for hugepages try only the preferred node if possible
  *
  *     This function allocates a page from the kernel page pool and applies
  *     a NUMA policy associated with the VMA or the current process.
@@ -2092,7 +2092,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-               unsigned long addr, int node)
+               unsigned long addr, int node, bool hugepage)
 {
        struct mempolicy *pol;
        struct page *page;
@@ -2110,6 +2110,42 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
                goto out;
        }
 
+       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+               int hpage_node = node;
+
+               /*
+                * For hugepage allocation and non-interleave policy which
+                * allows the current node (or other explicitly preferred
+                * node) we only try to allocate from the current/preferred
+                * node and don't fall back to other nodes, as the cost of
+                * remote accesses would likely offset THP benefits.
+                *
+                * If the policy is interleave, or does not allow the current
+                * node in its nodemask, we allocate the standard way.
+                */
+               if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+                       hpage_node = pol->v.preferred_node;
+
+               nmask = policy_nodemask(gfp, pol);
+               if (!nmask || node_isset(hpage_node, *nmask)) {
+                       mpol_cond_put(pol);
+                       page = __alloc_pages_node(hpage_node,
+                                               gfp | __GFP_THISNODE, order);
+
+                       /*
+                        * If hugepage allocations are configured to always
+                        * use synchronous compaction, or the vma has been
+                        * madvised to prefer hugepage backing, retry while
+                        * allowing remote memory as well.
+                        */
+                       if (!page && (gfp & __GFP_DIRECT_RECLAIM))
+                               page = __alloc_pages_node(hpage_node,
+                                               gfp | __GFP_NORETRY, order);
+
+                       goto out;
+               }
+       }
+
        nmask = policy_nodemask(gfp, pol);
        preferred_nid = policy_node(gfp, pol, node);
        page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
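The hugepage branch above tries only the current/preferred node first (via __GFP_THISNODE) and falls back to remote nodes only when the caller may direct-reclaim. A hedged model of that ordering; try_node() and try_any_node() are hypothetical stand-ins for __alloc_pages_node() and the nodemask fallback:

    #include <stddef.h>

    struct page { int nid; };

    #define GFP_DIRECT_RECLAIM_DEMO 0x1u

    /* stand-ins that always fail, keeping the demo self-contained */
    static struct page *try_node(int nid, unsigned int gfp, int order)
    {
            (void)nid; (void)gfp; (void)order;
            return NULL;
    }

    static struct page *try_any_node(unsigned int gfp, int order)
    {
            (void)gfp; (void)order;
            return NULL;
    }

    static struct page *thp_alloc_demo(int preferred_nid, unsigned int gfp,
                                       int order)
    {
            /* step 1: preferred node only, no fallback */
            struct page *page = try_node(preferred_nid, gfp, order);

            /* step 2: allow remote memory only if the caller can reclaim */
            if (!page && (gfp & GFP_DIRECT_RECLAIM_DEMO))
                    page = try_any_node(gfp, order);
            return page;
    }

    int main(void)
    {
            return thp_alloc_demo(0, GFP_DIRECT_RECLAIM_DEMO, 9) ? 1 : 0;
    }
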
index 32c79b5..03ccbdf 100644 (file)
@@ -13,8 +13,6 @@
 #include <linux/xarray.h>
 
 static DEFINE_XARRAY(pgmap_array);
-#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
-#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
 #ifdef CONFIG_DEV_PAGEMAP_OPS
 DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
@@ -105,6 +103,7 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 void memunmap_pages(struct dev_pagemap *pgmap)
 {
        struct resource *res = &pgmap->res;
+       struct page *first_page;
        unsigned long pfn;
        int nid;
 
@@ -113,14 +112,16 @@ void memunmap_pages(struct dev_pagemap *pgmap)
                put_page(pfn_to_page(pfn));
        dev_pagemap_cleanup(pgmap);
 
+       /* make sure to access a memmap that was actually initialized */
+       first_page = pfn_to_page(pfn_first(pgmap));
+
        /* pages are dead and unused, undo the arch mapping */
-       nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
+       nid = page_to_nid(first_page);
 
        mem_hotplug_begin();
        if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-               pfn = PHYS_PFN(res->start);
-               __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
-                                PHYS_PFN(resource_size(res)), NULL);
+               __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
+                              PHYS_PFN(resource_size(res)), NULL);
        } else {
                arch_remove_memory(nid, res->start, resource_size(res),
                                pgmap_altmap(pgmap));
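memunmap_pages() now derives the node and zone from pfn_first()'s page because, with an altmap, the memmap for the very start of the resource may never have been initialized. A hedged arithmetic sketch of that offset (PAGE_SHIFT and the reserve count are illustrative values):

    #include <stdio.h>

    #define PAGE_SHIFT_DEMO 12

    /* first initialized pfn = resource base pfn + pages the altmap
     * reserved for the device's own struct pages */
    static unsigned long pfn_first_demo(unsigned long res_start,
                                        unsigned long altmap_reserved)
    {
            return (res_start >> PAGE_SHIFT_DEMO) + altmap_reserved;
    }

    int main(void)
    {
            printf("first pfn: %#lx\n", pfn_first_demo(0x100000000UL, 128));
            return 0;
    }
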
index 9f4ed4e..4fe45d1 100644 (file)
@@ -460,7 +460,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 
                for (i = 1; i < HPAGE_PMD_NR; i++) {
                        xas_next(&xas);
-                       xas_store(&xas, newpage + i);
+                       xas_store(&xas, newpage);
                }
        }
 
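The xas_store() one-liner fixes multi-slot THP entries: every slot spanned by the huge page must reference the head page, since newpage + i would plant pointers to tail pages that lookups do not expect. A minimal model of the invariant:

    #include <assert.h>

    #define HPAGE_PMD_NR_DEMO 512

    int main(void)
    {
            static int head;                       /* stands in for the head struct page */
            static int *slots[HPAGE_PMD_NR_DEMO];  /* stands in for the XArray slots */

            for (int i = 0; i < HPAGE_PMD_NR_DEMO; i++)
                    slots[i] = &head;              /* was, in effect: &head + i */

            for (int i = 0; i < HPAGE_PMD_NR_DEMO; i++)
                    assert(slots[i] == &head);
            return 0;
    }
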
@@ -1612,7 +1612,7 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
                        goto out_flush;
                if (get_user(node, nodes + i))
                        goto out_flush;
-               addr = (unsigned long)p;
+               addr = (unsigned long)untagged_addr(p);
 
                err = -ENODEV;
                if (node < 0 || node >= MAX_NUMNODES)
@@ -1892,7 +1892,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
        VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
        /* Avoid migrating to a node that is nearly full */
-       if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
+       if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
                return 0;
 
        if (isolate_lru_page(page))
@@ -2218,17 +2218,15 @@ again:
                pte_t pte;
 
                pte = *ptep;
-               pfn = pte_pfn(pte);
 
                if (pte_none(pte)) {
                        mpfn = MIGRATE_PFN_MIGRATE;
                        migrate->cpages++;
-                       pfn = 0;
                        goto next;
                }
 
                if (!pte_present(pte)) {
-                       mpfn = pfn = 0;
+                       mpfn = 0;
 
                        /*
                         * Only care about unaddressable device page special
@@ -2245,10 +2243,10 @@ again:
                        if (is_write_device_private_entry(entry))
                                mpfn |= MIGRATE_PFN_WRITE;
                } else {
+                       pfn = pte_pfn(pte);
                        if (is_zero_pfn(pfn)) {
                                mpfn = MIGRATE_PFN_MIGRATE;
                                migrate->cpages++;
-                               pfn = 0;
                                goto next;
                        }
                        page = vm_normal_page(migrate->vma, addr, pte);
@@ -2258,10 +2256,9 @@ again:
 
                /* FIXME support THP */
                if (!page || !page->mapping || PageTransCompound(page)) {
-                       mpfn = pfn = 0;
+                       mpfn = 0;
                        goto next;
                }
-               pfn = page_to_pfn(page);
 
                /*
                 * By getting a reference on the page we pin it and that blocks
index f9a9dbe..49b6fa2 100644 (file)
@@ -256,6 +256,8 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
        unsigned long pages;
        unsigned char *tmp;
 
+       start = untagged_addr(start);
+
        /* Check the start address: needs to be page-aligned.. */
        if (start & ~PAGE_MASK)
                return -EINVAL;
index a90099d..a72c1ee 100644 (file)
@@ -674,6 +674,8 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
        unsigned long lock_limit;
        int error = -ENOMEM;
 
+       start = untagged_addr(start);
+
        if (!can_do_mlock())
                return -EPERM;
 
@@ -735,6 +737,8 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 {
        int ret;
 
+       start = untagged_addr(start);
+
        len = PAGE_ALIGN(len + (offset_in_page(start)));
        start &= PAGE_MASK;
 
index 6bc21fc..a7d8c84 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -201,6 +201,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        bool downgraded = false;
        LIST_HEAD(uf);
 
+       brk = untagged_addr(brk);
+
        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;
 
@@ -289,9 +291,9 @@ out:
        return retval;
 }
 
-static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
 {
-       unsigned long max, prev_end, subtree_gap;
+       unsigned long gap, prev_end;
 
        /*
         * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
@@ -299,14 +301,21 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
         * an unmapped area; whereas when expanding we only require one.
         * That's a little inconsistent, but keeps the code here simpler.
         */
-       max = vm_start_gap(vma);
+       gap = vm_start_gap(vma);
        if (vma->vm_prev) {
                prev_end = vm_end_gap(vma->vm_prev);
-               if (max > prev_end)
-                       max -= prev_end;
+               if (gap > prev_end)
+                       gap -= prev_end;
                else
-                       max = 0;
+                       gap = 0;
        }
+       return gap;
+}
+
+#ifdef CONFIG_DEBUG_VM_RB
+static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
+{
+       unsigned long max = vma_compute_gap(vma), subtree_gap;
        if (vma->vm_rb.rb_left) {
                subtree_gap = rb_entry(vma->vm_rb.rb_left,
                                struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -322,7 +331,6 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
        return max;
 }
 
-#ifdef CONFIG_DEBUG_VM_RB
 static int browse_rb(struct mm_struct *mm)
 {
        struct rb_root *root = &mm->mm_rb;
@@ -428,8 +436,9 @@ static void validate_mm(struct mm_struct *mm)
 #define validate_mm(mm) do { } while (0)
 #endif
 
-RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
-                    unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
+RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
+                        struct vm_area_struct, vm_rb,
+                        unsigned long, rb_subtree_gap, vma_compute_gap)
 
 /*
  * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
@@ -439,8 +448,8 @@ RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
 static void vma_gap_update(struct vm_area_struct *vma)
 {
        /*
-        * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
-        * function that does exactly what we want.
+        * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
+        * a callback function that does exactly what we want.
         */
        vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
 }
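RB_DECLARE_CALLBACKS_MAX() generates propagation callbacks for exactly this pattern: each node caches the maximum gap found in its subtree so free-area searches can skip whole branches. A hedged sketch of the invariant being maintained, with struct node as an illustrative stand-in for vm_area_struct:

    #include <stdio.h>

    struct node {
            unsigned long gap;          /* vma_compute_gap() analogue */
            unsigned long subtree_gap;  /* cached maximum over the subtree */
            struct node *left, *right;
    };

    static unsigned long max3(unsigned long a, unsigned long b, unsigned long c)
    {
            unsigned long m = a > b ? a : b;
            return m > c ? m : c;
    }

    /* one propagation step: recompute a node's cached maximum */
    static unsigned long update_subtree_gap(struct node *n)
    {
            n->subtree_gap = max3(n->gap,
                                  n->left ? n->left->subtree_gap : 0,
                                  n->right ? n->right->subtree_gap : 0);
            return n->subtree_gap;
    }

    int main(void)
    {
            struct node l = { .gap = 4096, .subtree_gap = 4096 };
            struct node r = { .gap = 65536, .subtree_gap = 65536 };
            struct node root = { .gap = 8192, .left = &l, .right = &r };

            printf("%lu\n", update_subtree_gap(&root)); /* 65536 */
            return 0;
    }
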
@@ -1358,6 +1367,9 @@ static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
        if (S_ISBLK(inode->i_mode))
                return MAX_LFS_FILESIZE;
 
+       if (S_ISSOCK(inode->i_mode))
+               return MAX_LFS_FILESIZE;
+
        /* Special "we do even unsigned file positions" case */
        if (file->f_mode & FMODE_UNSIGNED_OFFSET)
                return 0;
@@ -1577,6 +1589,8 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
        struct file *file = NULL;
        unsigned long retval;
 
+       addr = untagged_addr(addr);
+
        if (!(flags & MAP_ANONYMOUS)) {
                audit_mmap_fd(fd, flags);
                file = fget(fd);
@@ -2274,12 +2288,9 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
        if (vma) {
                *pprev = vma->vm_prev;
        } else {
-               struct rb_node *rb_node = mm->mm_rb.rb_node;
-               *pprev = NULL;
-               while (rb_node) {
-                       *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
-                       rb_node = rb_node->rb_right;
-               }
+               struct rb_node *rb_node = rb_last(&mm->mm_rb);
+
+               *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
        }
        return vma;
 }
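When no VMA lies above addr, its predecessor is simply the highest-addressed VMA, i.e. the rightmost node of the tree; rb_last() from lib/rbtree.c already walks the right spine, which is all the deleted loop did. A self-contained model of that walk:

    #include <stddef.h>

    struct rb_node { struct rb_node *rb_left, *rb_right; };
    struct rb_root { struct rb_node *rb_node; };

    /* equivalent of the open-coded loop this hunk removes */
    static struct rb_node *rb_last_demo(const struct rb_root *root)
    {
            struct rb_node *n = root->rb_node;

            if (!n)
                    return NULL;
            while (n->rb_right)
                    n = n->rb_right;
            return n;
    }

    int main(void)
    {
            struct rb_node a = { NULL, NULL };
            struct rb_node b = { NULL, &a };  /* a is b's right child */
            struct rb_root root = { &b };

            return rb_last_demo(&root) == &a ? 0 : 1;
    }
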
@@ -2878,6 +2889,7 @@ EXPORT_SYMBOL(vm_munmap);
 
 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
+       addr = untagged_addr(addr);
        profile_munmap(addr);
        return __vm_munmap(addr, len, true);
 }
index 8c943a6..7d70e5c 100644 (file)
@@ -271,8 +271,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
 
        tlb_flush_mmu(tlb);
 
-       /* keep the page table cache within bounds */
-       check_pgt_cache();
 #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
        tlb_batch_list_free(tlb);
 #endif
index 7fde886..9a889e4 100644 (file)
@@ -180,7 +180,7 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
                                        mn->ops->invalidate_range_start, _ret,
                                        !mmu_notifier_range_blockable(range) ? "non-" : "");
                                WARN_ON(mmu_notifier_range_blockable(range) ||
-                                       ret != -EAGAIN);
+                                       _ret != -EAGAIN);
                                ret = _ret;
                        }
                }
index 675e5d3..7967825 100644 (file)
@@ -459,6 +459,8 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
        const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
                                (prot & PROT_READ);
 
+       start = untagged_addr(start);
+
        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
                return -EINVAL;
index fc241d2..1fc8a29 100644 (file)
@@ -606,6 +606,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
        LIST_HEAD(uf_unmap_early);
        LIST_HEAD(uf_unmap);
 
+       addr = untagged_addr(addr);
+       new_addr = untagged_addr(new_addr);
+
        if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
                return ret;
 
index ef30a42..c3bd3e7 100644 (file)
@@ -37,6 +37,8 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
        int unmapped_error = 0;
        int error = -EINVAL;
 
+       start = untagged_addr(start);
+
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                goto out;
        if (offset_in_page(start))
index fed1b6e..99b7ec3 100644 (file)
@@ -108,7 +108,7 @@ unsigned int kobjsize(const void *objp)
         * The ksize() function is only guaranteed to work for pointers
         * returned by kmalloc(). So handle arbitrary pointers here.
         */
-       return PAGE_SIZE << compound_order(page);
+       return page_size(page);
 }
 
 /**
index eda2e2a..71e3ace 100644 (file)
@@ -73,7 +73,7 @@ static inline bool is_memcg_oom(struct oom_control *oc)
 /**
  * oom_cpuset_eligible() - check task eligibility for kill
  * @start: task struct of which task to consider
- * @mask: nodemask passed to page allocator for mempolicy ooms
+ * @oc: pointer to struct oom_control
  *
  * Task eligibility is determined by whether or not a candidate task, @tsk,
  * shares the same mempolicy nodes as current if it is bound by such a policy
@@ -287,7 +287,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
            !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
                oc->totalpages = total_swap_pages;
                for_each_node_mask(nid, *oc->nodemask)
-                       oc->totalpages += node_spanned_pages(nid);
+                       oc->totalpages += node_present_pages(nid);
                return CONSTRAINT_MEMORY_POLICY;
        }
 
@@ -300,7 +300,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
        if (cpuset_limited) {
                oc->totalpages = total_swap_pages;
                for_each_node_mask(nid, cpuset_current_mems_allowed)
-                       oc->totalpages += node_spanned_pages(nid);
+                       oc->totalpages += node_present_pages(nid);
                return CONSTRAINT_CPUSET;
        }
        return CONSTRAINT_NONE;
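The switch from node_spanned_pages() to node_present_pages() matters because spanned pages count the whole pfn range including holes, while present pages count only memory that actually exists, so the old code could inflate oc->totalpages. Illustrative numbers (the hole size is an assumption):

    #include <stdio.h>

    #define PAGE_SHIFT_DEMO 12

    int main(void)
    {
            /* a node spanning 4 GiB of address space with a 1 GiB hole */
            unsigned long spanned = (4UL << 30) >> PAGE_SHIFT_DEMO;
            unsigned long hole    = (1UL << 30) >> PAGE_SHIFT_DEMO;
            unsigned long present = spanned - hole;

            printf("spanned %lu pages, present %lu pages\n", spanned, present);
            return 0;
    }
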
@@ -523,7 +523,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
        set_bit(MMF_UNSTABLE, &mm->flags);
 
        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-               if (!can_madv_dontneed_vma(vma))
+               if (!can_madv_lru_vma(vma))
                        continue;
 
                /*
@@ -884,12 +884,13 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
         */
        do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
        mark_oom_victim(victim);
-       pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
-               message, task_pid_nr(victim), victim->comm,
-               K(victim->mm->total_vm),
-               K(get_mm_counter(victim->mm, MM_ANONPAGES)),
-               K(get_mm_counter(victim->mm, MM_FILEPAGES)),
-               K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
+       pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
+               message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
+               K(get_mm_counter(mm, MM_ANONPAGES)),
+               K(get_mm_counter(mm, MM_FILEPAGES)),
+               K(get_mm_counter(mm, MM_SHMEMPAGES)),
+               from_kuid(&init_user_ns, task_uid(victim)),
+               mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
        task_unlock(victim);
 
        /*
@@ -1068,9 +1069,10 @@ bool out_of_memory(struct oom_control *oc)
         * The OOM killer does not compensate for IO-less reclaim.
         * pagefault_out_of_memory lost its gfp context so we have to
         * make sure to exclude the 0 mask - all other users should have at least
-        * ___GFP_DIRECT_RECLAIM to get here.
+        * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
+        * invoke the OOM killer even if it is a GFP_NOFS allocation.
         */
-       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
+       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
                return true;
 
        /*
index ff5484f..f391c0c 100644 (file)
@@ -670,6 +670,7 @@ out:
 
 void free_compound_page(struct page *page)
 {
+       mem_cgroup_uncharge(page);
        __free_pages_ok(page, compound_order(page));
 }
 
@@ -1174,11 +1175,17 @@ static __always_inline bool free_pages_prepare(struct page *page,
                debug_check_no_obj_freed(page_address(page),
                                           PAGE_SIZE << order);
        }
-       arch_free_page(page, order);
        if (want_init_on_free())
                kernel_init_free_pages(page, 1 << order);
 
        kernel_poison_pages(page, 1 << order, 0);
+       /*
+        * arch_free_page() can make the page's contents inaccessible.  s390
+        * does this.  So nothing which can access the page's contents should
+        * happen after this.
+        */
+       arch_free_page(page, order);
+
        if (debug_pagealloc_enabled())
                kernel_map_pages(page, 1 << order, 0);
 
@@ -1940,6 +1947,14 @@ void __init page_alloc_init_late(void)
        /* Block until all are initialised */
        wait_for_completion(&pgdat_init_all_done_comp);
 
+       /*
+        * The number of managed pages has changed due to the initialisation
+        * so the pcpu batch and high limits need to be updated or the limits
+        * will be artificially small.
+        */
+       for_each_populated_zone(zone)
+               zone_pcp_update(zone);
+
        /*
         * We initialized the rest of the deferred pages.  Permanently disable
         * on-demand struct page initialization.
@@ -3713,10 +3728,6 @@ try_this_zone:
 static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
 {
        unsigned int filter = SHOW_MEM_FILTER_NODES;
-       static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1);
-
-       if (!__ratelimit(&show_mem_rs))
-               return;
 
        /*
         * This documents exceptions given to allocations in certain
@@ -3737,8 +3748,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 {
        struct va_format vaf;
        va_list args;
-       static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
-                                     DEFAULT_RATELIMIT_BURST);
+       static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1);
 
        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
                return;
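The new state limits allocation-failure warnings to at most one per ten seconds, replacing the default interval/burst pair. A hedged userspace model of the interval/burst bookkeeping (the kernel counts jiffies and serializes with a spinlock):

    #include <stdbool.h>
    #include <time.h>

    struct ratelimit_demo {
            time_t begin;
            int interval;   /* seconds; 10*HZ jiffies in the kernel */
            int burst;
            int printed;
    };

    static bool ratelimit_ok(struct ratelimit_demo *rs)
    {
            time_t now = time(NULL);

            if (now - rs->begin >= rs->interval) {
                    rs->begin = now;    /* new window, reset the budget */
                    rs->printed = 0;
            }
            if (rs->printed >= rs->burst)
                    return false;       /* suppressed */
            rs->printed++;
            return true;
    }

    int main(void)
    {
            struct ratelimit_demo rs = { .interval = 10, .burst = 1 };
            int passed = 0;

            for (int i = 0; i < 3; i++)
                    if (ratelimit_ok(&rs))
                            passed++;
            return passed == 1 ? 0 : 1; /* only the first call passes */
    }
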
@@ -3954,15 +3964,23 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
        if (compaction_failed(compact_result))
                goto check_priority;
 
+       /*
+        * compaction was skipped because there are not enough order-0 pages
+        * to work with, so we retry only if it looks like reclaim can help.
+        */
+       if (compaction_needs_reclaim(compact_result)) {
+               ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+               goto out;
+       }
+
        /*
         * make sure the compaction wasn't deferred or didn't bail out early
         * due to lock contention before we declare that we should give up.
-        * But do not retry if the given zonelist is not suitable for
-        * compaction.
+        * But the next retry should use a higher priority if allowed, so
+        * we don't just keep bailing out endlessly.
         */
        if (compaction_withdrawn(compact_result)) {
-               ret = compaction_zonelist_suitable(ac, order, alloc_flags);
-               goto out;
+               goto check_priority;
        }
 
        /*
@@ -4458,6 +4476,30 @@ retry_cpuset:
                if (page)
                        goto got_pg;
 
+                if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
+                    !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
+                       /*
+                        * If allocating entire pageblock(s) and compaction
+                        * failed because all zones are below low watermarks
+                        * or is prohibited because it recently failed at this
+                        * order, fail immediately unless the allocator has
+                        * requested compaction and reclaim retry.
+                        *
+                        * Reclaim is
+                        *  - potentially very expensive because zones are far
+                        *    below their low watermarks or this is part of very
+                        *    bursty high order allocations,
+                        *  - not guaranteed to help because isolate_freepages()
+                        *    may not iterate over freed pages as part of its
+                        *    linear scan, and
+                        *  - unlikely to make entire pageblocks free on its
+                        *    own.
+                        */
+                       if (compact_result == COMPACT_SKIPPED ||
+                           compact_result == COMPACT_DEFERRED)
+                               goto nopage;
+               }
+
                /*
                 * Checks for costly allocations with __GFP_NORETRY, which
                 * includes THP page fault allocations
@@ -6638,9 +6680,11 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void pgdat_init_split_queue(struct pglist_data *pgdat)
 {
-       spin_lock_init(&pgdat->split_queue_lock);
-       INIT_LIST_HEAD(&pgdat->split_queue);
-       pgdat->split_queue_len = 0;
+       struct deferred_split *ds_queue = &pgdat->deferred_split_queue;
+
+       spin_lock_init(&ds_queue->split_queue_lock);
+       INIT_LIST_HEAD(&ds_queue->split_queue);
+       ds_queue->split_queue_len = 0;
 }
 #else
 static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
@@ -8196,7 +8240,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                        if (!hugepage_migration_supported(page_hstate(head)))
                                goto unmovable;
 
-                       skip_pages = (1 << compound_order(head)) - (page - head);
+                       skip_pages = compound_nr(head) - (page - head);
                        iter += skip_pages - 1;
                        continue;
                }
@@ -8473,7 +8517,6 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages)
        WARN(count != 0, "%d pages are still in use!\n", count);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * The zone indicated has a new number of managed_pages; batch sizes and percpu
  * page high values need to be recalculated.
@@ -8487,7 +8530,6 @@ void __meminit zone_pcp_update(struct zone *zone)
                                per_cpu_ptr(zone->pageset, cpu));
        mutex_unlock(&pcp_batch_high_lock);
 }
-#endif
 
 void zone_pcp_reset(struct zone *zone)
 {
index 5f5769c..4ade843 100644 (file)
@@ -67,8 +67,9 @@ static struct page_ext_operations *page_ext_ops[] = {
 #endif
 };
 
+unsigned long page_ext_size = sizeof(struct page_ext);
+
 static unsigned long total_usage;
-static unsigned long extra_mem;
 
 static bool __init invoke_need_callbacks(void)
 {
@@ -78,9 +79,8 @@ static bool __init invoke_need_callbacks(void)
 
        for (i = 0; i < entries; i++) {
                if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
-                       page_ext_ops[i]->offset = sizeof(struct page_ext) +
-                                               extra_mem;
-                       extra_mem += page_ext_ops[i]->size;
+                       page_ext_ops[i]->offset = page_ext_size;
+                       page_ext_size += page_ext_ops[i]->size;
                        need = true;
                }
        }
@@ -99,14 +99,9 @@ static void __init invoke_init_callbacks(void)
        }
 }
 
-static unsigned long get_entry_size(void)
-{
-       return sizeof(struct page_ext) + extra_mem;
-}
-
 static inline struct page_ext *get_entry(void *base, unsigned long index)
 {
-       return base + get_entry_size() * index;
+       return base + page_ext_size * index;
 }
 
 #if !defined(CONFIG_SPARSEMEM)
@@ -156,7 +151,7 @@ static int __init alloc_node_page_ext(int nid)
                !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
                nr_pages += MAX_ORDER_NR_PAGES;
 
-       table_size = get_entry_size() * nr_pages;
+       table_size = page_ext_size * nr_pages;
 
        base = memblock_alloc_try_nid(
                        table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
@@ -234,7 +229,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
        if (section->page_ext)
                return 0;
 
-       table_size = get_entry_size() * PAGES_PER_SECTION;
+       table_size = page_ext_size * PAGES_PER_SECTION;
        base = alloc_page_ext(table_size, nid);
 
        /*
@@ -254,7 +249,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
         * we need to apply a mask.
         */
        pfn &= PAGE_SECTION_MASK;
-       section->page_ext = (void *)base - get_entry_size() * pfn;
+       section->page_ext = (void *)base - page_ext_size * pfn;
        total_usage += table_size;
        return 0;
 }
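Exporting page_ext_size in place of get_entry_size() makes the layout explicit: each record is the base struct page_ext plus whatever space the enabled clients claimed at boot, and record i lives at base + page_ext_size * i. A hedged sketch (the 16-byte client payload is an illustrative assumption):

    #include <stddef.h>
    #include <stdio.h>

    struct page_ext_demo { unsigned long flags; };

    /* base struct plus the clients' opted-in extra space */
    static const size_t page_ext_size_demo = sizeof(struct page_ext_demo) + 16;

    static void *get_entry_demo(void *base, unsigned long index)
    {
            return (char *)base + page_ext_size_demo * index;
    }

    int main(void)
    {
            char table[10 * (sizeof(struct page_ext_demo) + 16)];

            printf("entry 3 at offset %td\n",
                   (char *)get_entry_demo(table, 3) - table);
            return 0;
    }
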
@@ -267,7 +262,7 @@ static void free_page_ext(void *addr)
                struct page *page = virt_to_page(addr);
                size_t table_size;
 
-               table_size = get_entry_size() * PAGES_PER_SECTION;
+               table_size = page_ext_size * PAGES_PER_SECTION;
 
                BUG_ON(PageReserved(page));
                kmemleak_free(addr);
index addcbb2..18ecde9 100644 (file)
@@ -24,9 +24,10 @@ struct page_owner {
        short last_migrate_reason;
        gfp_t gfp_mask;
        depot_stack_handle_t handle;
+       depot_stack_handle_t free_handle;
 };
 
-static bool page_owner_disabled = true;
+static bool page_owner_enabled = false;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
 static depot_stack_handle_t dummy_handle;
@@ -41,7 +42,7 @@ static int __init early_page_owner_param(char *buf)
                return -EINVAL;
 
        if (strcmp(buf, "on") == 0)
-               page_owner_disabled = false;
+               page_owner_enabled = true;
 
        return 0;
 }
@@ -49,10 +50,7 @@ early_param("page_owner", early_page_owner_param);
 
 static bool need_page_owner(void)
 {
-       if (page_owner_disabled)
-               return false;
-
-       return true;
+       return page_owner_enabled;
 }
 
 static __always_inline depot_stack_handle_t create_dummy_stack(void)
@@ -81,7 +79,7 @@ static noinline void register_early_stack(void)
 
 static void init_page_owner(void)
 {
-       if (page_owner_disabled)
+       if (!page_owner_enabled)
                return;
 
        register_dummy_stack();
@@ -102,19 +100,6 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
        return (void *)page_ext + page_owner_ops.offset;
 }
 
-void __reset_page_owner(struct page *page, unsigned int order)
-{
-       int i;
-       struct page_ext *page_ext;
-
-       for (i = 0; i < (1 << order); i++) {
-               page_ext = lookup_page_ext(page + i);
-               if (unlikely(!page_ext))
-                       continue;
-               __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
-       }
-}
-
 static inline bool check_recursive_alloc(unsigned long *entries,
                                         unsigned int nr_entries,
                                         unsigned long ip)
@@ -154,18 +139,44 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
        return handle;
 }
 
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
-       depot_stack_handle_t handle, unsigned int order, gfp_t gfp_mask)
+void __reset_page_owner(struct page *page, unsigned int order)
 {
+       int i;
+       struct page_ext *page_ext;
+       depot_stack_handle_t handle = 0;
        struct page_owner *page_owner;
 
-       page_owner = get_page_owner(page_ext);
-       page_owner->handle = handle;
-       page_owner->order = order;
-       page_owner->gfp_mask = gfp_mask;
-       page_owner->last_migrate_reason = -1;
+       handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+
+       page_ext = lookup_page_ext(page);
+       if (unlikely(!page_ext))
+               return;
+       for (i = 0; i < (1 << order); i++) {
+               __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+               page_owner = get_page_owner(page_ext);
+               page_owner->free_handle = handle;
+               page_ext = page_ext_next(page_ext);
+       }
+}
+
+static inline void __set_page_owner_handle(struct page *page,
+       struct page_ext *page_ext, depot_stack_handle_t handle,
+       unsigned int order, gfp_t gfp_mask)
+{
+       struct page_owner *page_owner;
+       int i;
 
-       __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+       for (i = 0; i < (1 << order); i++) {
+               page_owner = get_page_owner(page_ext);
+               page_owner->handle = handle;
+               page_owner->order = order;
+               page_owner->gfp_mask = gfp_mask;
+               page_owner->last_migrate_reason = -1;
+               __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+               __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+
+               page_ext = page_ext_next(page_ext);
+       }
 }
 
 noinline void __set_page_owner(struct page *page, unsigned int order,
@@ -178,7 +189,7 @@ noinline void __set_page_owner(struct page *page, unsigned int order,
                return;
 
        handle = save_stack(gfp_mask);
-       __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+       __set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
 }
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
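__set_page_owner_handle() now stamps a record for every page of the order-N block, tail pages included, advancing with page_ext_next() (presumably a page_ext_size-strided step, per the mm/page_ext.c hunk above). A simplified model of that loop:

    #include <stdio.h>

    struct ext_demo { unsigned int handle; };

    int main(void)
    {
            struct ext_demo table[8] = { { 0 } };   /* one record per page */
            unsigned int order = 3, handle = 42;
            struct ext_demo *pe = table;

            for (unsigned int i = 0; i < (1u << order); i++) {
                    pe->handle = handle;    /* head and tails all tagged */
                    pe++;                   /* page_ext_next() analogue */
            }
            printf("last handle: %u\n", table[7].handle);
            return 0;
    }
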
@@ -202,10 +213,11 @@ void __split_page_owner(struct page *page, unsigned int order)
        if (unlikely(!page_ext))
                return;
 
-       page_owner = get_page_owner(page_ext);
-       page_owner->order = 0;
-       for (i = 1; i < (1 << order); i++)
-               __copy_page_owner(page, page + i);
+       for (i = 0; i < (1 << order); i++) {
+               page_owner = get_page_owner(page_ext);
+               page_owner->order = 0;
+               page_ext = page_ext_next(page_ext);
+       }
 }
 
 void __copy_page_owner(struct page *oldpage, struct page *newpage)
@@ -235,6 +247,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
         * the new page, which will be freed.
         */
        __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
+       __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
 }
 
 void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -258,7 +271,8 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
         * not matter as the mixed block count will still be correct
         */
        for (; pfn < end_pfn; ) {
-               if (!pfn_valid(pfn)) {
+               page = pfn_to_online_page(pfn);
+               if (!page) {
                        pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
                        continue;
                }
@@ -266,13 +280,13 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
                block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
                block_end_pfn = min(block_end_pfn, end_pfn);
 
-               page = pfn_to_page(pfn);
                pageblock_mt = get_pageblock_migratetype(page);
 
                for (; pfn < block_end_pfn; pfn++) {
                        if (!pfn_valid_within(pfn))
                                continue;
 
+                       /* The pageblock is online, no need to recheck. */
                        page = pfn_to_page(pfn);
 
                        if (page_zone(page) != zone)
@@ -294,7 +308,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
                        if (unlikely(!page_ext))
                                continue;
 
-                       if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+                       if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
                                continue;
 
                        page_owner = get_page_owner(page_ext);
@@ -405,20 +419,34 @@ void __dump_page_owner(struct page *page)
        mt = gfpflags_to_migratetype(gfp_mask);
 
        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
-               pr_alert("page_owner info is not active (free page?)\n");
+               pr_alert("page_owner info is not present (never set?)\n");
                return;
        }
 
+       if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+               pr_alert("page_owner tracks the page as allocated\n");
+       else
+               pr_alert("page_owner tracks the page as freed\n");
+
+       pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
+                page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
+
        handle = READ_ONCE(page_owner->handle);
        if (!handle) {
-               pr_alert("page_owner info is not active (free page?)\n");
-               return;
+               pr_alert("page_owner allocation stack trace missing\n");
+       } else {
+               nr_entries = stack_depot_fetch(handle, &entries);
+               stack_trace_print(entries, nr_entries, 0);
        }
 
-       nr_entries = stack_depot_fetch(handle, &entries);
-       pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
-                page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
-       stack_trace_print(entries, nr_entries, 0);
+       handle = READ_ONCE(page_owner->free_handle);
+       if (!handle) {
+               pr_alert("page_owner free stack trace missing\n");
+       } else {
+               nr_entries = stack_depot_fetch(handle, &entries);
+               pr_alert("page last free stack trace:\n");
+               stack_trace_print(entries, nr_entries, 0);
+       }
 
        if (page_owner->last_migrate_reason != -1)
                pr_alert("page has been migrated, last migrate reason: %s\n",
@@ -481,8 +509,22 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                        continue;
 
+               /*
+                * Although we do have the info about past allocation of free
+                * pages, it's not relevant for current memory usage.
+                */
+               if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+                       continue;
+
                page_owner = get_page_owner(page_ext);
 
+               /*
+                * Don't print "tail" pages of high-order allocations as that
+                * would inflate the stats.
+                */
+               if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+                       continue;
+
                /*
                 * Access to page_ext->handle isn't synchronized, so we
                 * should be careful when reading it.
@@ -562,7 +604,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
                                continue;
 
                        /* Found early allocated page */
-                       __set_page_owner_handle(page_ext, early_handle, 0, 0);
+                       __set_page_owner_handle(page, page_ext, early_handle,
+                                               0, 0);
                        count++;
                }
                cond_resched();
index 21d4f97..34b9181 100644 (file)
@@ -101,7 +101,7 @@ static void unpoison_page(struct page *page)
        /*
         * Page poisoning when enabled poisons each and every page
         * that is freed to buddy. Thus no extra check is done to
-        * see if a page was posioned.
+        * see if a page was poisoned.
         */
        check_poison_mem(addr, PAGE_SIZE);
        kunmap_atomic(addr);
index 11df03e..eff4b45 100644 (file)
@@ -153,8 +153,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
        if (unlikely(PageHuge(pvmw->page))) {
                /* when pud is not present, pte will be NULL */
-               pvmw->pte = huge_pte_offset(mm, pvmw->address,
-                                           PAGE_SIZE << compound_order(page));
+               pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
                if (!pvmw->pte)
                        return false;
 
diff --git a/mm/quicklist.c b/mm/quicklist.c
deleted file mode 100644 (file)
index 5e98ac7..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Quicklist support.
- *
- * Quicklists are light weight lists of pages that have a defined state
- * on alloc and free. Pages must be in the quicklist specific defined state
- * (zero by default) when the page is freed. It seems that the initial idea
- * for such lists first came from Dave Miller and then various other people
- * improved on it.
- *
- * Copyright (C) 2007 SGI,
- *     Christoph Lameter <cl@linux.com>
- *             Generalized, added support for multiple lists and
- *             constructors / destructors.
- */
-#include <linux/kernel.h>
-
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/quicklist.h>
-
-DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);
-
-#define FRACTION_OF_NODE_MEM   16
-
-static unsigned long max_pages(unsigned long min_pages)
-{
-       unsigned long node_free_pages, max;
-       int node = numa_node_id();
-       struct zone *zones = NODE_DATA(node)->node_zones;
-       int num_cpus_on_node;
-
-       node_free_pages =
-#ifdef CONFIG_ZONE_DMA
-               zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
-#endif
-#ifdef CONFIG_ZONE_DMA32
-               zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
-#endif
-               zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
-
-       max = node_free_pages / FRACTION_OF_NODE_MEM;
-
-       num_cpus_on_node = cpumask_weight(cpumask_of_node(node));
-       max /= num_cpus_on_node;
-
-       return max(max, min_pages);
-}
-
-static long min_pages_to_free(struct quicklist *q,
-       unsigned long min_pages, long max_free)
-{
-       long pages_to_free;
-
-       pages_to_free = q->nr_pages - max_pages(min_pages);
-
-       return min(pages_to_free, max_free);
-}
-
-/*
- * Trim down the number of pages in the quicklist
- */
-void quicklist_trim(int nr, void (*dtor)(void *),
-       unsigned long min_pages, unsigned long max_free)
-{
-       long pages_to_free;
-       struct quicklist *q;
-
-       q = &get_cpu_var(quicklist)[nr];
-       if (q->nr_pages > min_pages) {
-               pages_to_free = min_pages_to_free(q, min_pages, max_free);
-
-               while (pages_to_free > 0) {
-                       /*
-                        * We pass a gfp_t of 0 to quicklist_alloc here
-                        * because we will never call into the page allocator.
-                        */
-                       void *p = quicklist_alloc(nr, 0, NULL);
-
-                       if (dtor)
-                               dtor(p);
-                       free_page((unsigned long)p);
-                       pages_to_free--;
-               }
-       }
-       put_cpu_var(quicklist);
-}
-
-unsigned long quicklist_total_size(void)
-{
-       unsigned long count = 0;
-       int cpu;
-       struct quicklist *ql, *q;
-
-       for_each_online_cpu(cpu) {
-               ql = per_cpu(quicklist, cpu);
-               for (q = ql; q < ql + CONFIG_NR_QUICK; q++)
-                       count += q->nr_pages;
-       }
-       return count;
-}
-
index 003377e..0c7b2a9 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -61,6 +61,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/hugetlb.h>
+#include <linux/huge_mm.h>
 #include <linux/backing-dev.h>
 #include <linux/page_idle.h>
 #include <linux/memremap.h>
@@ -898,15 +899,13 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
         */
        mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
                                0, vma, vma->vm_mm, address,
-                               min(vma->vm_end, address +
-                                   (PAGE_SIZE << compound_order(page))));
+                               min(vma->vm_end, address + page_size(page)));
        mmu_notifier_invalidate_range_start(&range);
 
        while (page_vma_mapped_walk(&pvmw)) {
-               unsigned long cstart;
                int ret = 0;
 
-               cstart = address = pvmw.address;
+               address = pvmw.address;
                if (pvmw.pte) {
                        pte_t entry;
                        pte_t *pte = pvmw.pte;
@@ -933,7 +932,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                        entry = pmd_wrprotect(entry);
                        entry = pmd_mkclean(entry);
                        set_pmd_at(vma->vm_mm, address, pmd, entry);
-                       cstart &= PMD_MASK;
                        ret = 1;
 #else
                        /* unexpected pmd-mapped page? */
@@ -1192,8 +1190,10 @@ void page_add_file_rmap(struct page *page, bool compound)
                }
                if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
                        goto out;
-               VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-               __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               if (PageSwapBacked(page))
+                       __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               else
+                       __inc_node_page_state(page, NR_FILE_PMDMAPPED);
        } else {
                if (PageTransCompound(page) && page_mapping(page)) {
                        VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1232,8 +1232,10 @@ static void page_remove_file_rmap(struct page *page, bool compound)
                }
                if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
                        goto out;
-               VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-               __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               if (PageSwapBacked(page))
+                       __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               else
+                       __dec_node_page_state(page, NR_FILE_PMDMAPPED);
        } else {
                if (!atomic_add_negative(-1, &page->_mapcount))
                        goto out;
@@ -1374,8 +1376,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         */
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
                                address,
-                               min(vma->vm_end, address +
-                                   (PAGE_SIZE << compound_order(page))));
+                               min(vma->vm_end, address + page_size(page)));
        if (PageHuge(page)) {
                /*
                 * If sharing is possible, start and end will be adjusted
@@ -1524,8 +1525,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
                        pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
                        if (PageHuge(page)) {
-                               int nr = 1 << compound_order(page);
-                               hugetlb_count_sub(nr, mm);
+                               hugetlb_count_sub(compound_nr(page), mm);
                                set_huge_swap_pte_at(mm, address,
                                                     pvmw.pte, pteval,
                                                     vma_mmu_pagesize(vma));
index 0f7fd4a..220be9f 100644 (file)
@@ -609,7 +609,7 @@ static int shmem_add_to_page_cache(struct page *page,
 {
        XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
        unsigned long i = 0;
-       unsigned long nr = 1UL << compound_order(page);
+       unsigned long nr = compound_nr(page);
 
        VM_BUG_ON_PAGE(PageTail(page), page);
        VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -631,7 +631,7 @@ static int shmem_add_to_page_cache(struct page *page,
                if (xas_error(&xas))
                        goto unlock;
 next:
-               xas_store(&xas, page + i);
+               xas_store(&xas, page);
                if (++i < nr) {
                        xas_next(&xas);
                        goto next;
@@ -1481,7 +1481,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
        shmem_pseudo_vma_init(&pvma, info, hindex);
        page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
        shmem_pseudo_vma_destroy(&pvma);
        if (page)
                prep_transhuge_page(page);
@@ -1734,7 +1734,7 @@ unlock:
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache.
  *
- * fault_mm and fault_type are only supplied by shmem_fault:
+ * vmf and fault_type are only supplied by shmem_fault:
  * otherwise they are NULL.
  */
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
@@ -1884,7 +1884,7 @@ alloc_nohuge:
        lru_cache_add_anon(page);
 
        spin_lock_irq(&info->lock);
-       info->alloced += 1 << compound_order(page);
+       info->alloced += compound_nr(page);
        inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
        shmem_recalc_inode(inode);
        spin_unlock_irq(&info->lock);
@@ -1925,7 +1925,7 @@ clear:
                struct page *head = compound_head(page);
                int i;
 
-               for (i = 0; i < (1 << compound_order(head)); i++) {
+               for (i = 0; i < compound_nr(head); i++) {
                        clear_highpage(head + i);
                        flush_dcache_page(head + i);
                }
@@ -1952,7 +1952,7 @@ clear:
         * Error recovery.
         */
 unacct:
-       shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
+       shmem_inode_unacct_blocks(inode, compound_nr(page));
 
        if (PageTransHuge(page)) {
                unlock_page(page);
@@ -3482,6 +3482,12 @@ static int shmem_parse_options(struct fs_context *fc, void *data)
 {
        char *options = data;
 
+       if (options) {
+               int err = security_sb_eat_lsm_opts(options, &fc->security);
+               if (err)
+                       return err;
+       }
+
        while (options != NULL) {
                char *this_char = options;
                for (;;) {
index 3ce1248..b3fe97f 100644 (file)
@@ -33,7 +33,7 @@ __meminit void page_alloc_shuffle(enum mm_shuffle_ctl ctl)
 }
 
 static bool shuffle_param;
-extern int shuffle_show(char *buffer, const struct kernel_param *kp)
+static int shuffle_show(char *buffer, const struct kernel_param *kp)
 {
        return sprintf(buffer, "%c\n", test_bit(SHUFFLE_ENABLE, &shuffle_state)
                        ? 'Y' : 'N');
index 9df3705..66e5d80 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4206,9 +4206,12 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 
 /**
  * __ksize -- Uninstrumented ksize.
+ * @objp: pointer to the object
  *
  * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
  * safety checks as ksize() with KASAN instrumentation enabled.
+ *
+ * Return: size of the actual memory used by @objp in bytes
  */
 size_t __ksize(const void *objp)
 {
index 9057b80..b2b0169 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -30,6 +30,69 @@ struct kmem_cache {
        struct list_head list;  /* List of all slab caches on the system */
 };
 
+#else /* !CONFIG_SLOB */
+
+struct memcg_cache_array {
+       struct rcu_head rcu;
+       struct kmem_cache *entries[0];
+};
+
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system. To allow the
+ * array to be accessed without taking any locks, on relocation we free the old
+ * version only after a grace period.
+ *
+ * Root and child caches hold different metadata.
+ *
+ * @root_cache:        Common to root and child caches.  NULL for root, pointer to
+ *             the root cache for children.
+ *
+ * The following fields are specific to root caches.
+ *
+ * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
+ *             used to index child caches during allocation and cleared
+ *             early during shutdown.
+ *
+ * @root_caches_node: List node for slab_root_caches list.
+ *
+ * @children:  List of all child caches.  While the child caches are also
+ *             reachable through @memcg_caches, a child cache remains on
+ *             this list until it is actually destroyed.
+ *
+ * The following fields are specific to child caches.
+ *
+ * @memcg:     Pointer to the memcg this cache belongs to.
+ *
+ * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
+ */
+struct memcg_cache_params {
+       struct kmem_cache *root_cache;
+       union {
+               struct {
+                       struct memcg_cache_array __rcu *memcg_caches;
+                       struct list_head __root_caches_node;
+                       struct list_head children;
+                       bool dying;
+               };
+               struct {
+                       struct mem_cgroup *memcg;
+                       struct list_head children_node;
+                       struct list_head kmem_caches_node;
+                       struct percpu_ref refcnt;
+
+                       void (*work_fn)(struct kmem_cache *);
+                       union {
+                               struct rcu_head rcu_head;
+                               struct work_struct work;
+                       };
+               };
+       };
+};
 #endif /* CONFIG_SLOB */
 
 #ifdef CONFIG_SLAB
@@ -174,6 +237,7 @@ int __kmem_cache_shrink(struct kmem_cache *);
 void __kmemcg_cache_deactivate(struct kmem_cache *s);
 void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s);
 void slab_kmem_cache_release(struct kmem_cache *);
+void kmem_cache_shrink_all(struct kmem_cache *s);
 
 struct seq_file;
 struct file;
@@ -259,8 +323,8 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
  * Expects a pointer to a slab page. Please note that the PageSlab() check
  * isn't sufficient, as it returns true also for tail compound slab pages,
  * which do not have slab_cache pointer set.
- * So this function assumes that the page can pass PageHead() and PageSlab()
- * checks.
+ * So this function assumes that the page can pass PageSlab() && !PageTail()
+ * check.
  *
  * The kmem_cache can be reparented asynchronously. The caller must ensure
  * the memcg lifetime, e.g. by taking rcu_read_lock() or cgroup_mutex.
index 807490f..f9fb27b 100644 (file)
@@ -178,10 +178,13 @@ static int init_memcg_params(struct kmem_cache *s,
 
 static void destroy_memcg_params(struct kmem_cache *s)
 {
-       if (is_root_cache(s))
+       if (is_root_cache(s)) {
                kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
-       else
+       } else {
+               mem_cgroup_put(s->memcg_params.memcg);
+               WRITE_ONCE(s->memcg_params.memcg, NULL);
                percpu_ref_exit(&s->memcg_params.refcnt);
+       }
 }
 
 static void free_memcg_params(struct rcu_head *rcu)
@@ -253,8 +256,6 @@ static void memcg_unlink_cache(struct kmem_cache *s)
        } else {
                list_del(&s->memcg_params.children_node);
                list_del(&s->memcg_params.kmem_caches_node);
-               mem_cgroup_put(s->memcg_params.memcg);
-               WRITE_ONCE(s->memcg_params.memcg, NULL);
        }
 }
 #else
@@ -981,6 +982,43 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
 
+/**
+ * kmem_cache_shrink_all - shrink a cache and all memcg caches for root cache
+ * @s: The cache pointer
+ */
+void kmem_cache_shrink_all(struct kmem_cache *s)
+{
+       struct kmem_cache *c;
+
+       if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || !is_root_cache(s)) {
+               kmem_cache_shrink(s);
+               return;
+       }
+
+       get_online_cpus();
+       get_online_mems();
+       kasan_cache_shrink(s);
+       __kmem_cache_shrink(s);
+
+       /*
+        * We have to take the slab_mutex to protect from the memcg list
+        * modification.
+        */
+       mutex_lock(&slab_mutex);
+       for_each_memcg_cache(c, s) {
+               /*
+                * Don't need to shrink deactivated memcg caches.
+                */
+               if (c->flags & SLAB_DEACTIVATED)
+                       continue;
+               kasan_cache_shrink(c);
+               __kmem_cache_shrink(c);
+       }
+       mutex_unlock(&slab_mutex);
+       put_online_mems();
+       put_online_cpus();
+}
+
 bool slab_is_available(void)
 {
        return slab_state >= UP;
@@ -993,10 +1031,19 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
                unsigned int useroffset, unsigned int usersize)
 {
        int err;
+       unsigned int align = ARCH_KMALLOC_MINALIGN;
 
        s->name = name;
        s->size = s->object_size = size;
-       s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+
+       /*
+        * For power of two sizes, guarantee natural alignment for kmalloc
+        * caches, regardless of SL*B debugging options.
+        */
+       if (is_power_of_2(size))
+               align = max(align, size);
+       s->align = calculate_alignment(flags, align, size);
+
        s->useroffset = useroffset;
        s->usersize = usersize;
 
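The new rule guarantees natural alignment for power-of-two kmalloc sizes: a 64-byte object lands on a 64-byte boundary regardless of SL*B debug options. A worked example of the alignment computation (simplified; calculate_alignment() also folds in cache-line flags):

    #include <stdio.h>

    static unsigned int kmalloc_align_demo(unsigned int minalign,
                                           unsigned int size)
    {
            /* is_power_of_2() equivalent for nonzero size */
            if (size && (size & (size - 1)) == 0 && size > minalign)
                    return size;
            return minalign;
    }

    int main(void)
    {
            printf("%u\n", kmalloc_align_demo(8, 64)); /* 64: natural alignment */
            printf("%u\n", kmalloc_align_demo(8, 96)); /* 8: not a power of two */
            return 0;
    }
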
@@ -1250,12 +1297,16 @@ void __init create_kmalloc_caches(slab_flags_t flags)
  */
 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
-       void *ret;
+       void *ret = NULL;
        struct page *page;
 
        flags |= __GFP_COMP;
        page = alloc_pages(flags, order);
-       ret = page ? page_address(page) : NULL;
+       if (likely(page)) {
+               ret = page_address(page);
+               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+                                   1 << order);
+       }
        ret = kasan_kmalloc_large(ret, size, flags);
        /* As ret might get tagged, call kmemleak hook after KASAN. */
        kmemleak_alloc(ret, size, 1, flags);
index 7f421d0..fa53e9f 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -190,7 +190,7 @@ static int slob_last(slob_t *s)
 
 static void *slob_new_pages(gfp_t gfp, int order, int node)
 {
-       void *page;
+       struct page *page;
 
 #ifdef CONFIG_NUMA
        if (node != NUMA_NO_NODE)
@@ -202,14 +202,21 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
        if (!page)
                return NULL;
 
+       mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+                           1 << order);
        return page_address(page);
 }
 
 static void slob_free_pages(void *b, int order)
 {
+       struct page *sp = virt_to_page(b);
+
        if (current->reclaim_state)
                current->reclaim_state->reclaimed_slab += 1 << order;
-       free_pages((unsigned long)b, order);
+
+       mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
+                           -(1 << order));
+       __free_pages(sp, order);
 }
 
 /*
@@ -217,6 +224,7 @@ static void slob_free_pages(void *b, int order)
  * @sp: Page to look in.
  * @size: Size of the allocation.
  * @align: Allocation alignment.
+ * @align_offset: Offset in the allocated block that will be aligned.
  * @page_removed_from_list: Return parameter.
  *
  * Tries to find a chunk of memory at least @size bytes big within @page.
@@ -227,7 +235,7 @@ static void slob_free_pages(void *b, int order)
  *         true (set to false otherwise).
  */
 static void *slob_page_alloc(struct page *sp, size_t size, int align,
-                            bool *page_removed_from_list)
+                             int align_offset, bool *page_removed_from_list)
 {
        slob_t *prev, *cur, *aligned = NULL;
        int delta = 0, units = SLOB_UNITS(size);
@@ -236,8 +244,17 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
        for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
                slobidx_t avail = slob_units(cur);
 
+               /*
+                * 'aligned' will hold the address of the slob block so that the
+                * address 'aligned'+'align_offset' is aligned according to the
+                * 'align' parameter. This is for kmalloc() which prepends the
+                * allocated block with its size, so that the block itself is
+                * aligned when needed.
+                */
                if (align) {
-                       aligned = (slob_t *)ALIGN((unsigned long)cur, align);
+                       aligned = (slob_t *)
+                               (ALIGN((unsigned long)cur + align_offset, align)
+                                - align_offset);
                        delta = aligned - cur;
                }
                if (avail >= units + delta) { /* room enough? */
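
To make the align_offset arithmetic concrete, a small userspace calculation
with made-up addresses; ALIGN mirrors the kernel macro for power-of-two
alignments:

    #include <stdio.h>

    #define ALIGN(x, a)     (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

    int main(void)
    {
            unsigned long cur = 0x1008;     /* free chunk address (made up)     */
            unsigned long align = 64;       /* requested object alignment       */
            unsigned long off = 8;          /* size header width, i.e. minalign */
            unsigned long start = ALIGN(cur + off, align) - off;

            printf("block %#lx, object %#lx, object %% align = %lu\n",
                   start, start + off, (start + off) % align);
            return 0;
    }

The block itself starts 8 bytes before a 64-byte boundary (0x1038), so the
object that follows the size header (0x1040) is what ends up aligned.
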
@@ -281,7 +298,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
 /*
  * slob_alloc: entry point into the slob allocator.
  */
-static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
+static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
+                                                       int align_offset)
 {
        struct page *sp;
        struct list_head *slob_list;
@@ -312,7 +330,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
                if (sp->units < SLOB_UNITS(size))
                        continue;
 
-               b = slob_page_alloc(sp, size, align, &page_removed_from_list);
+               b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list);
                if (!b)
                        continue;
 
@@ -349,7 +367,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
                INIT_LIST_HEAD(&sp->slab_list);
                set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
                set_slob_page_free(sp, slob_list);
-               b = slob_page_alloc(sp, size, align, &_unused);
+               b = slob_page_alloc(sp, size, align, align_offset, &_unused);
                BUG_ON(!b);
                spin_unlock_irqrestore(&slob_lock, flags);
        }
@@ -451,7 +469,7 @@ static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 {
        unsigned int *m;
-       int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+       int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
        void *ret;
 
        gfp &= gfp_allowed_mask;
@@ -459,19 +477,28 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
        fs_reclaim_acquire(gfp);
        fs_reclaim_release(gfp);
 
-       if (size < PAGE_SIZE - align) {
+       if (size < PAGE_SIZE - minalign) {
+               int align = minalign;
+
+               /*
+                * For power of two sizes, guarantee natural alignment for
+                * kmalloc()'d objects.
+                */
+               if (is_power_of_2(size))
+                       align = max(minalign, (int) size);
+
                if (!size)
                        return ZERO_SIZE_PTR;
 
-               m = slob_alloc(size + align, gfp, align, node);
+               m = slob_alloc(size + minalign, gfp, align, node, minalign);
 
                if (!m)
                        return NULL;
                *m = size;
-               ret = (void *)m + align;
+               ret = (void *)m + minalign;
 
                trace_kmalloc_node(caller, ret,
-                                  size, size + align, gfp, node);
+                                  size, size + minalign, gfp, node);
        } else {
                unsigned int order = get_order(size);
 
@@ -521,8 +548,13 @@ void kfree(const void *block)
                int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
                unsigned int *m = (unsigned int *)(block - align);
                slob_free(m, *m + align);
-       } else
-               __free_pages(sp, compound_order(sp));
+       } else {
+               unsigned int order = compound_order(sp);
+               mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
+                                   -(1 << order));
+               __free_pages(sp, order);
+
+       }
 }
 EXPORT_SYMBOL(kfree);
 
@@ -539,7 +571,7 @@ size_t __ksize(const void *block)
 
        sp = virt_to_page(block);
        if (unlikely(!PageSlab(sp)))
-               return PAGE_SIZE << compound_order(sp);
+               return page_size(sp);
 
        align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
        m = (unsigned int *)(block - align);
@@ -567,7 +599,7 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
        fs_reclaim_release(flags);
 
        if (c->size < PAGE_SIZE) {
-               b = slob_alloc(c->size, flags, c->align, node);
+               b = slob_alloc(c->size, flags, c->align, node, 0);
                trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
                                            SLOB_UNITS(c->size) * SLOB_UNIT,
                                            flags, node);
index 8834563..b25c807 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -829,7 +829,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
                return 1;
 
        start = page_address(page);
-       length = PAGE_SIZE << compound_order(page);
+       length = page_size(page);
        end = start + length;
        remainder = length % s->size;
        if (!remainder)
@@ -1074,13 +1074,14 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
        init_tracking(s, object);
 }
 
-static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
+static
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
 {
        if (!(s->flags & SLAB_POISON))
                return;
 
        metadata_access_enable();
-       memset(addr, POISON_INUSE, PAGE_SIZE << order);
+       memset(addr, POISON_INUSE, page_size(page));
        metadata_access_disable();
 }
 
@@ -1340,8 +1341,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 #else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s,
                        struct page *page, void *object) {}
-static inline void setup_page_debug(struct kmem_cache *s,
-                       void *addr, int order) {}
+static inline
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
 
 static inline int alloc_debug_processing(struct kmem_cache *s,
        struct page *page, void *object, unsigned long addr) { return 0; }
@@ -1639,7 +1640,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
        struct kmem_cache_order_objects oo = s->oo;
        gfp_t alloc_gfp;
        void *start, *p, *next;
-       int idx, order;
+       int idx;
        bool shuffle;
 
        flags &= gfp_allowed_mask;
@@ -1673,7 +1674,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
        page->objects = oo_objects(oo);
 
-       order = compound_order(page);
        page->slab_cache = s;
        __SetPageSlab(page);
        if (page_is_pfmemalloc(page))
@@ -1683,7 +1683,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
        start = page_address(page);
 
-       setup_page_debug(s, start, order);
+       setup_page_debug(s, page, start);
 
        shuffle = shuffle_freelist(s, page);
 
@@ -2004,6 +2004,7 @@ static inline unsigned long next_tid(unsigned long tid)
        return tid + TID_STEP;
 }
 
+#ifdef SLUB_DEBUG_CMPXCHG
 static inline unsigned int tid_to_cpu(unsigned long tid)
 {
        return tid % TID_STEP;
@@ -2013,6 +2014,7 @@ static inline unsigned long tid_to_event(unsigned long tid)
 {
        return tid / TID_STEP;
 }
+#endif
 
 static inline unsigned int init_tid(int cpu)
 {
@@ -2669,6 +2671,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
        return p;
 }
 
+/*
+ * If the object has been wiped upon free, make sure it's fully initialized by
+ * zeroing out freelist pointer.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+                                                  void *obj)
+{
+       if (unlikely(slab_want_init_on_free(s)) && obj)
+               memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
+}
+
 /*
  * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
  * have the fastpath folded into their functions. So no function call
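
A userspace model of why the re-wipe above is needed: with init_on_free the
object is zeroed at free time, but the allocator then stores a freelist
pointer inside the free object, and that stale pointer must be cleared again
on allocation. All structures and names here are illustrative:

    #include <stdio.h>
    #include <string.h>

    struct fake_cache {
            size_t offset;  /* where the freelist pointer lives in a free object */
    };

    static void wipe_obj_freeptr(const struct fake_cache *s, void *obj)
    {
            if (obj)        /* pretend slab_want_init_on_free() returned true */
                    memset((char *)obj + s->offset, 0, sizeof(void *));
    }

    int main(void)
    {
            struct fake_cache s = { .offset = 0 };
            unsigned char obj[32] = { 0 };
            void *stale = (void *)0x1234;

            /* a free object carries a freelist pointer at s.offset */
            memcpy(obj + s.offset, &stale, sizeof(stale));

            wipe_obj_freeptr(&s, obj);      /* done on the allocation path */
            memcpy(&stale, obj + s.offset, sizeof(stale));
            printf("freelist pointer after alloc: %p\n", stale);    /* (nil) */
            return 0;
    }
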
@@ -2757,12 +2770,8 @@ redo:
                prefetch_freepointer(s, next_object);
                stat(s, ALLOC_FASTPATH);
        }
-       /*
-        * If the object has been wiped upon free, make sure it's fully
-        * initialized by zeroing out freelist pointer.
-        */
-       if (unlikely(slab_want_init_on_free(s)) && object)
-               memset(object + s->offset, 0, sizeof(void *));
+
+       maybe_wipe_obj_freeptr(s, object);
 
        if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
                memset(object, 0, s->object_size);
@@ -3176,10 +3185,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
                                goto error;
 
                        c = this_cpu_ptr(s->cpu_slab);
+                       maybe_wipe_obj_freeptr(s, p[i]);
+
                        continue; /* goto for-loop */
                }
                c->freelist = get_freepointer(s, object);
                p[i] = object;
+               maybe_wipe_obj_freeptr(s, p[i]);
        }
        c->tid = next_tid(c->tid);
        local_irq_enable();
@@ -3819,11 +3831,15 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 {
        struct page *page;
        void *ptr = NULL;
+       unsigned int order = get_order(size);
 
        flags |= __GFP_COMP;
-       page = alloc_pages_node(node, flags, get_order(size));
-       if (page)
+       page = alloc_pages_node(node, flags, order);
+       if (page) {
                ptr = page_address(page);
+               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+                                   1 << order);
+       }
 
        return kmalloc_large_node_hook(ptr, size, flags);
 }
@@ -3930,7 +3946,7 @@ size_t __ksize(const void *object)
 
        if (unlikely(!PageSlab(page))) {
                WARN_ON(!PageCompound(page));
-               return PAGE_SIZE << compound_order(page);
+               return page_size(page);
        }
 
        return slab_ksize(page->slab_cache);
@@ -3949,9 +3965,13 @@ void kfree(const void *x)
 
        page = virt_to_head_page(x);
        if (unlikely(!PageSlab(page))) {
+               unsigned int order = compound_order(page);
+
                BUG_ON(!PageCompound(page));
                kfree_hook(object);
-               __free_pages(page, compound_order(page));
+               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+                                   -(1 << order));
+               __free_pages(page, order);
                return;
        }
        slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
@@ -4836,7 +4856,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                }
        }
 
-       get_online_mems();
+       /*
+        * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
+        * already held which will conflict with an existing lock order:
+        *
+        * mem_hotplug_lock->slab_mutex->kernfs_mutex
+        *
+        * We don't really need mem_hotplug_lock (to hold off
+        * slab_mem_going_offline_callback) here because slab's memory hot
+        * unplug code doesn't destroy the kmem_cache->node[] data.
+        */
+
 #ifdef CONFIG_SLUB_DEBUG
        if (flags & SO_ALL) {
                struct kmem_cache_node *n;
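
The inversion described in the comment, reduced to two pthread mutexes
standing in for mem_hotplug_lock and kernfs_mutex (slab_mutex omitted for
brevity). One thread takes them in the hotplug order, the other in the old
sysfs-show order; this is the classic ABBA pattern lockdep flags, and the
program may or may not hang depending on timing:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t hotplug = PTHREAD_MUTEX_INITIALIZER; /* mem_hotplug_lock */
    static pthread_mutex_t kernfs  = PTHREAD_MUTEX_INITIALIZER; /* kernfs_mutex     */

    static void *hotplug_path(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&hotplug);   /* established order: hotplug -> kernfs */
            pthread_mutex_lock(&kernfs);
            pthread_mutex_unlock(&kernfs);
            pthread_mutex_unlock(&hotplug);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, hotplug_path, NULL);

            /* the old show_slab_objects() path: kernfs -> hotplug (inverted) */
            pthread_mutex_lock(&kernfs);
            pthread_mutex_lock(&hotplug);   /* ABBA: may deadlock under contention */
            pthread_mutex_unlock(&hotplug);
            pthread_mutex_unlock(&kernfs);

            pthread_join(t, NULL);
            puts("lucky timing: no deadlock this run");
            return 0;
    }
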
@@ -4877,7 +4907,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                        x += sprintf(buf + x, " N%d=%lu",
                                        node, nodes[node]);
 #endif
-       put_online_mems();
        kfree(nodes);
        return x + sprintf(buf + x, "\n");
 }
@@ -5298,7 +5327,7 @@ static ssize_t shrink_store(struct kmem_cache *s,
                        const char *buf, size_t length)
 {
        if (buf[0] == '1')
-               kmem_cache_shrink(s);
+               kmem_cache_shrink_all(s);
        else
                return -EINVAL;
        return length;
index 72f010d..f6891c1 100644 (file)
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -11,6 +11,8 @@
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include "internal.h"
 #include <asm/dma.h>
@@ -217,7 +219,7 @@ static inline unsigned long first_present_section_nr(void)
        return next_present_section_nr(-1);
 }
 
-void subsection_mask_set(unsigned long *map, unsigned long pfn,
+static void subsection_mask_set(unsigned long *map, unsigned long pfn,
                unsigned long nr_pages)
 {
        int idx = subsection_map_index(pfn);
@@ -470,6 +472,12 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
 static void *sparsemap_buf __meminitdata;
 static void *sparsemap_buf_end __meminitdata;
 
+static inline void __meminit sparse_buffer_free(unsigned long size)
+{
+       WARN_ON(!sparsemap_buf || size == 0);
+       memblock_free_early(__pa(sparsemap_buf), size);
+}
+
 static void __init sparse_buffer_init(unsigned long size, int nid)
 {
        phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
@@ -486,7 +494,7 @@ static void __init sparse_buffer_fini(void)
        unsigned long size = sparsemap_buf_end - sparsemap_buf;
 
        if (sparsemap_buf && size > 0)
-               memblock_free_early(__pa(sparsemap_buf), size);
+               sparse_buffer_free(size);
        sparsemap_buf = NULL;
 }
 
@@ -495,11 +503,15 @@ void * __meminit sparse_buffer_alloc(unsigned long size)
        void *ptr = NULL;
 
        if (sparsemap_buf) {
-               ptr = PTR_ALIGN(sparsemap_buf, size);
+               ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
                if (ptr + size > sparsemap_buf_end)
                        ptr = NULL;
-               else
+               else {
+                       /* Free redundant aligned space */
+                       if ((unsigned long)(ptr - sparsemap_buf) > 0)
+                               sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
                        sparsemap_buf = ptr + size;
+               }
        }
        return ptr;
 }
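
A worked example of the pad-freeing logic above, with made-up addresses;
roundup_ul mirrors the kernel's roundup() for this case:

    #include <stdio.h>

    static unsigned long roundup_ul(unsigned long x, unsigned long a)
    {
            return ((x + a - 1) / a) * a;
    }

    int main(void)
    {
            unsigned long buf = 0x201000;   /* sparsemap_buf (made-up address) */
            unsigned long size = 0x8000;    /* 32 KiB, alignment-sized request */
            unsigned long ptr = roundup_ul(buf, size);

            printf("alloc at %#lx, leading pad of %#lx bytes freed\n",
                   ptr, ptr - buf);         /* 0x208000, pad 0x7000 */
            return 0;
    }

Previously the 0x7000-byte pad between sparsemap_buf and the aligned pointer
was simply leaked; now it is handed back to memblock.
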
@@ -867,7 +879,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
         */
        page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
 
-       ms = __pfn_to_section(start_pfn);
+       ms = __nr_to_section(section_nr);
        set_section_nid(section_nr, nid);
        section_mark_present(ms);
 
@@ -884,9 +896,6 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 {
        int i;
 
-       if (!memmap)
-               return;
-
        /*
         * A further optimization is to have per section refcounted
         * num_poisoned_pages.  But that would need more space per memmap, so
@@ -898,7 +907,7 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 
        for (i = 0; i < nr_pages; i++) {
                if (PageHWPoison(&memmap[i])) {
-                       atomic_long_sub(1, &num_poisoned_pages);
+                       num_poisoned_pages_dec();
                        ClearPageHWPoison(&memmap[i]);
                }
        }
index ae30039..38c3fa4 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -47,6 +47,7 @@ int page_cluster;
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
@@ -71,12 +72,12 @@ static void __page_cache_release(struct page *page)
                spin_unlock_irqrestore(&pgdat->lru_lock, flags);
        }
        __ClearPageWaiters(page);
-       mem_cgroup_uncharge(page);
 }
 
 static void __put_single_page(struct page *page)
 {
        __page_cache_release(page);
+       mem_cgroup_uncharge(page);
        free_unref_page(page);
 }
 
@@ -515,7 +516,6 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
        del_page_from_lru_list(page, lruvec, lru + active);
        ClearPageActive(page);
        ClearPageReferenced(page);
-       add_page_to_lru_list(page, lruvec, lru);
 
        if (PageWriteback(page) || PageDirty(page)) {
                /*
@@ -523,13 +523,14 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
                 * It can make readahead confusing.  But race window
                 * is _really_ small and  it's non-critical problem.
                 */
+               add_page_to_lru_list(page, lruvec, lru);
                SetPageReclaim(page);
        } else {
                /*
                 * The page's writeback ended while it was on the pagevec,
                 * so move the page to the tail of the inactive list.
                 */
-               list_move_tail(&page->lru, &lruvec->lists[lru]);
+               add_page_to_lru_list_tail(page, lruvec, lru);
                __count_vm_event(PGROTATED);
        }
 
@@ -538,6 +539,22 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
        update_page_reclaim_stat(lruvec, file, 0);
 }
 
+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
+                           void *arg)
+{
+       if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+               int file = page_is_file_cache(page);
+               int lru = page_lru_base_type(page);
+
+               del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
+               ClearPageActive(page);
+               ClearPageReferenced(page);
+               add_page_to_lru_list(page, lruvec, lru);
+
+               __count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
+               update_page_reclaim_stat(lruvec, file, 0);
+       }
+}
 
 static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
                            void *arg)
@@ -590,6 +607,10 @@ void lru_add_drain_cpu(int cpu)
        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 
+       pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+       if (pagevec_count(pvec))
+               pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+
        pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
@@ -623,6 +644,26 @@ void deactivate_file_page(struct page *page)
        }
 }
 
+/*
+ * deactivate_page - deactivate a page
+ * @page: page to deactivate
+ *
+ * deactivate_page() moves @page to the inactive list if @page was on the active
+ * list and was not an unevictable page.  This is done to accelerate the reclaim
+ * of @page.
+ */
+void deactivate_page(struct page *page)
+{
+       if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+               struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+               get_page(page);
+               if (!pagevec_add(pvec, page) || PageCompound(page))
+                       pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+               put_cpu_var(lru_deactivate_pvecs);
+       }
+}
+
 /**
  * mark_page_lazyfree - make an anon page lazyfree
  * @page: page to deactivate
@@ -687,6 +728,7 @@ void lru_add_drain_all(void)
                if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
                    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
                    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
+                   pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
                    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
                    need_activate_page_drain(cpu)) {
                        INIT_WORK(work, lru_add_drain_per_cpu);
@@ -844,17 +886,15 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
                get_page(page_tail);
                list_add_tail(&page_tail->lru, list);
        } else {
-               struct list_head *list_head;
                /*
                 * Head page has not yet been counted, as an hpage,
                 * so we must account for each subpage individually.
                 *
-                * Use the standard add function to put page_tail on the list,
-                * but then correct its position so they all end up in order.
+                * Put page_tail on the list at the correct position
+                * so they all end up in order.
                 */
-               add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
-               list_head = page_tail->lru.prev;
-               list_move_tail(&page_tail->lru, list_head);
+               add_page_to_lru_list_tail(page_tail, lruvec,
+                                         page_lru(page_tail));
        }
 
        if (!PageUnevictable(page))
index 8368621..8e7ce9a 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -116,7 +116,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
        struct address_space *address_space = swap_address_space(entry);
        pgoff_t idx = swp_offset(entry);
        XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
-       unsigned long i, nr = 1UL << compound_order(page);
+       unsigned long i, nr = compound_nr(page);
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapCache(page), page);
@@ -133,7 +133,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
                for (i = 0; i < nr; i++) {
                        VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
                        set_page_private(page + i, entry.val + i);
-                       xas_store(&xas, page + i);
+                       xas_store(&xas, page);
                        xas_next(&xas);
                }
                address_space->nrpages += nr;
@@ -168,7 +168,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
 
        for (i = 0; i < nr; i++) {
                void *entry = xas_store(&xas, NULL);
-               VM_BUG_ON_PAGE(entry != page + i, entry);
+               VM_BUG_ON_PAGE(entry != page, entry);
                set_page_private(page + i, 0);
                xas_next(&xas);
        }
index 8563339..dd9ebc1 100644 (file)
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -592,6 +592,16 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                        unlock_page(page);
                                        continue;
                                }
+
+                               /* Take a pin outside pagevec */
+                               get_page(page);
+
+                               /*
+                                * Drop extra pins before trying to invalidate
+                                * the huge page.
+                                */
+                               pagevec_remove_exceptionals(&pvec);
+                               pagevec_release(&pvec);
                        }
 
                        ret = invalidate_inode_page(page);
@@ -602,6 +612,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
                         */
                        if (!ret)
                                deactivate_file_page(page);
+                       if (PageTransHuge(page))
+                               put_page(page);
                        count += ret;
                }
                pagevec_remove_exceptionals(&pvec);
index 98e9248..660717a 100644 (file)
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
@@ -227,7 +228,12 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
        if (!virt_addr_valid(ptr))
                return;
 
-       page = virt_to_head_page(ptr);
+       /*
+        * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
+        * highmem page or fallback to virt_to_page(). The following
+        * is effectively a highmem-aware virt_to_head_page().
+        */
+       page = compound_head(kmap_to_page((void *)ptr));
 
        if (PageSlab(page)) {
                /* Check slab allocator for flags and size. */
index e6351a8..3ad6db9 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/elf.h>
+#include <linux/elf-randomize.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/processor.h>
+#include <linux/sizes.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -293,7 +300,105 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
-#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
+#ifndef STACK_RND_MASK
+#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
+#endif
+
+unsigned long randomize_stack_top(unsigned long stack_top)
+{
+       unsigned long random_variable = 0;
+
+       if (current->flags & PF_RANDOMIZE) {
+               random_variable = get_random_long();
+               random_variable &= STACK_RND_MASK;
+               random_variable <<= PAGE_SHIFT;
+       }
+#ifdef CONFIG_STACK_GROWSUP
+       return PAGE_ALIGN(stack_top) + random_variable;
+#else
+       return PAGE_ALIGN(stack_top) - random_variable;
+#endif
+}
+
+#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+       /* Is the current task 32bit ? */
+       if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
+               return randomize_page(mm->brk, SZ_32M);
+
+       return randomize_page(mm->brk, SZ_1G);
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+       unsigned long rnd;
+
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+       if (is_compat_task())
+               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
+       else
+#endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */
+               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+
+       return rnd << PAGE_SHIFT;
+}
+
+static int mmap_is_legacy(struct rlimit *rlim_stack)
+{
+       if (current->personality & ADDR_COMPAT_LAYOUT)
+               return 1;
+
+       if (rlim_stack->rlim_cur == RLIM_INFINITY)
+               return 1;
+
+       return sysctl_legacy_va_layout;
+}
+
+/*
+ * Leave enough space between the mmap area and the stack to honour ulimit in
+ * the face of randomisation.
+ */
+#define MIN_GAP                (SZ_128M)
+#define MAX_GAP                (STACK_TOP / 6 * 5)
+
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
+{
+       unsigned long gap = rlim_stack->rlim_cur;
+       unsigned long pad = stack_guard_gap;
+
+       /* Account for stack randomization if necessary */
+       if (current->flags & PF_RANDOMIZE)
+               pad += (STACK_RND_MASK << PAGE_SHIFT);
+
+       /* Values close to RLIM_INFINITY can overflow. */
+       if (gap + pad > gap)
+               gap += pad;
+
+       if (gap < MIN_GAP)
+               gap = MIN_GAP;
+       else if (gap > MAX_GAP)
+               gap = MAX_GAP;
+
+       return PAGE_ALIGN(STACK_TOP - gap - rnd);
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+       unsigned long random_factor = 0UL;
+
+       if (current->flags & PF_RANDOMIZE)
+               random_factor = arch_mmap_rnd();
+
+       if (mmap_is_legacy(rlim_stack)) {
+               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+               mm->get_unmapped_area = arch_get_unmapped_area;
+       } else {
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
+               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+       }
+}
+#elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        mm->mmap_base = TASK_UNMAPPED_BASE;
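
Back-of-envelope check of randomize_stack_top() above with 4 KiB pages, where
STACK_RND_MASK works out to 0x7ff (plain userspace arithmetic):

    #include <stdio.h>

    int main(void)
    {
            unsigned long page_shift = 12;                   /* 4 KiB pages   */
            unsigned long mask = 0x7ff >> (page_shift - 12); /* STACK_RND_MASK */
            unsigned long max_offset = mask << page_shift;

            printf("up to %lu pages (%#lx bytes, ~8 MiB) of stack offset\n",
                   mask, max_offset);       /* 2047 pages, 0x7ff000 bytes */
            return 0;
    }
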
@@ -521,7 +626,7 @@ bool page_mapped(struct page *page)
                return true;
        if (PageHuge(page))
                return false;
-       for (i = 0; i < (1 << compound_order(page)); i++) {
+       for (i = 0; i < compound_nr(page); i++) {
                if (atomic_read(&page[i]._mapcount) >= 0)
                        return true;
        }
@@ -783,3 +888,16 @@ out_mm:
 out:
        return res;
 }
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+       char *addr1, *addr2;
+       int ret;
+
+       addr1 = kmap_atomic(page1);
+       addr2 = kmap_atomic(page2);
+       ret = memcmp(addr1, addr2, PAGE_SIZE);
+       kunmap_atomic(addr2);
+       kunmap_atomic(addr1);
+       return ret;
+}
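
A userspace analogue of memcmp_pages(); the kernel version wraps the compare
in kmap_atomic()/kunmap_atomic() so it also works for highmem pages that have
no permanent kernel mapping:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            long psz = sysconf(_SC_PAGESIZE);
            unsigned char *p1 = calloc(1, psz);
            unsigned char *p2 = calloc(1, psz);

            if (!p1 || !p2)
                    return 1;
            p2[psz - 1] = 0xff;     /* make the last byte differ */
            printf("pages %s\n", memcmp(p1, p2, psz) ? "differ" : "are identical");
            free(p1);
            free(p2);
            return 0;
    }
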
index c1246d7..a3c70e2 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -329,8 +329,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
 
-#define VM_LAZY_FREE   0x02
-#define VM_VM_AREA     0x04
 
 static DEFINE_SPINLOCK(vmap_area_lock);
 /* Export for kexec only */
@@ -398,9 +396,8 @@ compute_subtree_max_size(struct vmap_area *va)
                get_subtree_max_size(va->rb_node.rb_right));
 }
 
-RB_DECLARE_CALLBACKS(static, free_vmap_area_rb_augment_cb,
-       struct vmap_area, rb_node, unsigned long, subtree_max_size,
-       compute_subtree_max_size)
+RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
+       struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
 
 static void purge_vmap_area_lazy(void);
 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
@@ -1116,7 +1113,7 @@ retry:
 
        va->va_start = addr;
        va->va_end = addr + size;
-       va->flags = 0;
+       va->vm = NULL;
        insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
 
        spin_unlock(&vmap_area_lock);
@@ -1282,7 +1279,14 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
        llist_for_each_entry_safe(va, n_va, valist, purge_list) {
                unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
 
-               __free_vmap_area(va);
+               /*
+                * Finally insert or merge lazily-freed area. It is
+                * detached and there is no need to "unlink" it from
+                * anything.
+                */
+               merge_or_add_vmap_area(va,
+                       &free_vmap_area_root, &free_vmap_area_list);
+
                atomic_long_sub(nr, &vmap_lazy_nr);
 
                if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
@@ -1324,6 +1328,10 @@ static void free_vmap_area_noflush(struct vmap_area *va)
 {
        unsigned long nr_lazy;
 
+       spin_lock(&vmap_area_lock);
+       unlink_va(va, &vmap_area_root);
+       spin_unlock(&vmap_area_lock);
+
        nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
                                PAGE_SHIFT, &vmap_lazy_nr);
 
@@ -1918,7 +1926,6 @@ void __init vmalloc_init(void)
                if (WARN_ON_ONCE(!va))
                        continue;
 
-               va->flags = VM_VM_AREA;
                va->va_start = (unsigned long)tmp->addr;
                va->va_end = va->va_start + tmp->size;
                va->vm = tmp;
@@ -2016,7 +2023,6 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
        vm->size = va->va_end - va->va_start;
        vm->caller = caller;
        va->vm = vm;
-       va->flags |= VM_VM_AREA;
        spin_unlock(&vmap_area_lock);
 }
 
@@ -2121,10 +2127,10 @@ struct vm_struct *find_vm_area(const void *addr)
        struct vmap_area *va;
 
        va = find_vmap_area((unsigned long)addr);
-       if (va && va->flags & VM_VM_AREA)
-               return va->vm;
+       if (!va)
+               return NULL;
 
-       return NULL;
+       return va->vm;
 }
 
 /**
@@ -2143,14 +2149,12 @@ struct vm_struct *remove_vm_area(const void *addr)
 
        might_sleep();
 
-       va = find_vmap_area((unsigned long)addr);
-       if (va && va->flags & VM_VM_AREA) {
+       spin_lock(&vmap_area_lock);
+       va = __find_vmap_area((unsigned long)addr);
+       if (va && va->vm) {
                struct vm_struct *vm = va->vm;
 
-               spin_lock(&vmap_area_lock);
                va->vm = NULL;
-               va->flags &= ~VM_VM_AREA;
-               va->flags |= VM_LAZY_FREE;
                spin_unlock(&vmap_area_lock);
 
                kasan_free_shadow(vm);
@@ -2158,6 +2162,8 @@ struct vm_struct *remove_vm_area(const void *addr)
 
                return vm;
        }
+
+       spin_unlock(&vmap_area_lock);
        return NULL;
 }
 
@@ -2402,7 +2408,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
        array_size = (nr_pages * sizeof(struct page *));
 
-       area->nr_pages = nr_pages;
        /* Please note that the recursion is strictly bounded. */
        if (array_size > PAGE_SIZE) {
                pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
@@ -2410,13 +2415,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        } else {
                pages = kmalloc_node(array_size, nested_gfp, node);
        }
-       area->pages = pages;
-       if (!area->pages) {
+
+       if (!pages) {
                remove_vm_area(area->addr);
                kfree(area);
                return NULL;
        }
 
+       area->pages = pages;
+       area->nr_pages = nr_pages;
+
        for (i = 0; i < area->nr_pages; i++) {
                struct page *page;
 
@@ -2851,7 +2859,7 @@ long vread(char *buf, char *addr, unsigned long count)
                if (!count)
                        break;
 
-               if (!(va->flags & VM_VM_AREA))
+               if (!va->vm)
                        continue;
 
                vm = va->vm;
@@ -2931,7 +2939,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
                if (!count)
                        break;
 
-               if (!(va->flags & VM_VM_AREA))
+               if (!va->vm)
                        continue;
 
                vm = va->vm;
@@ -3450,6 +3458,22 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
        }
 }
 
+static void show_purge_info(struct seq_file *m)
+{
+       struct llist_node *head;
+       struct vmap_area *va;
+
+       head = READ_ONCE(vmap_purge_list.first);
+       if (head == NULL)
+               return;
+
+       llist_for_each_entry(va, head, purge_list) {
+               seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+                       (void *)va->va_start, (void *)va->va_end,
+                       va->va_end - va->va_start);
+       }
+}
+
 static int s_show(struct seq_file *m, void *p)
 {
        struct vmap_area *va;
@@ -3458,14 +3482,13 @@ static int s_show(struct seq_file *m, void *p)
        va = list_entry(p, struct vmap_area, list);
 
        /*
-        * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
-        * behalf of vmap area is being tear down or vm_map_ram allocation.
+        * s_show can race with remove_vm_area(): !vm means the vmap area
+        * is being torn down or backs a vm_map_ram allocation.
         */
-       if (!(va->flags & VM_VM_AREA)) {
-               seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
+       if (!va->vm) {
+               seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
                        (void *)va->va_start, (void *)va->va_end,
-                       va->va_end - va->va_start,
-                       va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
+                       va->va_end - va->va_start);
 
                return 0;
        }
@@ -3504,6 +3527,16 @@ static int s_show(struct seq_file *m, void *p)
 
        show_numa_info(m, v);
        seq_putc(m, '\n');
+
+       /*
+        * As a final step, dump "unpurged" areas. Note,
+        * that entire "/proc/vmallocinfo" output will not
+        * be address sorted, because the purge list is not
+        * sorted.
+        */
+       if (list_is_last(&va->list, &vmap_area_list))
+               show_purge_info(m);
+
        return 0;
 }
 
index f3b5081..4bac22f 100644 (file)
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -355,6 +355,9 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
  * "hierarchy" or "local").
  *
  * To be used as memcg event method.
+ *
+ * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could
+ * not be parsed.
  */
 int vmpressure_register_event(struct mem_cgroup *memcg,
                              struct eventfd_ctx *eventfd, const char *args)
@@ -362,7 +365,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
        struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
        struct vmpressure_event *ev;
        enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH;
-       enum vmpressure_levels level = -1;
+       enum vmpressure_levels level;
        char *spec, *spec_orig;
        char *token;
        int ret = 0;
@@ -375,20 +378,18 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
 
        /* Find required level */
        token = strsep(&spec, ",");
-       level = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
-       if (level < 0) {
-               ret = level;
+       ret = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
+       if (ret < 0)
                goto out;
-       }
+       level = ret;
 
        /* Find optional mode */
        token = strsep(&spec, ",");
        if (token) {
-               mode = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
-               if (mode < 0) {
-                       ret = mode;
+               ret = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
+               if (ret < 0)
                        goto out;
-               }
+               mode = ret;
        }
 
        ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -404,6 +405,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
        mutex_lock(&vmpr->events_lock);
        list_add(&ev->node, &vmpr->events);
        mutex_unlock(&vmpr->events_lock);
+       ret = 0;
 out:
        kfree(spec_orig);
        return ret;
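
The userspace side of this registration path, for a cgroup-v1 memory
controller; the mount point below is an assumption, and the spec string
follows the "<level>[,<mode>]" grammar parsed above, e.g. "low,hierarchy":

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/eventfd.h>

    int main(void)
    {
            int efd = eventfd(0, 0);
            int plfd = open("/sys/fs/cgroup/memory/memory.pressure_level", O_RDONLY);
            int cfd = open("/sys/fs/cgroup/memory/cgroup.event_control", O_WRONLY);
            char line[64];
            uint64_t n;

            if (efd < 0 || plfd < 0 || cfd < 0)
                    return 1;
            snprintf(line, sizeof(line), "%d %d low,hierarchy", efd, plfd);
            if (write(cfd, line, strlen(line)) < 0)
                    return 1;
            while (read(efd, &n, sizeof(n)) == sizeof(n))
                    printf("vmpressure event (count %llu)\n",
                           (unsigned long long)n);
            return 0;
    }
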
index a6c5d0b..ee4eecc 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -171,11 +171,22 @@ int vm_swappiness = 60;
  */
 unsigned long vm_total_pages;
 
+static void set_task_reclaim_state(struct task_struct *task,
+                                  struct reclaim_state *rs)
+{
+       /* Check for an overwrite */
+       WARN_ON_ONCE(rs && task->reclaim_state);
+
+       /* Check for the nulling of an already-nulled member */
+       WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+       task->reclaim_state = rs;
+}
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
-#ifdef CONFIG_MEMCG_KMEM
-
+#ifdef CONFIG_MEMCG
 /*
  * We allow subsystems to populate their shrinker-related
  * LRU lists before register_shrinker_prepared() is called
@@ -227,30 +238,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
        idr_remove(&shrinker_idr, id);
        up_write(&shrinker_rwsem);
 }
-#else /* CONFIG_MEMCG_KMEM */
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
-{
-       return 0;
-}
-
-static void unregister_memcg_shrinker(struct shrinker *shrinker)
-{
-}
-#endif /* CONFIG_MEMCG_KMEM */
-
-static void set_task_reclaim_state(struct task_struct *task,
-                                  struct reclaim_state *rs)
-{
-       /* Check for an overwrite */
-       WARN_ON_ONCE(rs && task->reclaim_state);
-
-       /* Check for the nulling of an already-nulled member */
-       WARN_ON_ONCE(!rs && !task->reclaim_state);
-
-       task->reclaim_state = rs;
-}
 
-#ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
        return !sc->target_mem_cgroup;
@@ -305,6 +293,15 @@ static bool memcg_congested(pg_data_t *pgdat,
 
 }
 #else
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+       return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+
 static bool global_reclaim(struct scan_control *sc)
 {
        return true;
@@ -354,12 +351,13 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
  */
 unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
 {
-       unsigned long lru_size;
+       unsigned long lru_size = 0;
        int zid;
 
-       if (!mem_cgroup_disabled())
-               lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
-       else
+       if (!mem_cgroup_disabled()) {
+               for (zid = 0; zid < MAX_NR_ZONES; zid++)
+                       lru_size += mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
+       } else
                lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
 
        for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
@@ -591,7 +589,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
        return freed;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        struct mem_cgroup *memcg, int priority)
 {
@@ -599,7 +597,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
        unsigned long ret, freed = 0;
        int i;
 
-       if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
+       if (!mem_cgroup_online(memcg))
                return 0;
 
        if (!down_read_trylock(&shrinker_rwsem))
@@ -625,6 +623,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        continue;
                }
 
+               /* Call non-slab shrinkers even though kmem is disabled */
+               if (!memcg_kmem_enabled() &&
+                   !(shrinker->flags & SHRINKER_NONSLAB))
+                       continue;
+
                ret = do_shrink_slab(&sc, shrinker, priority);
                if (ret == SHRINK_EMPTY) {
                        clear_bit(i, map->map);
@@ -661,13 +664,13 @@ unlock:
        up_read(&shrinker_rwsem);
        return freed;
 }
-#else /* CONFIG_MEMCG_KMEM */
+#else /* CONFIG_MEMCG */
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        struct mem_cgroup *memcg, int priority)
 {
        return 0;
 }
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
 
 /**
  * shrink_slab - shrink slab caches
@@ -930,10 +933,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
         * Note that if SetPageDirty is always performed via set_page_dirty,
         * and thus under the i_pages lock, then this ordering is not required.
         */
-       if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
-               refcount = 1 + HPAGE_PMD_NR;
-       else
-               refcount = 2;
+       refcount = 1 + compound_nr(page);
        if (!page_ref_freeze(page, refcount))
                goto cannot_free;
        /* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
@@ -1121,7 +1121,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                      struct scan_control *sc,
                                      enum ttu_flags ttu_flags,
                                      struct reclaim_stat *stat,
-                                     bool force_reclaim)
+                                     bool ignore_references)
 {
        LIST_HEAD(ret_pages);
        LIST_HEAD(free_pages);
@@ -1135,7 +1135,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                struct address_space *mapping;
                struct page *page;
                int may_enter_fs;
-               enum page_references references = PAGEREF_RECLAIM_CLEAN;
+               enum page_references references = PAGEREF_RECLAIM;
                bool dirty, writeback;
                unsigned int nr_pages;
 
@@ -1149,7 +1149,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                VM_BUG_ON_PAGE(PageActive(page), page);
 
-               nr_pages = 1 << compound_order(page);
+               nr_pages = compound_nr(page);
 
                /* Account the number of base pages even though THP */
                sc->nr_scanned += nr_pages;
@@ -1266,7 +1266,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        }
                }
 
-               if (!force_reclaim)
+               if (!ignore_references)
                        references = page_check_references(page, sc);
 
                switch (references) {
@@ -1487,10 +1487,9 @@ free_it:
                 * Is there need to periodically free_page_list? It would
                 * appear not as the counts should be low
                 */
-               if (unlikely(PageTransHuge(page))) {
-                       mem_cgroup_uncharge(page);
+               if (unlikely(PageTransHuge(page)))
                        (*get_compound_page_dtor(page))(page);
-               } else
+               else
                        list_add(&page->lru, &free_pages);
                continue;
 
@@ -1705,7 +1704,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
                VM_BUG_ON_PAGE(!PageLRU(page), page);
 
-               nr_pages = 1 << compound_order(page);
+               nr_pages = compound_nr(page);
                total_scan += nr_pages;
 
                if (page_zonenum(page) > sc->reclaim_idx) {
@@ -1911,7 +1910,6 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 
                        if (unlikely(PageCompound(page))) {
                                spin_unlock_irq(&pgdat->lru_lock);
-                               mem_cgroup_uncharge(page);
                                (*get_compound_page_dtor(page))(page);
                                spin_lock_irq(&pgdat->lru_lock);
                        } else
@@ -2145,6 +2143,62 @@ static void shrink_active_list(unsigned long nr_to_scan,
                        nr_deactivate, nr_rotated, sc->priority, file);
 }
 
+unsigned long reclaim_pages(struct list_head *page_list)
+{
+       int nid = -1;
+       unsigned long nr_reclaimed = 0;
+       LIST_HEAD(node_page_list);
+       struct reclaim_stat dummy_stat;
+       struct page *page;
+       struct scan_control sc = {
+               .gfp_mask = GFP_KERNEL,
+               .priority = DEF_PRIORITY,
+               .may_writepage = 1,
+               .may_unmap = 1,
+               .may_swap = 1,
+       };
+
+       while (!list_empty(page_list)) {
+               page = lru_to_page(page_list);
+               if (nid == -1) {
+                       nid = page_to_nid(page);
+                       INIT_LIST_HEAD(&node_page_list);
+               }
+
+               if (nid == page_to_nid(page)) {
+                       ClearPageActive(page);
+                       list_move(&page->lru, &node_page_list);
+                       continue;
+               }
+
+               nr_reclaimed += shrink_page_list(&node_page_list,
+                                               NODE_DATA(nid),
+                                               &sc, 0,
+                                               &dummy_stat, false);
+               while (!list_empty(&node_page_list)) {
+                       page = lru_to_page(&node_page_list);
+                       list_del(&page->lru);
+                       putback_lru_page(page);
+               }
+
+               nid = -1;
+       }
+
+       if (!list_empty(&node_page_list)) {
+               nr_reclaimed += shrink_page_list(&node_page_list,
+                                               NODE_DATA(nid),
+                                               &sc, 0,
+                                               &dummy_stat, false);
+               while (!list_empty(&node_page_list)) {
+                       page = lru_to_page(&node_page_list);
+                       list_del(&page->lru);
+                       putback_lru_page(page);
+               }
+       }
+
+       return nr_reclaimed;
+}
+
 /*
  * The inactive anon list should be small enough that the VM never has
  * to do too much work.
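
reclaim_pages() above batches its input list per NUMA node before handing
each batch to shrink_page_list(). The same flush-on-key-change pattern on a
plain array, purely illustrative:

    #include <stdio.h>

    static void reclaim_batch(int nid, int nr)
    {
            if (nr)
                    printf("shrink_page_list: %d pages on node %d\n", nr, nid);
    }

    int main(void)
    {
            int node_of_page[] = { 0, 0, 1, 1, 1, 0 };
            int nid = -1, batch = 0;

            for (unsigned int i = 0;
                 i < sizeof(node_of_page) / sizeof(node_of_page[0]); i++) {
                    if (node_of_page[i] != nid) {
                            reclaim_batch(nid, batch);  /* flush previous node */
                            nid = node_of_page[i];
                            batch = 0;
                    }
                    batch++;
            }
            reclaim_batch(nid, batch);  /* final partial batch */
            return 0;
    }
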
@@ -2403,17 +2457,70 @@ out:
        *lru_pages = 0;
        for_each_evictable_lru(lru) {
                int file = is_file_lru(lru);
-               unsigned long size;
+               unsigned long lruvec_size;
                unsigned long scan;
+               unsigned long protection;
+
+               lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
+               protection = mem_cgroup_protection(memcg,
+                                                  sc->memcg_low_reclaim);
+
+               if (protection) {
+                       /*
+                        * Scale a cgroup's reclaim pressure by proportioning
+                        * its current usage to its memory.low or memory.min
+                        * setting.
+                        *
+                        * This is important, as otherwise scanning aggression
+                        * becomes extremely binary -- from nothing as we
+                        * approach the memory protection threshold, to totally
+                        * nominal as we exceed it.  This results in requiring
+                        * setting extremely liberal protection thresholds. It
+                        * also means we simply get no protection at all if we
+                        * set it too low, which is not ideal.
+                        *
+                        * If there is any protection in place, we reduce scan
+                        * pressure by how much of the total memory used is
+                        * within protection thresholds.
+                        *
+                        * There is one special case: in the first reclaim pass,
+                        * we skip over all groups that are within their low
+                        * protection. If that fails to reclaim enough pages to
+                        * satisfy the reclaim goal, we come back and override
+                        * the best-effort low protection. However, we still
+                        * ideally want to honor how well-behaved groups are in
+                        * that case instead of simply punishing them all
+                        * equally. As such, we reclaim them based on how much
+                        * memory they are using, reducing the scan pressure
+                        * again by how much of the total memory used is under
+                        * hard protection.
+                        */
+                       unsigned long cgroup_size = mem_cgroup_size(memcg);
+
+                       /* Avoid TOCTOU with earlier protection check */
+                       cgroup_size = max(cgroup_size, protection);
+
+                       scan = lruvec_size - lruvec_size * protection /
+                               cgroup_size;
+
+                       /*
+                        * Minimally target SWAP_CLUSTER_MAX pages to keep
+                        * reclaim moving forwards, avoiding decrementing
+                        * sc->priority further than desirable.
+                        */
+                       scan = max(scan, SWAP_CLUSTER_MAX);
+               } else {
+                       scan = lruvec_size;
+               }
+
+               scan >>= sc->priority;
 
-               size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
-               scan = size >> sc->priority;
                /*
                 * If the cgroup's already been deleted, make sure to
                 * scrape out the remaining cache.
                 */
                if (!scan && !mem_cgroup_online(memcg))
-                       scan = min(size, SWAP_CLUSTER_MAX);
+                       scan = min(lruvec_size, SWAP_CLUSTER_MAX);
 
                switch (scan_balance) {
                case SCAN_EQUAL:
@@ -2433,7 +2540,7 @@ out:
                case SCAN_ANON:
                        /* Scan one type exclusively */
                        if ((scan_balance == SCAN_FILE) != file) {
-                               size = 0;
+                               lruvec_size = 0;
                                scan = 0;
                        }
                        break;
@@ -2442,7 +2549,7 @@ out:
                        BUG();
                }
 
-               *lru_pages += size;
+               *lru_pages += lruvec_size;
                nr[lru] = scan;
        }
 }
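
Plugging illustrative numbers into the proportional formula above,
scan = lruvec_size - lruvec_size * protection / cgroup_size, with the same
TOCTOU clamp:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long lruvec_size = 1000000; /* pages on this LRU list   */
            unsigned long long cgroup_size = 262144;  /* 1 GiB of usage, in pages */
            unsigned long long protection  = 131072;  /* memory.low of 512 MiB    */
            unsigned long long scan;

            if (cgroup_size < protection)   /* the TOCTOU clamp from above */
                    cgroup_size = protection;
            scan = lruvec_size - lruvec_size * protection / cgroup_size;
            printf("scan %llu of %llu pages (pressure scaled to 50%%)\n",
                   scan, lruvec_size);
            return 0;
    }

A cgroup using twice its memory.low gets half the nominal scan pressure,
instead of the old all-or-nothing behaviour near the threshold.
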
@@ -2586,7 +2693,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  */
 static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                                        unsigned long nr_reclaimed,
-                                       unsigned long nr_scanned,
                                        struct scan_control *sc)
 {
        unsigned long pages_for_compaction;
@@ -2597,40 +2703,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
        if (!in_reclaim_compaction(sc))
                return false;
 
-       /* Consider stopping depending on scan and reclaim activity */
-       if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
-               /*
-                * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
-                * full LRU list has been scanned and we are still failing
-                * to reclaim pages. This full LRU scan is potentially
-                * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
-                */
-               if (!nr_reclaimed && !nr_scanned)
-                       return false;
-       } else {
-               /*
-                * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
-                * fail without consequence, stop if we failed to reclaim
-                * any pages from the last SWAP_CLUSTER_MAX number of
-                * pages that were scanned. This will return to the
-                * caller faster at the risk reclaim/compaction and
-                * the resulting allocation attempt fails
-                */
-               if (!nr_reclaimed)
-                       return false;
-       }
-
        /*
-        * If we have not reclaimed enough pages for compaction and the
-        * inactive lists are large enough, continue reclaiming
+        * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX
+        * number of pages that were scanned. This returns to the caller
+        * faster, at the risk that reclaim/compaction and the resulting
+        * allocation attempt fail. In the past we tried harder for
+        * __GFP_RETRY_MAYFAIL
+        * allocations through requiring that the full LRU list has been scanned
+        * first, by assuming that zero delta of sc->nr_scanned means full LRU
+        * scan, but that approximation was wrong, and there were corner cases
+        * where always a non-zero amount of pages were scanned.
         */
-       pages_for_compaction = compact_gap(sc->order);
-       inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-       if (get_nr_swap_pages() > 0)
-               inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
-       if (sc->nr_reclaimed < pages_for_compaction &&
-                       inactive_lru_pages > pages_for_compaction)
-               return true;
+       if (!nr_reclaimed)
+               return false;
 
        /* If compaction would go ahead or the allocation would succeed, stop */
        for (z = 0; z <= sc->reclaim_idx; z++) {
@@ -2647,7 +2731,17 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                        ;
                }
        }
-       return true;
+
+       /*
+        * If we have not reclaimed enough pages for compaction and the
+        * inactive lists are large enough, continue reclaiming
+        */
+       pages_for_compaction = compact_gap(sc->order);
+       inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
+       if (get_nr_swap_pages() > 0)
+               inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+       return inactive_lru_pages > pages_for_compaction;
 }
 
 static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
@@ -2664,10 +2758,6 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
-               struct mem_cgroup_reclaim_cookie reclaim = {
-                       .pgdat = pgdat,
-                       .priority = sc->priority,
-               };
                unsigned long node_lru_pages = 0;
                struct mem_cgroup *memcg;
 
@@ -2676,7 +2766,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
-               memcg = mem_cgroup_iter(root, NULL, &reclaim);
+               memcg = mem_cgroup_iter(root, NULL, NULL);
                do {
                        unsigned long lru_pages;
                        unsigned long reclaimed;
@@ -2703,6 +2793,13 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                memcg_memory_event(memcg, MEMCG_LOW);
                                break;
                        case MEMCG_PROT_NONE:
+                               /*
+                                * All protection thresholds breached. We may
+                                * still choose to vary the scan pressure
+                                * applied based on how much the cgroup in
+                                * question has exceeded its protection
+                                * thresholds (see get_scan_count).
+                                */
                                break;
                        }
 
@@ -2719,21 +2816,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                   sc->nr_scanned - scanned,
                                   sc->nr_reclaimed - reclaimed);
 
-                       /*
-                        * Kswapd have to scan all memory cgroups to fulfill
-                        * the overall scan target for the node.
-                        *
-                        * Limit reclaim, on the other hand, only cares about
-                        * nr_to_reclaim pages to be reclaimed and it will
-                        * retry with decreasing priority if one round over the
-                        * whole hierarchy is not sufficient.
-                        */
-                       if (!current_is_kswapd() &&
-                                       sc->nr_reclaimed >= sc->nr_to_reclaim) {
-                               mem_cgroup_iter_break(root, memcg);
-                               break;
-                       }
-               } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+               } while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 
                if (reclaim_state) {
                        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -2810,7 +2893,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                        wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
        } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
-                                        sc->nr_scanned - nr_scanned, sc));
+                                        sc));
 
        /*
         * Kswapd gives up on balancing particular nodes after too
index fd7e16c..a822204 100644 (file)
@@ -1158,6 +1158,8 @@ const char * const vmstat_text[] = {
        "nr_shmem",
        "nr_shmem_hugepages",
        "nr_shmem_pmdmapped",
+       "nr_file_hugepages",
+       "nr_file_pmdmapped",
        "nr_anon_transparent_hugepages",
        "nr_unstable",
        "nr_vmscan_write",
@@ -1381,12 +1383,29 @@ static void pagetypeinfo_showfree_print(struct seq_file *m,
                        unsigned long freecount = 0;
                        struct free_area *area;
                        struct list_head *curr;
+                       bool overflow = false;
 
                        area = &(zone->free_area[order]);
 
-                       list_for_each(curr, &area->free_list[mtype])
-                               freecount++;
-                       seq_printf(m, "%6lu ", freecount);
+                       list_for_each(curr, &area->free_list[mtype]) {
+                               /*
+                                * Cap the free_list iteration because it might
+                                * be really large and we are under a spinlock,
+                                * so a long time spent here could trigger the
+                                * hard lockup detector. This is a debugging
+                                * tool anyway, so knowing there is a handful
+                                * of pages of this order should be more than
+                                * sufficient.
+                                */
+                               if (++freecount >= 100000) {
+                                       overflow = true;
+                                       break;
+                               }
+                       }
+                       seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
+                       spin_unlock_irq(&zone->lock);
+                       cond_resched();
+                       spin_lock_irq(&zone->lock);
                }
                seq_putc(m, '\n');
        }
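
The pattern in this hunk, a capped walk with the zone lock dropped between buckets, generalizes to any debug interface that iterates unbounded lists under a spinlock. A small pthread-based sketch of the same idea (the cap, node type, and lock are illustrative; the kernel additionally calls cond_resched() after releasing the lock):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define FREECOUNT_CAP 100000

struct node { struct node *next; };

/*
 * Count list entries under a lock, but cap the walk and release the
 * lock before moving on, so one huge list cannot pin the CPU with
 * the lock held.
 */
static unsigned long count_capped(pthread_mutex_t *lock,
                                  struct node *head, bool *overflow)
{
        unsigned long n = 0;

        pthread_mutex_lock(lock);
        for (struct node *p = head; p; p = p->next) {
                if (++n >= FREECOUNT_CAP) {
                        *overflow = true;
                        break;
                }
        }
        pthread_mutex_unlock(lock);    /* give other lockers a chance */
        return n;
}

int main(void)
{
        static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        struct node c = { NULL }, b = { &c }, a = { &b };
        bool overflow = false;
        unsigned long n = count_capped(&lock, &a, &overflow);

        printf("%s%lu\n", overflow ? ">" : "", n);
        return 0;
}
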
@@ -1970,7 +1989,7 @@ void __init init_mm_internals(void)
 #endif
 #ifdef CONFIG_PROC_FS
        proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
-       proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
+       proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
        proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
        proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
 #endif
index 75b7962..6d3d3f6 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/wait.h>
 #include <linux/zpool.h>
 #include <linux/magic.h>
 
@@ -146,8 +145,6 @@ struct z3fold_header {
  * @release_wq:        workqueue for safe page release
  * @work:      work_struct for safe page release
  * @inode:     inode for z3fold pseudo filesystem
- * @destroying: bool to stop migration once we start destruction
- * @isolated: int to count the number of pages currently in isolation
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
@@ -166,11 +163,8 @@ struct z3fold_pool {
        const struct zpool_ops *zpool_ops;
        struct workqueue_struct *compact_wq;
        struct workqueue_struct *release_wq;
-       struct wait_queue_head isolate_wait;
        struct work_struct work;
        struct inode *inode;
-       bool destroying;
-       int isolated;
 };
 
 /*
@@ -301,14 +295,11 @@ static void z3fold_unregister_migration(struct z3fold_pool *pool)
  }
 
 /* Initializes the z3fold header of a newly allocated z3fold page */
-static struct z3fold_header *init_z3fold_page(struct page *page,
+static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
                                        struct z3fold_pool *pool, gfp_t gfp)
 {
        struct z3fold_header *zhdr = page_address(page);
-       struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);
-
-       if (!slots)
-               return NULL;
+       struct z3fold_buddy_slots *slots;
 
        INIT_LIST_HEAD(&page->lru);
        clear_bit(PAGE_HEADLESS, &page->private);
@@ -316,6 +307,12 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
        clear_bit(NEEDS_COMPACTING, &page->private);
        clear_bit(PAGE_STALE, &page->private);
        clear_bit(PAGE_CLAIMED, &page->private);
+       if (headless)
+               return zhdr;
+
+       slots = alloc_slots(pool, gfp);
+       if (!slots)
+               return NULL;
 
        spin_lock_init(&zhdr->page_lock);
        kref_init(&zhdr->refcount);
@@ -372,9 +369,10 @@ static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
  * Encodes the handle of a particular buddy within a z3fold page
  * Pool lock should be held as this function accesses first_num
  */
-static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
+static unsigned long __encode_handle(struct z3fold_header *zhdr,
+                               struct z3fold_buddy_slots *slots,
+                               enum buddy bud)
 {
-       struct z3fold_buddy_slots *slots;
        unsigned long h = (unsigned long)zhdr;
        int idx = 0;
 
@@ -391,11 +389,15 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
        if (bud == LAST)
                h |= (zhdr->last_chunks << BUDDY_SHIFT);
 
-       slots = zhdr->slots;
        slots->slot[idx] = h;
        return (unsigned long)&slots->slot[idx];
 }
 
+static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
+{
+       return __encode_handle(zhdr, zhdr->slots, bud);
+}
+
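Passing the slots array explicitly is what later lets z3fold_reclaim_page() encode handles into an on-stack copy that a racing z3fold_free() cannot zero. A simplified user-space model of the encoding itself (real handles also pack last_chunks and first_num; the alignment and buddy values here are illustrative):

#include <stdio.h>

/*
 * Simplified model of z3fold handle encoding: headers are aligned,
 * so the low bits of the header address can carry a buddy index;
 * the packed word is stored into a slot and the handle given out is
 * the address of that slot.
 */
enum buddy { FIRST, MIDDLE, LAST, BUDDIES };

struct slots { unsigned long slot[BUDDIES]; };

static unsigned long encode(void *hdr, struct slots *s, enum buddy b)
{
        unsigned long h = (unsigned long)hdr | b;   /* pack the index */

        s->slot[b] = h;
        return (unsigned long)&s->slot[b];          /* handle = slot address */
}

int main(void)
{
        static _Alignas(64) long header_mem[8];
        struct slots s;
        unsigned long handle = encode(header_mem, &s, MIDDLE);

        printf("handle %#lx -> word %#lx\n", handle,
               *(unsigned long *)handle);
        return 0;
}
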
 /* Returns the z3fold page where a given handle is stored */
 static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
 {
@@ -630,6 +632,7 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
        }
 
        if (unlikely(PageIsolated(page) ||
+                    test_bit(PAGE_CLAIMED, &page->private) ||
                     test_bit(PAGE_STALE, &page->private))) {
                z3fold_page_unlock(zhdr);
                return;
@@ -775,7 +778,6 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
                goto out_c;
        spin_lock_init(&pool->lock);
        spin_lock_init(&pool->stale_lock);
-       init_waitqueue_head(&pool->isolate_wait);
        pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
        if (!pool->unbuddied)
                goto out_pool;
@@ -815,15 +817,6 @@ out:
        return NULL;
 }
 
-static bool pool_isolated_are_drained(struct z3fold_pool *pool)
-{
-       bool ret;
-
-       spin_lock(&pool->lock);
-       ret = pool->isolated == 0;
-       spin_unlock(&pool->lock);
-       return ret;
-}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool:      the z3fold pool to be destroyed
@@ -833,22 +826,6 @@ static bool pool_isolated_are_drained(struct z3fold_pool *pool)
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
        kmem_cache_destroy(pool->c_handle);
-       /*
-        * We set pool-> destroying under lock to ensure that
-        * z3fold_page_isolate() sees any changes to destroying. This way we
-        * avoid the need for any memory barriers.
-        */
-
-       spin_lock(&pool->lock);
-       pool->destroying = true;
-       spin_unlock(&pool->lock);
-
-       /*
-        * We need to ensure that no pages are being migrated while we destroy
-        * these workqueues, as migration can queue work on either of the
-        * workqueues.
-        */
-       wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
 
        /*
         * We need to destroy pool->compact_wq before pool->release_wq,
@@ -956,7 +933,7 @@ retry:
        if (!page)
                return -ENOMEM;
 
-       zhdr = init_z3fold_page(page, pool, gfp);
+       zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
        if (!zhdr) {
                __free_page(page);
                return -ENOMEM;
@@ -1021,9 +998,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
        struct z3fold_header *zhdr;
        struct page *page;
        enum buddy bud;
+       bool page_claimed;
 
        zhdr = handle_to_z3fold_header(handle);
        page = virt_to_page(zhdr);
+       page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);
 
        if (test_bit(PAGE_HEADLESS, &page->private)) {
                /* if a headless page is under reclaim, just leave.
@@ -1031,7 +1010,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                 * has not been set before, we release this page
                 * immediately so we don't care about its value any more.
                 */
-               if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+               if (!page_claimed) {
                        spin_lock(&pool->lock);
                        list_del(&page->lru);
                        spin_unlock(&pool->lock);
@@ -1067,13 +1046,15 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                atomic64_dec(&pool->pages_nr);
                return;
        }
-       if (test_bit(PAGE_CLAIMED, &page->private)) {
+       if (page_claimed) {
+               /* the page was claimed by somebody else before us */
                z3fold_page_unlock(zhdr);
                return;
        }
        if (unlikely(PageIsolated(page)) ||
            test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
                z3fold_page_unlock(zhdr);
+               clear_bit(PAGE_CLAIMED, &page->private);
                return;
        }
        if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
@@ -1083,10 +1064,12 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                zhdr->cpu = -1;
                kref_get(&zhdr->refcount);
                do_compact_page(zhdr, true);
+               clear_bit(PAGE_CLAIMED, &page->private);
                return;
        }
        kref_get(&zhdr->refcount);
        queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
+       clear_bit(PAGE_CLAIMED, &page->private);
        z3fold_page_unlock(zhdr);
 }
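
The reworked free path is a small ownership protocol around PAGE_CLAIMED: test-and-set up front, back off if somebody else already owns the page, and clear the bit again on every path that does not consume the page. A user-space model of that handshake using C11 atomics (the flag stands in for the page-private bit):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag claimed = ATOMIC_FLAG_INIT;

/* Returns true only for the first claimant. */
static bool try_claim(void)
{
        return !atomic_flag_test_and_set(&claimed);
}

/* Paths that do not consume the page must release ownership. */
static void unclaim(void)
{
        atomic_flag_clear(&claimed);
}

int main(void)
{
        printf("first:  %s\n", try_claim() ? "claimed" : "busy");
        printf("second: %s\n", try_claim() ? "claimed" : "busy");
        unclaim();
        printf("third:  %s\n", try_claim() ? "claimed" : "busy");
        return 0;
}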
 
@@ -1132,6 +1115,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
        struct z3fold_header *zhdr = NULL;
        struct page *page = NULL;
        struct list_head *pos;
+       struct z3fold_buddy_slots slots;
        unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
 
        spin_lock(&pool->lock);
@@ -1150,16 +1134,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                        /* this bit could have been set by free, in which case
                         * we pass over to the next page in the pool.
                         */
-                       if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+                       if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+                               page = NULL;
                                continue;
+                       }
 
-                       if (unlikely(PageIsolated(page)))
+                       if (unlikely(PageIsolated(page))) {
+                               clear_bit(PAGE_CLAIMED, &page->private);
+                               page = NULL;
                                continue;
+                       }
+                       zhdr = page_address(page);
                        if (test_bit(PAGE_HEADLESS, &page->private))
                                break;
 
-                       zhdr = page_address(page);
                        if (!z3fold_page_trylock(zhdr)) {
+                               clear_bit(PAGE_CLAIMED, &page->private);
                                zhdr = NULL;
                                continue; /* can't evict at this point */
                        }
@@ -1177,26 +1167,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 
                if (!test_bit(PAGE_HEADLESS, &page->private)) {
                        /*
-                        * We need encode the handles before unlocking, since
-                        * we can race with free that will set
-                        * (first|last)_chunks to 0
+                        * We need to encode the handles before unlocking, and
+                        * use our local slots structure because z3fold_free
+                        * can zero out zhdr->slots and we can't do much
+                        * about that
                         */
                        first_handle = 0;
                        last_handle = 0;
                        middle_handle = 0;
                        if (zhdr->first_chunks)
-                               first_handle = encode_handle(zhdr, FIRST);
+                               first_handle = __encode_handle(zhdr, &slots,
+                                                               FIRST);
                        if (zhdr->middle_chunks)
-                               middle_handle = encode_handle(zhdr, MIDDLE);
+                               middle_handle = __encode_handle(zhdr, &slots,
+                                                               MIDDLE);
                        if (zhdr->last_chunks)
-                               last_handle = encode_handle(zhdr, LAST);
+                               last_handle = __encode_handle(zhdr, &slots,
+                                                               LAST);
                        /*
                         * it's safe to unlock here because we hold a
                         * reference to this page
                         */
                        z3fold_page_unlock(zhdr);
                } else {
-                       first_handle = encode_handle(zhdr, HEADLESS);
+                       first_handle = __encode_handle(zhdr, &slots, HEADLESS);
                        last_handle = middle_handle = 0;
                }
 
@@ -1226,9 +1220,9 @@ next:
                        spin_lock(&pool->lock);
                        list_add(&page->lru, &pool->lru);
                        spin_unlock(&pool->lock);
+                       clear_bit(PAGE_CLAIMED, &page->private);
                } else {
                        z3fold_page_lock(zhdr);
-                       clear_bit(PAGE_CLAIMED, &page->private);
                        if (kref_put(&zhdr->refcount,
                                        release_z3fold_page_locked)) {
                                atomic64_dec(&pool->pages_nr);
@@ -1243,6 +1237,7 @@ next:
                        list_add(&page->lru, &pool->lru);
                        spin_unlock(&pool->lock);
                        z3fold_page_unlock(zhdr);
+                       clear_bit(PAGE_CLAIMED, &page->private);
                }
 
                /* We started off locked so we need to lock the pool back */
@@ -1339,28 +1334,6 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
        return atomic64_read(&pool->pages_nr);
 }
 
-/*
- * z3fold_dec_isolated() expects to be called while pool->lock is held.
- */
-static void z3fold_dec_isolated(struct z3fold_pool *pool)
-{
-       assert_spin_locked(&pool->lock);
-       VM_BUG_ON(pool->isolated <= 0);
-       pool->isolated--;
-
-       /*
-        * If we have no more isolated pages, we have to see if
-        * z3fold_destroy_pool() is waiting for a signal.
-        */
-       if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
-               wake_up_all(&pool->isolate_wait);
-}
-
-static void z3fold_inc_isolated(struct z3fold_pool *pool)
-{
-       pool->isolated++;
-}
-
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
        struct z3fold_header *zhdr;
@@ -1369,7 +1342,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
        VM_BUG_ON_PAGE(!PageMovable(page), page);
        VM_BUG_ON_PAGE(PageIsolated(page), page);
 
-       if (test_bit(PAGE_HEADLESS, &page->private))
+       if (test_bit(PAGE_HEADLESS, &page->private) ||
+           test_bit(PAGE_CLAIMED, &page->private))
                return false;
 
        zhdr = page_address(page);
@@ -1387,34 +1361,6 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
                spin_lock(&pool->lock);
                if (!list_empty(&page->lru))
                        list_del(&page->lru);
-               /*
-                * We need to check for destruction while holding pool->lock, as
-                * otherwise destruction could see 0 isolated pages, and
-                * proceed.
-                */
-               if (unlikely(pool->destroying)) {
-                       spin_unlock(&pool->lock);
-                       /*
-                        * If this page isn't stale, somebody else holds a
-                        * reference to it. Let't drop our refcount so that they
-                        * can call the release logic.
-                        */
-                       if (unlikely(kref_put(&zhdr->refcount,
-                                             release_z3fold_page_locked))) {
-                               /*
-                                * If we get here we have kref problems, so we
-                                * should freak out.
-                                */
-                               WARN(1, "Z3fold is experiencing kref problems\n");
-                               z3fold_page_unlock(zhdr);
-                               return false;
-                       }
-                       z3fold_page_unlock(zhdr);
-                       return false;
-               }
-
-
-               z3fold_inc_isolated(pool);
                spin_unlock(&pool->lock);
                z3fold_page_unlock(zhdr);
                return true;
@@ -1483,10 +1429,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 
        queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
-       spin_lock(&pool->lock);
-       z3fold_dec_isolated(pool);
-       spin_unlock(&pool->lock);
-
        page_mapcount_reset(page);
        put_page(page);
        return 0;
@@ -1506,14 +1448,10 @@ static void z3fold_page_putback(struct page *page)
        INIT_LIST_HEAD(&page->lru);
        if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
                atomic64_dec(&pool->pages_nr);
-               spin_lock(&pool->lock);
-               z3fold_dec_isolated(pool);
-               spin_unlock(&pool->lock);
                return;
        }
        spin_lock(&pool->lock);
        list_add(&page->lru, &pool->lru);
-       z3fold_dec_isolated(pool);
        spin_unlock(&pool->lock);
        z3fold_page_unlock(zhdr);
 }
index a2dd910..8636692 100644 (file)
@@ -238,6 +238,22 @@ const char *zpool_get_type(struct zpool *zpool)
        return zpool->driver->type;
 }
 
+/**
+ * zpool_malloc_support_movable() - Check if the zpool supports
+ * allocating movable memory
+ * @zpool:     The zpool to check
+ *
+ * This returns whether the zpool supports allocating movable memory.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: true if the zpool supports allocating movable memory, false if not
+ */
+bool zpool_malloc_support_movable(struct zpool *zpool)
+{
+       return zpool->driver->malloc_support_movable;
+}
+
 /**
  * zpool_malloc() - Allocate memory
  * @zpool:     The zpool to allocate from.
index e98bb6a..2b2b9aa 100644 (file)
@@ -443,15 +443,16 @@ static u64 zs_zpool_total_size(void *pool)
 }
 
 static struct zpool_driver zs_zpool_driver = {
-       .type =         "zsmalloc",
-       .owner =        THIS_MODULE,
-       .create =       zs_zpool_create,
-       .destroy =      zs_zpool_destroy,
-       .malloc =       zs_zpool_malloc,
-       .free =         zs_zpool_free,
-       .map =          zs_zpool_map,
-       .unmap =        zs_zpool_unmap,
-       .total_size =   zs_zpool_total_size,
+       .type =                   "zsmalloc",
+       .owner =                  THIS_MODULE,
+       .create =                 zs_zpool_create,
+       .destroy =                zs_zpool_destroy,
+       .malloc_support_movable = true,
+       .malloc =                 zs_zpool_malloc,
+       .free =                   zs_zpool_free,
+       .map =                    zs_zpool_map,
+       .unmap =                  zs_zpool_unmap,
+       .total_size =             zs_zpool_total_size,
 };
 
 MODULE_ALIAS("zpool-zsmalloc");
@@ -476,10 +477,6 @@ static inline int get_zspage_inuse(struct zspage *zspage)
        return zspage->inuse;
 }
 
-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
-       zspage->inuse = val;
-}
 
 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
index 0e22744..46a3223 100644 (file)
@@ -856,7 +856,6 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
        /* extract swpentry from data */
        zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
        swpentry = zhdr->swpentry; /* here */
-       zpool_unmap_handle(pool, handle);
        tree = zswap_trees[swp_type(swpentry)];
        offset = swp_offset(swpentry);
 
@@ -866,6 +865,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
        if (!entry) {
                /* entry was invalidated */
                spin_unlock(&tree->lock);
+               zpool_unmap_handle(pool, handle);
                return 0;
        }
        spin_unlock(&tree->lock);
@@ -886,15 +886,13 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
        case ZSWAP_SWAPCACHE_NEW: /* page is locked */
                /* decompress */
                dlen = PAGE_SIZE;
-               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
-                               ZPOOL_MM_RO) + sizeof(struct zswap_header);
+               src = (u8 *)zhdr + sizeof(struct zswap_header);
                dst = kmap_atomic(page);
                tfm = *get_cpu_ptr(entry->pool->tfm);
                ret = crypto_comp_decompress(tfm, src, entry->length,
                                             dst, &dlen);
                put_cpu_ptr(entry->pool->tfm);
                kunmap_atomic(dst);
-               zpool_unmap_handle(entry->pool->zpool, entry->handle);
                BUG_ON(ret);
                BUG_ON(dlen != PAGE_SIZE);
 
@@ -940,6 +938,7 @@ fail:
        spin_unlock(&tree->lock);
 
 end:
+       zpool_unmap_handle(pool, handle);
        return ret;
 }
 
@@ -997,6 +996,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
        char *buf;
        u8 *src, *dst;
        struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
+       gfp_t gfp;
 
        /* THP isn't supported */
        if (PageTransHuge(page)) {
@@ -1070,9 +1070,10 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* store */
        hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
-       ret = zpool_malloc(entry->pool->zpool, hlen + dlen,
-                          __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
-                          &handle);
+       gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+       if (zpool_malloc_support_movable(entry->pool->zpool))
+               gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
+       ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
        if (ret == -ENOSPC) {
                zswap_reject_compress_poor++;
                goto put_dstmem;
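
The store path now builds its gfp mask conditionally rather than hard-coding it. A small sketch of that policy (the flag values and the support query are stand-ins for the kernel's gfp_t and zpool_malloc_support_movable()):

#include <stdbool.h>
#include <stdio.h>

#define GFP_NORETRY        0x1u
#define GFP_NOWARN         0x2u
#define GFP_KSWAPD_RECLAIM 0x4u
#define GFP_HIGHMEM        0x8u
#define GFP_MOVABLE        0x10u

/*
 * Start from the non-blocking flags zswap always uses, and opt into
 * highmem/movable pages only when the backing pool driver says it
 * can migrate its allocations.
 */
static unsigned int store_gfp(bool driver_supports_movable)
{
        unsigned int gfp = GFP_NORETRY | GFP_NOWARN | GFP_KSWAPD_RECLAIM;

        if (driver_supports_movable)
                gfp |= GFP_HIGHMEM | GFP_MOVABLE;
        return gfp;
}

int main(void)
{
        printf("zsmalloc-like: %#x\n", store_gfp(true));
        printf("legacy pool:   %#x\n", store_gfp(false));
        return 0;
}
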
index 54728d2..d4bcfd8 100644 (file)
@@ -172,7 +172,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
        if (err < 0)
                goto out_uninit_mvrp;
 
-       vlan->nest_level = dev_get_nest_level(real_dev) + 1;
        err = register_netdevice(dev);
        if (err < 0)
                goto out_uninit_mvrp;
index 93eadf1..e5bff5c 100644 (file)
@@ -489,36 +489,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
        dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
 }
 
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-static struct lock_class_key vlan_netdev_addr_lock_key;
-
-static void vlan_dev_set_lockdep_one(struct net_device *dev,
-                                    struct netdev_queue *txq,
-                                    void *_subclass)
-{
-       lockdep_set_class_and_subclass(&txq->_xmit_lock,
-                                      &vlan_netdev_xmit_lock_key,
-                                      *(int *)_subclass);
-}
-
-static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
-{
-       lockdep_set_class_and_subclass(&dev->addr_list_lock,
-                                      &vlan_netdev_addr_lock_key,
-                                      subclass);
-       netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
-}
-
-static int vlan_dev_get_lock_subclass(struct net_device *dev)
-{
-       return vlan_dev_priv(dev)->nest_level;
-}
-
 static const struct header_ops vlan_header_ops = {
        .create  = vlan_dev_hard_header,
        .parse   = eth_header_parse,
@@ -609,8 +579,6 @@ static int vlan_dev_init(struct net_device *dev)
 
        SET_NETDEV_DEVTYPE(dev, &vlan_type);
 
-       vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
-
        vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
        if (!vlan->vlan_pcpu_stats)
                return -ENOMEM;
@@ -812,7 +780,6 @@ static const struct net_device_ops vlan_netdev_ops = {
        .ndo_netpoll_cleanup    = vlan_dev_netpoll_cleanup,
 #endif
        .ndo_fix_features       = vlan_dev_fix_features,
-       .ndo_get_lock_subclass  = vlan_dev_get_lock_subclass,
        .ndo_get_iflink         = vlan_dev_get_iflink,
 };
 
index 9622f3e..1d48afc 100644 (file)
@@ -281,6 +281,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
 
        p9pdu_reset(&req->tc);
        p9pdu_reset(&req->rc);
+       req->t_err = 0;
        req->status = REQ_STATUS_ALLOC;
        init_waitqueue_head(&req->wq);
        INIT_LIST_HEAD(&req->req_list);
index 4072e9d..b41375d 100644 (file)
@@ -1023,6 +1023,11 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol,
         */
        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                goto out;
+
+       rc = -EPERM;
+       if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+               goto out;
+
        rc = -ENOMEM;
        sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern);
        if (!sk)
index b7528e7..0ce530a 100644 (file)
@@ -668,7 +668,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
                mask |= EPOLLHUP;
 
        /* readable? */
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        /* writable? */
index ca52077..bb222b8 100644 (file)
@@ -855,6 +855,8 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
                break;
 
        case SOCK_RAW:
+               if (!capable(CAP_NET_RAW))
+                       return -EPERM;
                break;
        default:
                return -ESOCKTNOSUPPORT;
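
This hunk and the appletalk one above add the same gate: unprivileged processes may no longer open SOCK_RAW sockets in these families. A compact model of the check (capable_net_raw is a stand-in for capable(CAP_NET_RAW), and the kern parameter mirrors the in-kernel-socket exemption seen in the appletalk variant):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define SOCK_DGRAM 1
#define SOCK_RAW   3

static bool capable_net_raw;   /* stand-in for capable(CAP_NET_RAW) */

/* Raw sockets expose link-level frames, so they need a capability. */
static int check_create(int type, bool kern)
{
        if (type == SOCK_RAW && !kern && !capable_net_raw)
                return -EPERM;
        return 0;
}

int main(void)
{
        printf("raw, unprivileged: %d\n", check_create(SOCK_RAW, false));
        capable_net_raw = true;
        printf("raw, capable:      %d\n", check_create(SOCK_RAW, false));
        return 0;
}
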
index a3d188d..d5028af 100644 (file)
@@ -12,11 +12,11 @@ config BATMAN_ADV
        depends on NET
        select LIBCRC32C
        help
-          B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
-          a routing protocol for multi-hop ad-hoc mesh networks. The
-          networks may be wired or wireless. See
-          https://www.open-mesh.org/ for more information and user space
-          tools.
+         B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
+         a routing protocol for multi-hop ad-hoc mesh networks. The
+         networks may be wired or wireless. See
+         https://www.open-mesh.org/ for more information and user space
+         tools.
 
 config BATMAN_ADV_BATMAN_V
        bool "B.A.T.M.A.N. V protocol"
index d78938e..5b0b20e 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/pkt_sched.h>
@@ -193,14 +195,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
        unsigned char *ogm_buff;
        u32 random_seqno;
 
+       mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
        /* randomize initial seqno to avoid collision */
        get_random_bytes(&random_seqno, sizeof(random_seqno));
        atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno);
 
        hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
        ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
-       if (!ogm_buff)
+       if (!ogm_buff) {
+               mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
                return -ENOMEM;
+       }
 
        hard_iface->bat_iv.ogm_buff = ogm_buff;
 
@@ -212,35 +218,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
        batadv_ogm_packet->reserved = 0;
        batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
 
+       mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+
        return 0;
 }
 
 static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
 {
+       mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
        kfree(hard_iface->bat_iv.ogm_buff);
        hard_iface->bat_iv.ogm_buff = NULL;
+
+       mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
 }
 
 static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
 {
        struct batadv_ogm_packet *batadv_ogm_packet;
-       unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+       void *ogm_buff;
 
-       batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+       mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+       ogm_buff = hard_iface->bat_iv.ogm_buff;
+       if (!ogm_buff)
+               goto unlock;
+
+       batadv_ogm_packet = ogm_buff;
        ether_addr_copy(batadv_ogm_packet->orig,
                        hard_iface->net_dev->dev_addr);
        ether_addr_copy(batadv_ogm_packet->prev_sender,
                        hard_iface->net_dev->dev_addr);
+
+unlock:
+       mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
 }
 
 static void
 batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
 {
        struct batadv_ogm_packet *batadv_ogm_packet;
-       unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+       void *ogm_buff;
 
-       batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+       mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+       ogm_buff = hard_iface->bat_iv.ogm_buff;
+       if (!ogm_buff)
+               goto unlock;
+
+       batadv_ogm_packet = ogm_buff;
        batadv_ogm_packet->ttl = BATADV_TTL;
+
+unlock:
+       mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
 }
 
 /* when do we schedule our own ogm to be sent */
@@ -742,7 +772,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
        }
 }
 
-static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+/**
+ * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer
+ * @hard_iface: interface whose ogm buffer should be transmitted
+ */
+static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
 {
        struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
        unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff;
@@ -753,9 +787,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
        u16 tvlv_len = 0;
        unsigned long send_time;
 
-       if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
-           hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
-               return;
+       lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
 
        /* the interface gets activated here to avoid race conditions between
         * the moment of activating the interface in
@@ -823,6 +855,17 @@ out:
                batadv_hardif_put(primary_if);
 }
 
+static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+{
+       if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+           hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
+               return;
+
+       mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+       batadv_iv_ogm_schedule_buff(hard_iface);
+       mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+}
+
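This lock/worker split (a public entry point that takes ogm_buff_mutex, plus a *_buff body that only asserts the caller holds it) is the shape of all the OGM buffer fixes in this series. A pthread sketch of the pattern, where pthread_mutex_trylock() stands in for lockdep_assert_held(): on a held normal mutex trylock returns EBUSY, so the assertion passes exactly when the caller already locked it.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t buff_lock = PTHREAD_MUTEX_INITIALIZER;
static char *ogm_buff;

/* Worker body: documents and checks that buff_lock is held. */
static void schedule_buff_locked(void)
{
        assert(pthread_mutex_trylock(&buff_lock) != 0);
        printf("sending buffer %p\n", (void *)ogm_buff);
}

/* Public entry point: owns the locking. */
static void schedule(void)
{
        pthread_mutex_lock(&buff_lock);
        schedule_buff_locked();
        pthread_mutex_unlock(&buff_lock);
}

int main(void)
{
        schedule();
        return 0;
}
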
 /**
  * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface
  * @orig_node: originator which rebroadcasted the OGMs directly
index dc4f743..8033f24 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/lockdep.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/random.h>
 #include <linux/rculist.h>
@@ -256,14 +257,12 @@ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb,
 }
 
 /**
- * batadv_v_ogm_send() - periodic worker broadcasting the own OGM
- * @work: work queue item
+ * batadv_v_ogm_send_softif() - periodic worker broadcasting our own OGM
+ * @bat_priv: the bat priv with all the soft interface information
  */
-static void batadv_v_ogm_send(struct work_struct *work)
+static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv)
 {
        struct batadv_hard_iface *hard_iface;
-       struct batadv_priv_bat_v *bat_v;
-       struct batadv_priv *bat_priv;
        struct batadv_ogm2_packet *ogm_packet;
        struct sk_buff *skb, *skb_tmp;
        unsigned char *ogm_buff;
@@ -271,8 +270,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
        u16 tvlv_len = 0;
        int ret;
 
-       bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
-       bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
+       lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex);
 
        if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
                goto out;
@@ -363,6 +361,23 @@ out:
        return;
 }
 
+/**
+ * batadv_v_ogm_send() - periodic worker broadcasting our own OGM
+ * @work: work queue item
+ */
+static void batadv_v_ogm_send(struct work_struct *work)
+{
+       struct batadv_priv_bat_v *bat_v;
+       struct batadv_priv *bat_priv;
+
+       bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
+       bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
+
+       mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
+       batadv_v_ogm_send_softif(bat_priv);
+       mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
+}
+
 /**
  * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface
  * @work: work queue item
@@ -424,11 +439,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
        struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface);
        struct batadv_ogm2_packet *ogm_packet;
 
+       mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
        if (!bat_priv->bat_v.ogm_buff)
-               return;
+               goto unlock;
 
        ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff;
        ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr);
+
+unlock:
+       mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
 }
 
 /**
@@ -1050,6 +1069,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv)
        atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno);
        INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send);
 
+       mutex_init(&bat_priv->bat_v.ogm_buff_mutex);
+
        return 0;
 }
 
@@ -1061,7 +1082,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv)
 {
        cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq);
 
+       mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
+
        kfree(bat_priv->bat_v.ogm_buff);
        bat_priv->bat_v.ogm_buff = NULL;
        bat_priv->bat_v.ogm_buff_len = 0;
+
+       mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
 }
index c90e473..afb5228 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/kref.h>
 #include <linux/limits.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
@@ -929,6 +930,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
        INIT_LIST_HEAD(&hard_iface->list);
        INIT_HLIST_HEAD(&hard_iface->neigh_list);
 
+       mutex_init(&hard_iface->bat_iv.ogm_buff_mutex);
        spin_lock_init(&hard_iface->neigh_list_lock);
        kref_init(&hard_iface->refcount);
 
index a1146cb..5ee8e9a 100644 (file)
@@ -436,7 +436,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
        /* clean the netfilter state now that the batman-adv header has been
         * removed
         */
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
                goto dropped;
@@ -740,36 +740,6 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
        return 0;
 }
 
-/* batman-adv network devices have devices nesting below it and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key batadv_netdev_xmit_lock_key;
-static struct lock_class_key batadv_netdev_addr_lock_key;
-
-/**
- * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
- * @dev: device which owns the tx queue
- * @txq: tx queue to modify
- * @_unused: always NULL
- */
-static void batadv_set_lockdep_class_one(struct net_device *dev,
-                                        struct netdev_queue *txq,
-                                        void *_unused)
-{
-       lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key);
-}
-
-/**
- * batadv_set_lockdep_class() - Set txq and addr_list lockdep class
- * @dev: network device to modify
- */
-static void batadv_set_lockdep_class(struct net_device *dev)
-{
-       lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
-       netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
-}
-
 /**
  * batadv_softif_init_late() - late stage initialization of soft interface
  * @dev: registered network device to modify
@@ -783,8 +753,6 @@ static int batadv_softif_init_late(struct net_device *dev)
        int ret;
        size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM;
 
-       batadv_set_lockdep_class(dev);
-
        bat_priv = netdev_priv(dev);
        bat_priv->soft_iface = dev;
 
index be7c02a..4d7f1ba 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/kref.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/sched.h> /* for linux/wait.h */
@@ -81,6 +82,9 @@ struct batadv_hard_iface_bat_iv {
 
        /** @ogm_seqno: OGM sequence number - used to identify each OGM */
        atomic_t ogm_seqno;
+
+       /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
+       struct mutex ogm_buff_mutex;
 };
 
 /**
@@ -1539,6 +1543,9 @@ struct batadv_priv_bat_v {
        /** @ogm_seqno: OGM sequence number - used to identify each OGM */
        atomic_t ogm_seqno;
 
+       /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
+       struct mutex ogm_buff_mutex;
+
        /** @ogm_wq: workqueue used to schedule OGM transmissions */
        struct delayed_work ogm_wq;
 };
index bb55d92..4febc82 100644 (file)
@@ -571,15 +571,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
        return err < 0 ? NET_XMIT_DROP : err;
 }
 
-static int bt_dev_init(struct net_device *dev)
-{
-       netdev_lockdep_set_classes(dev);
-
-       return 0;
-}
-
 static const struct net_device_ops netdev_ops = {
-       .ndo_init               = bt_dev_init,
        .ndo_start_xmit         = bt_xmit,
 };
 
index 94ddf19..5f508c5 100644 (file)
@@ -460,7 +460,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
        if (sk->sk_state == BT_LISTEN)
                return bt_accept_poll(sk);
 
-       if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+       if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
                mask |= EPOLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
@@ -470,7 +470,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
        if (sk->sk_shutdown == SHUTDOWN_MASK)
                mask |= EPOLLHUP;
 
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        if (sk->sk_state == BT_CLOSED)
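
skb_queue_empty_lockless() differs from skb_queue_empty() only in reading the head's next pointer with READ_ONCE(): poll() peeks without the queue lock and must tolerate a stale but non-torn answer. A user-space model using a relaxed atomic load (the self-pointing head mimics the kernel's circular sk_buff_head):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct queue {
        _Atomic(void *) next;   /* like sk_buff_head: == &head when empty */
};

/*
 * Lockless emptiness check: one non-torn load, result may be stale
 * by the time it is used, which is fine for a wakeup hint.
 */
static bool queue_empty_lockless(struct queue *q)
{
        return atomic_load_explicit(&q->next, memory_order_relaxed)
               == (void *)q;
}

int main(void)
{
        struct queue q;

        atomic_init(&q.next, (void *)&q);   /* circular: empty list */
        printf("empty: %d\n", queue_empty_lockless(&q));
        return 0;
}
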
index 681b728..e804a30 100644 (file)
@@ -24,8 +24,6 @@
 const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
 EXPORT_SYMBOL_GPL(nf_br_ops);
 
-static struct lock_class_key bridge_netdev_addr_lock_key;
-
 /* net device transmit always called with BH disabled */
 netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -108,11 +106,6 @@ out:
        return NETDEV_TX_OK;
 }
 
-static void br_set_lockdep_class(struct net_device *dev)
-{
-       lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
-}
-
 static int br_dev_init(struct net_device *dev)
 {
        struct net_bridge *br = netdev_priv(dev);
@@ -150,7 +143,6 @@ static int br_dev_init(struct net_device *dev)
                br_mdb_hash_fini(br);
                br_fdb_hash_fini(br);
        }
-       br_set_lockdep_class(dev);
 
        return err;
 }
index 8842798..8096732 100644 (file)
@@ -33,6 +33,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
 {
        int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
        unsigned int hlen, ll_rs, mtu;
+       ktime_t tstamp = skb->tstamp;
        struct ip_frag_state state;
        struct iphdr *iph;
        int err;
@@ -80,6 +81,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
                        if (iter.frag)
                                ip_fraglist_prepare(skb, &iter);
 
+                       skb->tstamp = tstamp;
                        err = output(net, sk, data, skb);
                        if (err || !iter.frag)
                                break;
@@ -93,7 +95,7 @@ slow_path:
         * This may also be a clone skbuff, we could preserve the geometry for
         * the copies but probably not worth the effort.
         */
-       ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
+       ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);
 
        while (state.left > 0) {
                struct sk_buff *skb2;
@@ -104,6 +106,7 @@ slow_path:
                        goto blackhole;
                }
 
+               skb2->tstamp = tstamp;
                err = output(net, sk, data, skb2);
                if (err)
                        goto blackhole;
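
Both the fraglist fast path and the slow path now stamp each fragment with the timestamp saved from the original skb, since fragments are fresh buffers that would otherwise go out with a zero timestamp. A minimal sketch of the save-then-restamp idea (struct buf is an illustrative stand-in for sk_buff):

#include <stdint.h>
#include <stdio.h>

struct buf { uint64_t tstamp; int len; };

/*
 * Capture the original buffer's timestamp once, then copy it onto
 * every fragment produced from it.
 */
static void fragment(const struct buf *orig, struct buf *frags, int n)
{
        uint64_t tstamp = orig->tstamp;        /* save before slicing */

        for (int i = 0; i < n; i++) {
                frags[i].len = orig->len / n;
                frags[i].tstamp = tstamp;      /* restamp each piece */
        }
}

int main(void)
{
        struct buf orig = { .tstamp = 123456789, .len = 3000 };
        struct buf frags[2];

        fragment(&orig, frags, 2);
        printf("frag0 tstamp=%llu\n", (unsigned long long)frags[0].tstamp);
        return 0;
}
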
index 13ea920..ef14da5 100644 (file)
@@ -953,7 +953,7 @@ static __poll_t caif_poll(struct file *file,
                mask |= EPOLLRDHUP;
 
        /* readable? */
-       if (!skb_queue_empty(&sk->sk_receive_queue) ||
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
                (sk->sk_shutdown & RCV_SHUTDOWN))
                mask |= EPOLLIN | EPOLLRDNORM;
 
index 4eeea4d..2d56824 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/nsproxy.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/statfs.h>
@@ -185,18 +186,34 @@ int ceph_compare_options(struct ceph_options *new_opt,
 }
 EXPORT_SYMBOL(ceph_compare_options);
 
+/*
+ * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
+ * compatible with (a superset of) GFP_KERNEL.  This is because while the
+ * actual pages are allocated with the specified flags, the page table pages
+ * are always allocated with GFP_KERNEL.  map_vm_area() doesn't even take
+ * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc().
+ *
+ * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
+ */
 void *ceph_kvmalloc(size_t size, gfp_t flags)
 {
-       if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-               void *ptr = kmalloc(size, flags | __GFP_NOWARN);
-               if (ptr)
-                       return ptr;
+       void *p;
+
+       if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
+               p = kvmalloc(size, flags);
+       } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
+               unsigned int nofs_flag = memalloc_nofs_save();
+               p = kvmalloc(size, GFP_KERNEL);
+               memalloc_nofs_restore(nofs_flag);
+       } else {
+               unsigned int noio_flag = memalloc_noio_save();
+               p = kvmalloc(size, GFP_KERNEL);
+               memalloc_noio_restore(noio_flag);
        }
 
-       return __vmalloc(size, flags, PAGE_KERNEL);
+       return p;
 }
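
The rewritten helper leans on scoped allocation masks instead of passing restricted flags down into kvmalloc(). A toy single-threaded model of the memalloc_nofs_save()/restore() idea; the task-local mask here is a plain global, whereas in the kernel it lives in task_struct and is honoured by reclaim:

#include <stdio.h>

#define MAY_IO 0x1u
#define MAY_FS 0x2u

static unsigned int task_allowed = MAY_IO | MAY_FS;

/* Mask FS reclaim for everything below this point; return old mask. */
static unsigned int nofs_save(void)
{
        unsigned int old = task_allowed;

        task_allowed &= ~MAY_FS;
        return old;
}

static void nofs_restore(unsigned int old)
{
        task_allowed = old;
}

int main(void)
{
        unsigned int flag = nofs_save();

        printf("inside scope:  %#x\n", task_allowed);  /* FS masked */
        nofs_restore(flag);
        printf("outside scope: %#x\n", task_allowed);
        return 0;
}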
 
-
 static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
        int i = 0;
@@ -694,6 +711,14 @@ void ceph_destroy_client(struct ceph_client *client)
 }
 EXPORT_SYMBOL(ceph_destroy_client);
 
+void ceph_reset_client_addr(struct ceph_client *client)
+{
+       ceph_messenger_reset_nonce(&client->msgr);
+       ceph_monc_reopen_session(&client->monc);
+       ceph_osdc_reopen_osds(&client->osdc);
+}
+EXPORT_SYMBOL(ceph_reset_client_addr);
+
 /*
  * true if we have the mon map (and have thus joined the cluster)
  */
index 962f521..e4cb3db 100644 (file)
@@ -3031,6 +3031,12 @@ static void con_fault(struct ceph_connection *con)
 }
 
 
+void ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
+{
+       u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000;
+       msgr->inst.addr.nonce = cpu_to_le32(nonce);
+       encode_my_addr(msgr);
+}
 
 /*
  * initialize a new messenger instance
index 0520bf9..7256c40 100644 (file)
@@ -213,6 +213,13 @@ static void reopen_session(struct ceph_mon_client *monc)
        __open_session(monc);
 }
 
+void ceph_monc_reopen_session(struct ceph_mon_client *monc)
+{
+       mutex_lock(&monc->mutex);
+       reopen_session(monc);
+       mutex_unlock(&monc->mutex);
+}
+
 static void un_backoff(struct ceph_mon_client *monc)
 {
        monc->hunt_mult /= 2; /* reduce by 50% */
index 78ae6e8..ba45b07 100644 (file)
@@ -841,6 +841,7 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
        struct ceph_pagelist *pagelist;
        size_t payload_len = 0;
        size_t size;
+       int ret;
 
        op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
 
@@ -852,20 +853,27 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
        size = strlen(class);
        BUG_ON(size > (size_t) U8_MAX);
        op->cls.class_len = size;
-       ceph_pagelist_append(pagelist, class, size);
+       ret = ceph_pagelist_append(pagelist, class, size);
+       if (ret)
+               goto err_pagelist_free;
        payload_len += size;
 
        op->cls.method_name = method;
        size = strlen(method);
        BUG_ON(size > (size_t) U8_MAX);
        op->cls.method_len = size;
-       ceph_pagelist_append(pagelist, method, size);
+       ret = ceph_pagelist_append(pagelist, method, size);
+       if (ret)
+               goto err_pagelist_free;
        payload_len += size;
 
        osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
-
        op->indata_len = payload_len;
        return 0;
+
+err_pagelist_free:
+       ceph_pagelist_release(pagelist);
+       return ret;
 }
 EXPORT_SYMBOL(osd_req_op_cls_init);
 
@@ -877,6 +885,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
                                                      opcode, 0);
        struct ceph_pagelist *pagelist;
        size_t payload_len;
+       int ret;
 
        BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
 
@@ -886,10 +895,14 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
 
        payload_len = strlen(name);
        op->xattr.name_len = payload_len;
-       ceph_pagelist_append(pagelist, name, payload_len);
+       ret = ceph_pagelist_append(pagelist, name, payload_len);
+       if (ret)
+               goto err_pagelist_free;
 
        op->xattr.value_len = size;
-       ceph_pagelist_append(pagelist, value, size);
+       ret = ceph_pagelist_append(pagelist, value, size);
+       if (ret)
+               goto err_pagelist_free;
        payload_len += size;
 
        op->xattr.cmp_op = cmp_op;
@@ -898,6 +911,10 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
        ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
        op->indata_len = payload_len;
        return 0;
+
+err_pagelist_free:
+       ceph_pagelist_release(pagelist);
+       return ret;
 }
 EXPORT_SYMBOL(osd_req_op_xattr_init);
 
@@ -1488,7 +1505,6 @@ enum calc_target_result {
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
                                           struct ceph_osd_request_target *t,
-                                          struct ceph_connection *con,
                                           bool any_change)
 {
        struct ceph_pg_pool_info *pi;
@@ -2272,7 +2288,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
        dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-       ct_res = calc_target(osdc, &req->r_t, NULL, false);
+       ct_res = calc_target(osdc, &req->r_t, false);
        if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
                goto promote;
 
@@ -2476,6 +2492,14 @@ void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err)
 }
 EXPORT_SYMBOL(ceph_osdc_abort_requests);
 
+void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc)
+{
+       down_write(&osdc->lock);
+       osdc->abort_err = 0;
+       up_write(&osdc->lock);
+}
+EXPORT_SYMBOL(ceph_osdc_clear_abort_err);
+
 static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
 {
        if (likely(eb > osdc->epoch_barrier)) {
@@ -3087,7 +3111,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
                lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
        }
 
-       calc_target(osdc, &lreq->t, NULL, false);
+       calc_target(osdc, &lreq->t, false);
        osd = lookup_create_osd(osdc, lreq->t.osd, true);
        link_linger(osd, lreq);
 
@@ -3704,7 +3728,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq)
        struct ceph_osd_client *osdc = lreq->osdc;
        enum calc_target_result ct_res;
 
-       ct_res = calc_target(osdc, &lreq->t, NULL, true);
+       ct_res = calc_target(osdc, &lreq->t, true);
        if (ct_res == CALC_TARGET_NEED_RESEND) {
                struct ceph_osd *osd;
 
@@ -3776,8 +3800,7 @@ static void scan_requests(struct ceph_osd *osd,
                n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
                dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-               ct_res = calc_target(osdc, &req->r_t, &req->r_osd->o_con,
-                                    false);
+               ct_res = calc_target(osdc, &req->r_t, false);
                switch (ct_res) {
                case CALC_TARGET_NO_ACTION:
                        force_resend_writes = cleared_full ||
@@ -3886,7 +3909,7 @@ static void kick_requests(struct ceph_osd_client *osdc,
                n = rb_next(n);
 
                if (req->r_t.epoch < osdc->osdmap->epoch) {
-                       ct_res = calc_target(osdc, &req->r_t, NULL, false);
+                       ct_res = calc_target(osdc, &req->r_t, false);
                        if (ct_res == CALC_TARGET_POOL_DNE) {
                                erase_request(need_resend, req);
                                check_pool_dne(req);
@@ -5086,6 +5109,24 @@ out_put_req:
 }
 EXPORT_SYMBOL(ceph_osdc_call);
 
+/*
+ * reset all osd connections
+ */
+void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc)
+{
+       struct rb_node *n;
+
+       down_write(&osdc->lock);
+       for (n = rb_first(&osdc->osds); n; ) {
+               struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+               n = rb_next(n);
+               if (!reopen_osd(osd))
+                       kick_osd_requests(osd);
+       }
+       up_write(&osdc->lock);
+}
+
 /*
  * init, shutdown
  */
index 9043790..4e0de14 100644 (file)
@@ -973,11 +973,11 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
                                 struct ceph_pg_pool_info, node);
                __remove_pg_pool(&map->pg_pools, pi);
        }
-       kfree(map->osd_state);
-       kfree(map->osd_weight);
-       kfree(map->osd_addr);
-       kfree(map->osd_primary_affinity);
-       kfree(map->crush_workspace);
+       kvfree(map->osd_state);
+       kvfree(map->osd_weight);
+       kvfree(map->osd_addr);
+       kvfree(map->osd_primary_affinity);
+       kvfree(map->crush_workspace);
        kfree(map);
 }
 
@@ -986,28 +986,41 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
  *
  * The new elements are properly initialized.
  */
-static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
 {
        u32 *state;
        u32 *weight;
        struct ceph_entity_addr *addr;
+       u32 to_copy;
        int i;
 
-       state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS);
-       if (!state)
-               return -ENOMEM;
-       map->osd_state = state;
+       dout("%s old %u new %u\n", __func__, map->max_osd, max);
+       if (max == map->max_osd)
+               return 0;
 
-       weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS);
-       if (!weight)
+       state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
+       weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
+       addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
+       if (!state || !weight || !addr) {
+               kvfree(state);
+               kvfree(weight);
+               kvfree(addr);
                return -ENOMEM;
-       map->osd_weight = weight;
+       }
 
-       addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS);
-       if (!addr)
-               return -ENOMEM;
-       map->osd_addr = addr;
+       to_copy = min(map->max_osd, max);
+       if (map->osd_state) {
+               memcpy(state, map->osd_state, to_copy * sizeof(*state));
+               memcpy(weight, map->osd_weight, to_copy * sizeof(*weight));
+               memcpy(addr, map->osd_addr, to_copy * sizeof(*addr));
+               kvfree(map->osd_state);
+               kvfree(map->osd_weight);
+               kvfree(map->osd_addr);
+       }
 
+       map->osd_state = state;
+       map->osd_weight = weight;
+       map->osd_addr = addr;
        for (i = map->max_osd; i < max; i++) {
                map->osd_state[i] = 0;
                map->osd_weight[i] = CEPH_OSD_OUT;
@@ -1017,12 +1030,16 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
        if (map->osd_primary_affinity) {
                u32 *affinity;
 
-               affinity = krealloc(map->osd_primary_affinity,
-                                   max*sizeof(*affinity), GFP_NOFS);
+               affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
+                                        GFP_NOFS);
                if (!affinity)
                        return -ENOMEM;
-               map->osd_primary_affinity = affinity;
 
+               memcpy(affinity, map->osd_primary_affinity,
+                      to_copy * sizeof(*affinity));
+               kvfree(map->osd_primary_affinity);
+
+               map->osd_primary_affinity = affinity;
                for (i = map->max_osd; i < max; i++)
                        map->osd_primary_affinity[i] =
                            CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
@@ -1043,7 +1060,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
 
        work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE);
        dout("%s work_size %zu bytes\n", __func__, work_size);
-       workspace = kmalloc(work_size, GFP_NOIO);
+       workspace = ceph_kvmalloc(work_size, GFP_NOIO);
        if (!workspace) {
                crush_destroy(crush);
                return -ENOMEM;
@@ -1052,7 +1069,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
 
        if (map->crush)
                crush_destroy(map->crush);
-       kfree(map->crush_workspace);
+       kvfree(map->crush_workspace);
        map->crush = crush;
        map->crush_workspace = workspace;
        return 0;
@@ -1298,9 +1315,9 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
        if (!map->osd_primary_affinity) {
                int i;
 
-               map->osd_primary_affinity = kmalloc_array(map->max_osd,
-                                                         sizeof(u32),
-                                                         GFP_NOFS);
+               map->osd_primary_affinity = ceph_kvmalloc(
+                   array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
+                   GFP_NOFS);
                if (!map->osd_primary_affinity)
                        return -ENOMEM;
 
@@ -1321,7 +1338,7 @@ static int decode_primary_affinity(void **p, void *end,
 
        ceph_decode_32_safe(p, end, len, e_inval);
        if (len == 0) {
-               kfree(map->osd_primary_affinity);
+               kvfree(map->osd_primary_affinity);
                map->osd_primary_affinity = NULL;
                return 0;
        }
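
krealloc() can only resize slab memory, so once the per-OSD arrays may be large enough to need a vmalloc fallback the patch switches to ceph_kvmalloc() plus an explicit copy, and allocates all three arrays up front so a mid-resize failure cannot leave the map partially grown. A minimal sketch of that grow-and-copy pattern using the generic kvmalloc()/kvfree() API (grow_array() is an illustrative helper, not from the patch):

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/overflow.h>
#include <linux/string.h>

static void *grow_array(void *old, size_t old_n, size_t new_n,
                        size_t elem_size, gfp_t gfp)
{
        void *buf = kvmalloc(array_size(new_n, elem_size), gfp);

        if (!buf)
                return NULL;            /* old array left untouched */
        if (old) {
                memcpy(buf, old, min(old_n, new_n) * elem_size);
                kvfree(old);
        }
        return buf;
}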
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4cc8dc5..da3c24e 100644
@@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
        if (error)
                goto out_err;
 
-       if (sk->sk_receive_queue.prev != skb)
+       if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
                goto out;
 
        /* Socket shut down? */
@@ -278,7 +278,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
                        break;
 
                sk_busy_loop(sk, flags & MSG_DONTWAIT);
-       } while (sk->sk_receive_queue.prev != *last);
+       } while (READ_ONCE(sk->sk_receive_queue.prev) != *last);
 
        error = -EAGAIN;
 
@@ -640,7 +640,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
                skb->len += copied;
                skb->truesize += truesize;
                if (sk && sk->sk_type == SOCK_STREAM) {
-                       sk->sk_wmem_queued += truesize;
+                       sk_wmem_queued_add(sk, truesize);
                        sk_mem_charge(sk, truesize);
                } else {
                        refcount_add(truesize, &skb->sk->sk_wmem_alloc);
@@ -767,7 +767,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
        mask = 0;
 
        /* exceptional events? */
-       if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+       if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
                mask |= EPOLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
@@ -777,7 +777,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
                mask |= EPOLLHUP;
 
        /* readable? */
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        /* Connection-based need to check for termination and startup */
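
The hunks above read receive-queue state without holding sk_receive_queue.lock (poll and the recv fast path), so the loads are wrapped in READ_ONCE()/skb_queue_empty_lockless() to stop the compiler from tearing or refetching them. A sketch of the lockless emptiness check, mirroring the upstream skb_queue_empty_lockless() helper:

#include <linux/compiler.h>
#include <linux/skbuff.h>

static inline bool queue_empty_lockless(const struct sk_buff_head *list)
{
        /* single annotated load; safe to call without the queue lock */
        return READ_ONCE(list->next) == (const struct sk_buff *)list;
}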
diff --git a/net/core/dev.c b/net/core/dev.c
index 71b18e8..99ac84f 100644
 #include "net-sysfs.h"
 
 #define MAX_GRO_SKBS 8
+#define MAX_NEST_DEV 8
 
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -276,88 +277,6 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
-#ifdef CONFIG_LOCKDEP
-/*
- * register_netdevice() inits txq->_xmit_lock and sets lockdep class
- * according to dev->type
- */
-static const unsigned short netdev_lock_type[] = {
-        ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
-        ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
-        ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
-        ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
-        ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
-        ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
-        ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
-        ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
-        ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
-        ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
-        ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
-        ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
-        ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
-        ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
-        ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
-
-static const char *const netdev_lock_name[] = {
-       "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
-       "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
-       "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
-       "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
-       "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
-       "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
-       "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
-       "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
-       "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
-       "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
-       "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
-       "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
-       "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
-       "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
-       "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
-
-static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
-static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
-
-static inline unsigned short netdev_lock_pos(unsigned short dev_type)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
-               if (netdev_lock_type[i] == dev_type)
-                       return i;
-       /* the last key is used by default */
-       return ARRAY_SIZE(netdev_lock_type) - 1;
-}
-
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
-                                                unsigned short dev_type)
-{
-       int i;
-
-       i = netdev_lock_pos(dev_type);
-       lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
-                                  netdev_lock_name[i]);
-}
-
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
-       int i;
-
-       i = netdev_lock_pos(dev->type);
-       lockdep_set_class_and_name(&dev->addr_list_lock,
-                                  &netdev_addr_lock_key[i],
-                                  netdev_lock_name[i]);
-}
-#else
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
-                                                unsigned short dev_type)
-{
-}
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
-}
-#endif
-
 /*******************************************************************************
  *
  *             Protocol management and registration routines
@@ -5666,7 +5585,7 @@ EXPORT_SYMBOL(gro_find_complete_by_type);
 static void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
        skb_dst_drop(skb);
-       secpath_reset(skb);
+       skb_ext_put(skb);
        kmem_cache_free(skbuff_head_cache, skb);
 }
 
@@ -5733,7 +5652,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
        skb->encapsulation = 0;
        skb_shinfo(skb)->gso_type = 0;
        skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-       secpath_reset(skb);
+       skb_ext_reset(skb);
 
        napi->skb = skb;
 }
@@ -6489,6 +6408,9 @@ struct netdev_adjacent {
        /* upper master flag, there can only be one master device per list */
        bool master;
 
+       /* lookup ignore flag */
+       bool ignore;
+
        /* counter for the number of times this device was added to us */
        u16 ref_nr;
 
@@ -6511,7 +6433,7 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
        return NULL;
 }
 
-static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
 {
        struct net_device *dev = data;
 
@@ -6532,7 +6454,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
 {
        ASSERT_RTNL();
 
-       return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+       return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
                                             upper_dev);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -6550,7 +6472,7 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
 bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
                                  struct net_device *upper_dev)
 {
-       return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+       return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
                                               upper_dev);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
@@ -6594,6 +6516,22 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
+{
+       struct netdev_adjacent *upper;
+
+       ASSERT_RTNL();
+
+       if (list_empty(&dev->adj_list.upper))
+               return NULL;
+
+       upper = list_first_entry(&dev->adj_list.upper,
+                                struct netdev_adjacent, list);
+       if (likely(upper->master) && !upper->ignore)
+               return upper->dev;
+       return NULL;
+}
+
 /**
  * netdev_has_any_lower_dev - Check if device is linked to some device
  * @dev: device
@@ -6644,6 +6582,23 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
 
+static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
+                                                 struct list_head **iter,
+                                                 bool *ignore)
+{
+       struct netdev_adjacent *upper;
+
+       upper = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+       if (&upper->list == &dev->adj_list.upper)
+               return NULL;
+
+       *iter = &upper->list;
+       *ignore = upper->ignore;
+
+       return upper->dev;
+}
+
 static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
                                                    struct list_head **iter)
 {
@@ -6661,34 +6616,111 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
        return upper->dev;
 }
 
+static int __netdev_walk_all_upper_dev(struct net_device *dev,
+                                      int (*fn)(struct net_device *dev,
+                                                void *data),
+                                      void *data)
+{
+       struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+       struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+       int ret, cur = 0;
+       bool ignore;
+
+       now = dev;
+       iter = &dev->adj_list.upper;
+
+       while (1) {
+               if (now != dev) {
+                       ret = fn(now, data);
+                       if (ret)
+                               return ret;
+               }
+
+               next = NULL;
+               while (1) {
+                       udev = __netdev_next_upper_dev(now, &iter, &ignore);
+                       if (!udev)
+                               break;
+                       if (ignore)
+                               continue;
+
+                       next = udev;
+                       niter = &udev->adj_list.upper;
+                       dev_stack[cur] = now;
+                       iter_stack[cur++] = iter;
+                       break;
+               }
+
+               if (!next) {
+                       if (!cur)
+                               return 0;
+                       next = dev_stack[--cur];
+                       niter = iter_stack[cur];
+               }
+
+               now = next;
+               iter = niter;
+       }
+
+       return 0;
+}
+
 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
                                  int (*fn)(struct net_device *dev,
                                            void *data),
                                  void *data)
 {
-       struct net_device *udev;
-       struct list_head *iter;
-       int ret;
+       struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+       struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+       int ret, cur = 0;
 
-       for (iter = &dev->adj_list.upper,
-            udev = netdev_next_upper_dev_rcu(dev, &iter);
-            udev;
-            udev = netdev_next_upper_dev_rcu(dev, &iter)) {
-               /* first is the upper device itself */
-               ret = fn(udev, data);
-               if (ret)
-                       return ret;
+       now = dev;
+       iter = &dev->adj_list.upper;
 
-               /* then look at all of its upper devices */
-               ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
-               if (ret)
-                       return ret;
+       while (1) {
+               if (now != dev) {
+                       ret = fn(now, data);
+                       if (ret)
+                               return ret;
+               }
+
+               next = NULL;
+               while (1) {
+                       udev = netdev_next_upper_dev_rcu(now, &iter);
+                       if (!udev)
+                               break;
+
+                       next = udev;
+                       niter = &udev->adj_list.upper;
+                       dev_stack[cur] = now;
+                       iter_stack[cur++] = iter;
+                       break;
+               }
+
+               if (!next) {
+                       if (!cur)
+                               return 0;
+                       next = dev_stack[--cur];
+                       niter = iter_stack[cur];
+               }
+
+               now = next;
+               iter = niter;
        }
 
        return 0;
 }
 EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
 
+static bool __netdev_has_upper_dev(struct net_device *dev,
+                                  struct net_device *upper_dev)
+{
+       ASSERT_RTNL();
+
+       return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
+                                          upper_dev);
+}
+
 /**
  * netdev_lower_get_next_private - Get the next ->private from the
  *                                lower neighbour list
@@ -6785,34 +6817,119 @@ static struct net_device *netdev_next_lower_dev(struct net_device *dev,
        return lower->dev;
 }
 
+static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
+                                                 struct list_head **iter,
+                                                 bool *ignore)
+{
+       struct netdev_adjacent *lower;
+
+       lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+       if (&lower->list == &dev->adj_list.lower)
+               return NULL;
+
+       *iter = &lower->list;
+       *ignore = lower->ignore;
+
+       return lower->dev;
+}
+
 int netdev_walk_all_lower_dev(struct net_device *dev,
                              int (*fn)(struct net_device *dev,
                                        void *data),
                              void *data)
 {
-       struct net_device *ldev;
-       struct list_head *iter;
-       int ret;
+       struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+       struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+       int ret, cur = 0;
 
-       for (iter = &dev->adj_list.lower,
-            ldev = netdev_next_lower_dev(dev, &iter);
-            ldev;
-            ldev = netdev_next_lower_dev(dev, &iter)) {
-               /* first is the lower device itself */
-               ret = fn(ldev, data);
-               if (ret)
-                       return ret;
+       now = dev;
+       iter = &dev->adj_list.lower;
 
-               /* then look at all of its lower devices */
-               ret = netdev_walk_all_lower_dev(ldev, fn, data);
-               if (ret)
-                       return ret;
+       while (1) {
+               if (now != dev) {
+                       ret = fn(now, data);
+                       if (ret)
+                               return ret;
+               }
+
+               next = NULL;
+               while (1) {
+                       ldev = netdev_next_lower_dev(now, &iter);
+                       if (!ldev)
+                               break;
+
+                       next = ldev;
+                       niter = &ldev->adj_list.lower;
+                       dev_stack[cur] = now;
+                       iter_stack[cur++] = iter;
+                       break;
+               }
+
+               if (!next) {
+                       if (!cur)
+                               return 0;
+                       next = dev_stack[--cur];
+                       niter = iter_stack[cur];
+               }
+
+               now = next;
+               iter = niter;
        }
 
        return 0;
 }
 EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
 
+static int __netdev_walk_all_lower_dev(struct net_device *dev,
+                                      int (*fn)(struct net_device *dev,
+                                                void *data),
+                                      void *data)
+{
+       struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+       struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+       int ret, cur = 0;
+       bool ignore;
+
+       now = dev;
+       iter = &dev->adj_list.lower;
+
+       while (1) {
+               if (now != dev) {
+                       ret = fn(now, data);
+                       if (ret)
+                               return ret;
+               }
+
+               next = NULL;
+               while (1) {
+                       ldev = __netdev_next_lower_dev(now, &iter, &ignore);
+                       if (!ldev)
+                               break;
+                       if (ignore)
+                               continue;
+
+                       next = ldev;
+                       niter = &ldev->adj_list.lower;
+                       dev_stack[cur] = now;
+                       iter_stack[cur++] = iter;
+                       break;
+               }
+
+               if (!next) {
+                       if (!cur)
+                               return 0;
+                       next = dev_stack[--cur];
+                       niter = iter_stack[cur];
+               }
+
+               now = next;
+               iter = niter;
+       }
+
+       return 0;
+}
+
 static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
                                                    struct list_head **iter)
 {
@@ -6827,28 +6944,99 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
        return lower->dev;
 }
 
-int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
-                                 int (*fn)(struct net_device *dev,
-                                           void *data),
-                                 void *data)
+static u8 __netdev_upper_depth(struct net_device *dev)
+{
+       struct net_device *udev;
+       struct list_head *iter;
+       u8 max_depth = 0;
+       bool ignore;
+
+       for (iter = &dev->adj_list.upper,
+            udev = __netdev_next_upper_dev(dev, &iter, &ignore);
+            udev;
+            udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
+               if (ignore)
+                       continue;
+               if (max_depth < udev->upper_level)
+                       max_depth = udev->upper_level;
+       }
+
+       return max_depth;
+}
+
+static u8 __netdev_lower_depth(struct net_device *dev)
 {
        struct net_device *ldev;
        struct list_head *iter;
-       int ret;
+       u8 max_depth = 0;
+       bool ignore;
 
        for (iter = &dev->adj_list.lower,
-            ldev = netdev_next_lower_dev_rcu(dev, &iter);
+            ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
             ldev;
-            ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
-               /* first is the lower device itself */
-               ret = fn(ldev, data);
-               if (ret)
-                       return ret;
+            ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
+               if (ignore)
+                       continue;
+               if (max_depth < ldev->lower_level)
+                       max_depth = ldev->lower_level;
+       }
 
-               /* then look at all of its lower devices */
-               ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
-               if (ret)
-                       return ret;
+       return max_depth;
+}
+
+static int __netdev_update_upper_level(struct net_device *dev, void *data)
+{
+       dev->upper_level = __netdev_upper_depth(dev) + 1;
+       return 0;
+}
+
+static int __netdev_update_lower_level(struct net_device *dev, void *data)
+{
+       dev->lower_level = __netdev_lower_depth(dev) + 1;
+       return 0;
+}
+
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+                                 int (*fn)(struct net_device *dev,
+                                           void *data),
+                                 void *data)
+{
+       struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+       struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+       int ret, cur = 0;
+
+       now = dev;
+       iter = &dev->adj_list.lower;
+
+       while (1) {
+               if (now != dev) {
+                       ret = fn(now, data);
+                       if (ret)
+                               return ret;
+               }
+
+               next = NULL;
+               while (1) {
+                       ldev = netdev_next_lower_dev_rcu(now, &iter);
+                       if (!ldev)
+                               break;
+
+                       next = ldev;
+                       niter = &ldev->adj_list.lower;
+                       dev_stack[cur] = now;
+                       iter_stack[cur++] = iter;
+                       break;
+               }
+
+               if (!next) {
+                       if (!cur)
+                               return 0;
+                       next = dev_stack[--cur];
+                       niter = iter_stack[cur];
+               }
+
+               now = next;
+               iter = niter;
        }
 
        return 0;
@@ -6952,6 +7140,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
        adj->master = master;
        adj->ref_nr = 1;
        adj->private = private;
+       adj->ignore = false;
        dev_hold(adj_dev);
 
        pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
@@ -7102,14 +7291,17 @@ static int __netdev_upper_dev_link(struct net_device *dev,
                return -EBUSY;
 
        /* To prevent loops, check if dev is not upper device to upper_dev. */
-       if (netdev_has_upper_dev(upper_dev, dev))
+       if (__netdev_has_upper_dev(upper_dev, dev))
                return -EBUSY;
 
+       if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
+               return -EMLINK;
+
        if (!master) {
-               if (netdev_has_upper_dev(dev, upper_dev))
+               if (__netdev_has_upper_dev(dev, upper_dev))
                        return -EEXIST;
        } else {
-               master_dev = netdev_master_upper_dev_get(dev);
+               master_dev = __netdev_master_upper_dev_get(dev);
                if (master_dev)
                        return master_dev == upper_dev ? -EEXIST : -EBUSY;
        }
@@ -7131,6 +7323,13 @@ static int __netdev_upper_dev_link(struct net_device *dev,
        if (ret)
                goto rollback;
 
+       __netdev_update_upper_level(dev, NULL);
+       __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+       __netdev_update_lower_level(upper_dev, NULL);
+       __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+                                   NULL);
+
        return 0;
 
 rollback:
@@ -7213,9 +7412,96 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 
        call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
                                      &changeupper_info.info);
+
+       __netdev_update_upper_level(dev, NULL);
+       __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+       __netdev_update_lower_level(upper_dev, NULL);
+       __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+                                   NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
+static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
+                                     struct net_device *lower_dev,
+                                     bool val)
+{
+       struct netdev_adjacent *adj;
+
+       adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
+       if (adj)
+               adj->ignore = val;
+
+       adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
+       if (adj)
+               adj->ignore = val;
+}
+
+static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
+                                       struct net_device *lower_dev)
+{
+       __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
+}
+
+static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
+                                      struct net_device *lower_dev)
+{
+       __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
+}
+
+int netdev_adjacent_change_prepare(struct net_device *old_dev,
+                                  struct net_device *new_dev,
+                                  struct net_device *dev,
+                                  struct netlink_ext_ack *extack)
+{
+       int err;
+
+       if (!new_dev)
+               return 0;
+
+       if (old_dev && new_dev != old_dev)
+               netdev_adjacent_dev_disable(dev, old_dev);
+
+       err = netdev_upper_dev_link(new_dev, dev, extack);
+       if (err) {
+               if (old_dev && new_dev != old_dev)
+                       netdev_adjacent_dev_enable(dev, old_dev);
+               return err;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(netdev_adjacent_change_prepare);
+
+void netdev_adjacent_change_commit(struct net_device *old_dev,
+                                  struct net_device *new_dev,
+                                  struct net_device *dev)
+{
+       if (!new_dev || !old_dev)
+               return;
+
+       if (new_dev == old_dev)
+               return;
+
+       netdev_adjacent_dev_enable(dev, old_dev);
+       netdev_upper_dev_unlink(old_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_commit);
+
+void netdev_adjacent_change_abort(struct net_device *old_dev,
+                                 struct net_device *new_dev,
+                                 struct net_device *dev)
+{
+       if (!new_dev)
+               return;
+
+       if (old_dev && new_dev != old_dev)
+               netdev_adjacent_dev_enable(dev, old_dev);
+
+       netdev_upper_dev_unlink(new_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_abort);
+
 /**
  * netdev_bonding_info_change - Dispatch event about slave change
  * @dev: device
@@ -7329,25 +7615,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
 
-int dev_get_nest_level(struct net_device *dev)
-{
-       struct net_device *lower = NULL;
-       struct list_head *iter;
-       int max_nest = -1;
-       int nest;
-
-       ASSERT_RTNL();
-
-       netdev_for_each_lower_dev(dev, lower, iter) {
-               nest = dev_get_nest_level(lower);
-               if (max_nest < nest)
-                       max_nest = nest;
-       }
-
-       return max_nest + 1;
-}
-EXPORT_SYMBOL(dev_get_nest_level);
-
 /**
  * netdev_lower_change - Dispatch event about lower device state change
  * @lower_dev: device
@@ -8154,7 +8421,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                        return -EINVAL;
                }
 
-               if (prog->aux->id == prog_id) {
+               /* prog->aux->id may be 0 for orphaned device-bound progs */
+               if (prog->aux->id && prog->aux->id == prog_id) {
                        bpf_prog_put(prog);
                        return 0;
                }
@@ -8619,7 +8887,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 {
        /* Initialize queue lock */
        spin_lock_init(&queue->_xmit_lock);
-       netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+       lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
        queue->xmit_lock_owner = -1;
        netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
        queue->dev = dev;
@@ -8666,6 +8934,43 @@ void netif_tx_stop_all_queues(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_tx_stop_all_queues);
 
+static void netdev_register_lockdep_key(struct net_device *dev)
+{
+       lockdep_register_key(&dev->qdisc_tx_busylock_key);
+       lockdep_register_key(&dev->qdisc_running_key);
+       lockdep_register_key(&dev->qdisc_xmit_lock_key);
+       lockdep_register_key(&dev->addr_list_lock_key);
+}
+
+static void netdev_unregister_lockdep_key(struct net_device *dev)
+{
+       lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
+       lockdep_unregister_key(&dev->qdisc_running_key);
+       lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+       lockdep_unregister_key(&dev->addr_list_lock_key);
+}
+
+void netdev_update_lockdep_key(struct net_device *dev)
+{
+       struct netdev_queue *queue;
+       int i;
+
+       lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+       lockdep_unregister_key(&dev->addr_list_lock_key);
+
+       lockdep_register_key(&dev->qdisc_xmit_lock_key);
+       lockdep_register_key(&dev->addr_list_lock_key);
+
+       lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               queue = netdev_get_tx_queue(dev, i);
+
+               lockdep_set_class(&queue->_xmit_lock,
+                                 &dev->qdisc_xmit_lock_key);
+       }
+}
+EXPORT_SYMBOL(netdev_update_lockdep_key);
+
 /**
  *     register_netdevice      - register a network device
  *     @dev: device to register
@@ -8700,7 +9005,7 @@ int register_netdevice(struct net_device *dev)
        BUG_ON(!net);
 
        spin_lock_init(&dev->addr_list_lock);
-       netdev_set_addr_lockdep_class(dev);
+       lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
 
        ret = dev_get_valid_name(net, dev, dev->name);
        if (ret < 0)
@@ -9210,8 +9515,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
        dev_net_set(dev, &init_net);
 
+       netdev_register_lockdep_key(dev);
+
        dev->gso_max_size = GSO_MAX_SIZE;
        dev->gso_max_segs = GSO_MAX_SEGS;
+       dev->upper_level = 1;
+       dev->lower_level = 1;
 
        INIT_LIST_HEAD(&dev->napi_list);
        INIT_LIST_HEAD(&dev->unreg_list);
@@ -9292,6 +9601,8 @@ void free_netdev(struct net_device *dev)
        free_percpu(dev->pcpu_refcnt);
        dev->pcpu_refcnt = NULL;
 
+       netdev_unregister_lockdep_key(dev);
+
        /*  Compatibility with error handling in drivers */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                netdev_freemem(dev);
@@ -9460,7 +9771,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
        rcu_barrier();
 
-       new_nsid = peernet2id_alloc(dev_net(dev), net);
+       new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
        /* If there is an ifindex conflict assign a new one */
        if (__dev_get_by_index(net, dev->ifindex))
                new_ifindex = dev_new_index(net);
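
The largest dev.c change above converts the recursive netdev_walk_all_{upper,lower}_dev*() helpers into iterative walks over an explicit, fixed-size frame stack, and caches upper_level/lower_level so __netdev_upper_dev_link() can refuse stacking deeper than MAX_NEST_DEV with -EMLINK. A hedged sketch of the same bounded preorder DFS over a generic adjacency structure (struct node and the NULL-terminated child array are illustrative; the kernel iterates netdev_adjacent lists instead):

#include <linux/errno.h>
#include <linux/stddef.h>

#define MAX_DEPTH 8                     /* stands in for MAX_NEST_DEV */

struct node {
        struct node **children;         /* NULL-terminated array of uppers */
};

static int walk_all_upper(struct node *root,
                          int (*fn)(struct node *, void *), void *data)
{
        struct node *stack[MAX_DEPTH + 1];
        int idx[MAX_DEPTH + 1];
        int cur = 0, ret;

        stack[0] = root;
        idx[0] = 0;
        while (cur >= 0) {
                struct node *child = stack[cur]->children[idx[cur]];

                if (!child) {           /* siblings exhausted: pop a frame */
                        cur--;
                        continue;
                }
                idx[cur]++;             /* resume after this child on return */
                ret = fn(child, data);  /* visit before descending */
                if (ret)
                        return ret;
                if (cur == MAX_DEPTH)   /* link-time checks keep real graphs shallower */
                        return -EMLINK;
                stack[++cur] = child;   /* descend without recursing */
                idx[cur] = 0;
        }
        return 0;
}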
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 6393ba9..2f949b5 100644
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
        if (to->addr_len != from->addr_len)
                return -EINVAL;
 
-       netif_addr_lock_nested(to);
+       netif_addr_lock(to);
        err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
        if (!err)
                __dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
        if (to->addr_len != from->addr_len)
                return -EINVAL;
 
-       netif_addr_lock_nested(to);
+       netif_addr_lock(to);
        err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
        if (!err)
                __dev_set_rx_mode(to);
@@ -691,7 +691,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
                return;
 
        netif_addr_lock_bh(from);
-       netif_addr_lock_nested(to);
+       netif_addr_lock(to);
        __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
        __dev_set_rx_mode(to);
        netif_addr_unlock(to);
@@ -858,7 +858,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
        if (to->addr_len != from->addr_len)
                return -EINVAL;
 
-       netif_addr_lock_nested(to);
+       netif_addr_lock(to);
        err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
        if (!err)
                __dev_set_rx_mode(to);
@@ -888,7 +888,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
        if (to->addr_len != from->addr_len)
                return -EINVAL;
 
-       netif_addr_lock_nested(to);
+       netif_addr_lock(to);
        err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
        if (!err)
                __dev_set_rx_mode(to);
@@ -912,7 +912,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
                return;
 
        netif_addr_lock_bh(from);
-       netif_addr_lock_nested(to);
+       netif_addr_lock(to);
        __hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
        __dev_set_rx_mode(to);
        netif_addr_unlock(to);
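
The switch from netif_addr_lock_nested() to plain netif_addr_lock() above is enabled by the dev.c change earlier in this series: every net_device now registers its own dynamic lockdep keys, so taking two different devices' addr_list_locks no longer looks like recursive locking of one class and the _nested() annotation can go away. A sketch of the per-object key scheme (struct my_dev is an illustrative stand-in for net_device):

#include <linux/lockdep.h>
#include <linux/spinlock.h>

struct my_dev {
        spinlock_t addr_lock;
        struct lock_class_key addr_lock_key;    /* one lockdep class per device */
};

static void my_dev_init(struct my_dev *d)
{
        spin_lock_init(&d->addr_lock);
        lockdep_register_key(&d->addr_lock_key);
        lockdep_set_class(&d->addr_lock, &d->addr_lock_key);
}

static void my_dev_destroy(struct my_dev *d)
{
        lockdep_unregister_key(&d->addr_lock_key);      /* after the lock's last use */
}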
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e48680e..f80151e 100644
@@ -3172,7 +3172,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
                                                    NETLINK_CB(cb->skb).portid,
                                                    cb->nlh->nlmsg_seq,
                                                    NLM_F_MULTI);
-                       if (err) {
+                       if (err && err != -EOPNOTSUPP) {
                                mutex_unlock(&devlink->lock);
                                goto out;
                        }
@@ -3432,7 +3432,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
                                                NETLINK_CB(cb->skb).portid,
                                                cb->nlh->nlmsg_seq,
                                                NLM_F_MULTI);
-                               if (err) {
+                               if (err && err != -EOPNOTSUPP) {
                                        mutex_unlock(&devlink->lock);
                                        goto out;
                                }
@@ -4088,7 +4088,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           cb->extack);
                mutex_unlock(&devlink->lock);
-               if (err)
+               if (err && err != -EOPNOTSUPP)
                        break;
                idx++;
        }
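
All three devlink dump loops above now treat -EOPNOTSUPP from a fill callback as "this device does not implement the op, move on" instead of aborting the whole netlink dump. The convention, sketched with illustrative names:

#include <linux/errno.h>

static int dump_all(void **items, int n, int (*fill_one)(void *item))
{
        int i, err;

        for (i = 0; i < n; i++) {
                err = fill_one(items[i]);
                if (err && err != -EOPNOTSUPP)  /* tolerate "not implemented" */
                        return err;             /* real failures still abort */
        }
        return 0;
}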
diff --git a/net/core/dst.c b/net/core/dst.c
index 1325316..193af52 100644
@@ -172,7 +172,7 @@ void dst_release(struct dst_entry *dst)
                int newrefcnt;
 
                newrefcnt = atomic_dec_return(&dst->__refcnt);
-               if (unlikely(newrefcnt < 0))
+               if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
                        net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
                                             __func__, dst, newrefcnt);
                if (!newrefcnt)
@@ -187,7 +187,7 @@ void dst_release_immediate(struct dst_entry *dst)
                int newrefcnt;
 
                newrefcnt = atomic_dec_return(&dst->__refcnt);
-               if (unlikely(newrefcnt < 0))
+               if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
                        net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
                                             __func__, dst, newrefcnt);
                if (!newrefcnt)
index c763106..cd9bc67 100644 (file)
@@ -1396,11 +1396,13 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr)
 
 static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
 {
-       struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+       struct ethtool_wolinfo wol;
 
        if (!dev->ethtool_ops->get_wol)
                return -EOPNOTSUPP;
 
+       memset(&wol, 0, sizeof(struct ethtool_wolinfo));
+       wol.cmd = ETHTOOL_GWOL;
        dev->ethtool_ops->get_wol(dev, &wol);
 
        if (copy_to_user(useraddr, &wol, sizeof(wol)))
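
The ethtool_get_wol() change looks cosmetic but closes a kernel-stack infoleak: a designated initializer such as { .cmd = ETHTOOL_GWOL } zeroes the named members but need not zero padding bytes, and the whole struct is later copied to userspace. memset() clears padding too. A sketch of the pattern with an illustrative struct that has internal padding:

#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct report {
        u8  type;       /* typically followed by 3 padding bytes */
        u32 value;
};

static int report_to_user(struct report __user *uptr)
{
        struct report r;

        memset(&r, 0, sizeof(r));       /* clears padding, not just members */
        r.type = 1;
        r.value = 42;
        if (copy_to_user(uptr, &r, sizeof(r)))
                return -EFAULT;
        return 0;
}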
diff --git a/net/core/filter.c b/net/core/filter.c
index ed65636..3fed575 100644
@@ -4252,12 +4252,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
                case SO_RCVBUF:
                        val = min_t(u32, val, sysctl_rmem_max);
                        sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-                       sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+                       WRITE_ONCE(sk->sk_rcvbuf,
+                                  max_t(int, val * 2, SOCK_MIN_RCVBUF));
                        break;
                case SO_SNDBUF:
                        val = min_t(u32, val, sysctl_wmem_max);
                        sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-                       sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+                       WRITE_ONCE(sk->sk_sndbuf,
+                                  max_t(int, val * 2, SOCK_MIN_SNDBUF));
                        break;
                case SO_MAX_PACING_RATE: /* 32bit version */
                        if (val != ~0U)
@@ -4274,7 +4276,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
                case SO_RCVLOWAT:
                        if (val < 0)
                                val = INT_MAX;
-                       sk->sk_rcvlowat = val ? : 1;
+                       WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
                        break;
                case SO_MARK:
                        if (sk->sk_mark != val) {
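
These bpf_setsockopt() stores pair with the lockless reads added elsewhere in the series: sk_rcvbuf, sk_sndbuf and sk_rcvlowat are read without the socket lock, so any write an unlocked reader can observe is wrapped in WRITE_ONCE() to rule out store tearing. The paired annotations, sketched on an illustrative struct:

#include <linux/compiler.h>

struct box {
        int limit;
};

static void set_limit(struct box *b, int val)
{
        WRITE_ONCE(b->limit, val);      /* lockless readers must see a whole value */
}

static int get_limit(const struct box *b)
{
        return READ_ONCE(b->limit);     /* pairs with the WRITE_ONCE() above */
}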
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 7c09d87..68eda10 100644
@@ -1350,30 +1350,21 @@ out_bad:
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
 
-static u32 hashrnd __read_mostly;
+static siphash_key_t hashrnd __read_mostly;
 static __always_inline void __flow_hash_secret_init(void)
 {
        net_get_random_once(&hashrnd, sizeof(hashrnd));
 }
 
-static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
-                                            u32 keyval)
+static const void *flow_keys_hash_start(const struct flow_keys *flow)
 {
-       return jhash2(words, length, keyval);
-}
-
-static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
-{
-       const void *p = flow;
-
-       BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
-       return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
+       BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT);
+       return &flow->FLOW_KEYS_HASH_START_FIELD;
 }
 
 static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
 {
        size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
-       BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
        BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
                     sizeof(*flow) - sizeof(flow->addrs));
 
@@ -1388,7 +1379,7 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
                diff -= sizeof(flow->addrs.tipckey);
                break;
        }
-       return (sizeof(*flow) - diff) / sizeof(u32);
+       return sizeof(*flow) - diff;
 }
 
 __be32 flow_get_u32_src(const struct flow_keys *flow)
@@ -1454,14 +1445,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
        }
 }
 
-static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys,
+                                       const siphash_key_t *keyval)
 {
        u32 hash;
 
        __flow_hash_consistentify(keys);
 
-       hash = __flow_hash_words(flow_keys_hash_start(keys),
-                                flow_keys_hash_length(keys), keyval);
+       hash = siphash(flow_keys_hash_start(keys),
+                      flow_keys_hash_length(keys), keyval);
        if (!hash)
                hash = 1;
 
@@ -1471,12 +1463,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
 u32 flow_hash_from_keys(struct flow_keys *keys)
 {
        __flow_hash_secret_init();
-       return __flow_hash_from_keys(keys, hashrnd);
+       return __flow_hash_from_keys(keys, &hashrnd);
 }
 EXPORT_SYMBOL(flow_hash_from_keys);
 
 static inline u32 ___skb_get_hash(const struct sk_buff *skb,
-                                 struct flow_keys *keys, u32 keyval)
+                                 struct flow_keys *keys,
+                                 const siphash_key_t *keyval)
 {
        skb_flow_dissect_flow_keys(skb, keys,
                                   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
@@ -1524,7 +1517,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
                           &keys, NULL, 0, 0, 0,
                           FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
 
-       return __flow_hash_from_keys(&keys, hashrnd);
+       return __flow_hash_from_keys(&keys, &hashrnd);
 }
 EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
 
@@ -1544,13 +1537,14 @@ void __skb_get_hash(struct sk_buff *skb)
 
        __flow_hash_secret_init();
 
-       hash = ___skb_get_hash(skb, &keys, hashrnd);
+       hash = ___skb_get_hash(skb, &keys, &hashrnd);
 
        __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
 }
 EXPORT_SYMBOL(__skb_get_hash);
 
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
+__u32 skb_get_hash_perturb(const struct sk_buff *skb,
+                          const siphash_key_t *perturb)
 {
        struct flow_keys keys;
 
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index f93785e..74cfb8b 100644
@@ -88,11 +88,16 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
        int err = -EINVAL;
 
        if (skb->protocol == htons(ETH_P_IP)) {
+               struct net_device *dev = skb_dst(skb)->dev;
                struct iphdr *iph = ip_hdr(skb);
 
+               dev_hold(dev);
+               skb_dst_drop(skb);
                err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-                                          iph->tos, skb_dst(skb)->dev);
+                                          iph->tos, dev);
+               dev_put(dev);
        } else if (skb->protocol == htons(ETH_P_IPV6)) {
+               skb_dst_drop(skb);
                err = ipv6_stub->ipv6_route_input(skb);
        } else {
                err = -EAFNOSUPPORT;
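
The lwt_bpf fix above must drop the skb's dst before re-routing, but that dst is what pins the device still needed for the lookup, so the device is held first and released only after the lookup. A sketch of the ordering, with a hypothetical use_device() standing in for ip_route_input_noref():

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/dst.h>

static int use_device(struct sk_buff *skb, struct net_device *dev)
{
        return 0;       /* stand-in for the actual route lookup */
}

static int reroute_safely(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        int err;

        dev_hold(dev);          /* the dst dropped next owns our only reference */
        skb_dst_drop(skb);      /* must happen before a new route is attached */
        err = use_device(skb, dev);
        dev_put(dev);
        return err;
}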
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a0e0d29..3940284 100644
@@ -245,11 +245,12 @@ static int __peernet2id(struct net *net, struct net *peer)
        return __peernet2id_alloc(net, peer, &no);
 }
 
-static void rtnl_net_notifyid(struct net *net, int cmd, int id);
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+                             struct nlmsghdr *nlh, gfp_t gfp);
 /* This function returns the id of a peer netns. If no id is assigned, one will
  * be allocated and returned.
  */
-int peernet2id_alloc(struct net *net, struct net *peer)
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
 {
        bool alloc = false, alive = false;
        int id;
@@ -268,7 +269,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
        id = __peernet2id_alloc(net, peer, &alloc);
        spin_unlock_bh(&net->nsid_lock);
        if (alloc && id >= 0)
-               rtnl_net_notifyid(net, RTM_NEWNSID, id);
+               rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
        if (alive)
                put_net(peer);
        return id;
@@ -478,6 +479,7 @@ struct net *copy_net_ns(unsigned long flags,
 
        if (rv < 0) {
 put_userns:
+               key_remove_domain(net->key_domain);
                put_user_ns(user_ns);
                net_drop_ns(net);
 dec_ucounts:
@@ -532,7 +534,8 @@ static void unhash_nsid(struct net *net, struct net *last)
                        idr_remove(&tmp->netns_ids, id);
                spin_unlock_bh(&tmp->nsid_lock);
                if (id >= 0)
-                       rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+                       rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
+                                         GFP_KERNEL);
                if (tmp == last)
                        break;
        }
@@ -764,7 +767,8 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
        err = alloc_netid(net, peer, nsid);
        spin_unlock_bh(&net->nsid_lock);
        if (err >= 0) {
-               rtnl_net_notifyid(net, RTM_NEWNSID, err);
+               rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
+                                 nlh, GFP_KERNEL);
                err = 0;
        } else if (err == -ENOSPC && nsid >= 0) {
                err = -EEXIST;
@@ -1051,16 +1055,19 @@ end:
        return err < 0 ? err : skb->len;
 }
 
-static void rtnl_net_notifyid(struct net *net, int cmd, int id)
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+                             struct nlmsghdr *nlh, gfp_t gfp)
 {
        struct net_fill_args fillargs = {
+               .portid = portid,
+               .seq = nlh ? nlh->nlmsg_seq : 0,
                .cmd = cmd,
                .nsid = id,
        };
        struct sk_buff *msg;
        int err = -ENOMEM;
 
-       msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+       msg = nlmsg_new(rtnl_net_get_size(), gfp);
        if (!msg)
                goto out;
 
@@ -1068,7 +1075,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id)
        if (err < 0)
                goto err_out;
 
-       rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+       rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
        return;
 
 err_out:
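
rtnl_net_notifyid() can now be reached from contexts that must not sleep, so instead of hard-coding GFP_KERNEL the allocation flag is threaded from the callers (peernet2id_alloc() gains a gfp_t argument) down to nlmsg_new() and rtnl_notify(). The plumbing pattern, sketched with an illustrative helper:

#include <net/netlink.h>

/* The caller knows its context; the helper only forwards the flag. */
static struct sk_buff *build_notification(size_t payload, gfp_t gfp)
{
        return nlmsg_new(payload, gfp); /* GFP_KERNEL normally, GFP_ATOMIC in softirq */
}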
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index c9bb000..f35c2e9 100644
@@ -96,7 +96,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 
        fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
-       tcp_sk(sk)->fastopen_rsk = NULL;
+       RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL);
        spin_lock_bh(&fastopenq->lock);
        fastopenq->qlen--;
        tcp_rsk(req)->tfo_listener = false;
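
With fastopen_rsk now annotated __rcu, plain pointer stores become sparse errors. rcu_assign_pointer() publishes with a release barrier; RCU_INIT_POINTER() skips the barrier and is the right tool for storing NULL, since there is no newly published data for readers to order against, as in the hunk above. A sketch with illustrative types:

#include <linux/rcupdate.h>

struct req;                             /* opaque, illustrative */

struct holder {
        struct req __rcu *req;
};

static void publish(struct holder *h, struct req *r)
{
        rcu_assign_pointer(h->req, r);  /* readers may see r: barrier needed */
}

static void clear(struct holder *h)
{
        RCU_INIT_POINTER(h->req, NULL); /* NULL store: no ordering required */
}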
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1ee6460..c81cd80 100644
@@ -1523,7 +1523,7 @@ static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
 
 static int rtnl_fill_link_netnsid(struct sk_buff *skb,
                                  const struct net_device *dev,
-                                 struct net *src_net)
+                                 struct net *src_net, gfp_t gfp)
 {
        bool put_iflink = false;
 
@@ -1531,7 +1531,7 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb,
                struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
 
                if (!net_eq(dev_net(dev), link_net)) {
-                       int id = peernet2id_alloc(src_net, link_net);
+                       int id = peernet2id_alloc(src_net, link_net, gfp);
 
                        if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
                                return -EMSGSIZE;
@@ -1589,7 +1589,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
                            int type, u32 pid, u32 seq, u32 change,
                            unsigned int flags, u32 ext_filter_mask,
                            u32 event, int *new_nsid, int new_ifindex,
-                           int tgt_netnsid)
+                           int tgt_netnsid, gfp_t gfp)
 {
        struct ifinfomsg *ifm;
        struct nlmsghdr *nlh;
@@ -1681,7 +1681,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
                        goto nla_put_failure;
        }
 
-       if (rtnl_fill_link_netnsid(skb, dev, src_net))
+       if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
                goto nla_put_failure;
 
        if (new_nsid &&
@@ -2001,7 +2001,7 @@ walk_entries:
                                               NETLINK_CB(cb->skb).portid,
                                               nlh->nlmsg_seq, 0, flags,
                                               ext_filter_mask, 0, NULL, 0,
-                                              netnsid);
+                                              netnsid, GFP_KERNEL);
 
                        if (err < 0) {
                                if (likely(skb->len))
@@ -2355,6 +2355,7 @@ static int do_set_master(struct net_device *dev, int ifindex,
                        err = ops->ndo_del_slave(upper_dev, dev);
                        if (err)
                                return err;
+                       netdev_update_lockdep_key(dev);
                } else {
                        return -EOPNOTSUPP;
                }
@@ -3359,7 +3360,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
        err = rtnl_fill_ifinfo(nskb, dev, net,
                               RTM_NEWLINK, NETLINK_CB(skb).portid,
                               nlh->nlmsg_seq, 0, 0, ext_filter_mask,
-                              0, NULL, 0, netnsid);
+                              0, NULL, 0, netnsid, GFP_KERNEL);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in if_nlmsg_size */
                WARN_ON(err == -EMSGSIZE);
@@ -3471,7 +3472,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 
        err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
                               type, 0, 0, change, 0, 0, event,
-                              new_nsid, new_ifindex, -1);
+                              new_nsid, new_ifindex, -1, flags);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in if_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
@@ -3916,7 +3917,7 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh,
        ndm = nlmsg_data(nlh);
        if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
            ndm->ndm_flags || ndm->ndm_type) {
-               NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request");
+               NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request");
                return -EINVAL;
        }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f12e8a0..867e61d 100644
@@ -4415,7 +4415,7 @@ static void skb_set_err_queue(struct sk_buff *skb)
 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 {
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-           (unsigned int)sk->sk_rcvbuf)
+           (unsigned int)READ_ONCE(sk->sk_rcvbuf))
                return -ENOMEM;
 
        skb_orphan(skb);
@@ -5119,8 +5119,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
        skb->skb_iif = 0;
        skb->ignore_df = 0;
        skb_dst_drop(skb);
-       secpath_reset(skb);
-       nf_reset(skb);
+       skb_ext_reset(skb);
+       nf_reset_ct(skb);
        nf_reset_trace(skb);
 
 #ifdef CONFIG_NET_SWITCHDEV
@@ -5477,12 +5477,14 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
  * @skb: buffer
  * @mpls_lse: MPLS label stack entry to push
  * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
+ * @mac_len: length of the MAC header
  *
  * Expects skb->data at mac header.
  *
  * Returns 0 on success, -errno otherwise.
  */
-int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto)
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+                 int mac_len)
 {
        struct mpls_shim_hdr *lse;
        int err;
@@ -5499,15 +5501,15 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto)
                return err;
 
        if (!skb->inner_protocol) {
-               skb_set_inner_network_header(skb, skb->mac_len);
+               skb_set_inner_network_header(skb, mac_len);
                skb_set_inner_protocol(skb, skb->protocol);
        }
 
        skb_push(skb, MPLS_HLEN);
        memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
-               skb->mac_len);
+               mac_len);
        skb_reset_mac_header(skb);
-       skb_set_network_header(skb, skb->mac_len);
+       skb_set_network_header(skb, mac_len);
 
        lse = mpls_hdr(skb);
        lse->label_stack_entry = mpls_lse;
@@ -5526,29 +5528,30 @@ EXPORT_SYMBOL_GPL(skb_mpls_push);
  *
  * @skb: buffer
  * @next_proto: ethertype of header after popped MPLS header
+ * @mac_len: length of the MAC header
  *
  * Expects skb->data at mac header.
  *
  * Returns 0 on success, -errno otherwise.
  */
-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto)
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len)
 {
        int err;
 
        if (unlikely(!eth_p_mpls(skb->protocol)))
-               return -EINVAL;
+               return 0;
 
-       err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
+       err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
        if (unlikely(err))
                return err;
 
        skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
        memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
-               skb->mac_len);
+               mac_len);
 
        __skb_pull(skb, MPLS_HLEN);
        skb_reset_mac_header(skb);
-       skb_set_network_header(skb, skb->mac_len);
+       skb_set_network_header(skb, mac_len);
 
        if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
                struct ethhdr *hdr;
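
The mac_len parameter added above decouples skb_mpls_push() and skb_mpls_pop() from skb->mac_len, so callers that know the real MAC header length (for example when a VLAN tag sits between the Ethernet and network headers and skb->mac_len does not account for it) can supply it explicitly. Note also that skb_mpls_pop() now returns 0 instead of -EINVAL for non-MPLS packets, making it safe to call unconditionally. A minimal sketch of a hypothetical caller, assuming kernel context and the post-change signatures:

/* Hypothetical caller, not part of this series: derive the MAC length
 * from the current header offsets instead of trusting skb->mac_len.
 */
static int example_push_mpls(struct sk_buff *skb, __be32 lse)
{
	int mac_len = skb_network_offset(skb);	/* skb->data is at mac header */

	return skb_mpls_push(skb, lse, htons(ETH_P_MPLS_UC), mac_len);
}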
index 07863ed..ac78a57 100644 (file)
@@ -522,7 +522,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
                rc = sk_backlog_rcv(sk, skb);
 
                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-       } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+       } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
                bh_unlock_sock(sk);
                atomic_inc(&sk->sk_drops);
                goto discard_and_relse;
@@ -785,7 +785,8 @@ set_sndbuf:
                 */
                val = min_t(int, val, INT_MAX / 2);
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-               sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+               WRITE_ONCE(sk->sk_sndbuf,
+                          max_t(int, val * 2, SOCK_MIN_SNDBUF));
                /* Wake up sending tasks if we upped the value. */
                sk->sk_write_space(sk);
                break;
@@ -831,7 +832,8 @@ set_rcvbuf:
                 * returning the value we actually used in getsockopt
                 * is the most desirable behavior.
                 */
-               sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+               WRITE_ONCE(sk->sk_rcvbuf,
+                          max_t(int, val * 2, SOCK_MIN_RCVBUF));
                break;
 
        case SO_RCVBUFFORCE:
@@ -974,7 +976,7 @@ set_rcvbuf:
                if (sock->ops->set_rcvlowat)
                        ret = sock->ops->set_rcvlowat(sk, val);
                else
-                       sk->sk_rcvlowat = val ? : 1;
+                       WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
                break;
 
        case SO_RCVTIMEO_OLD:
@@ -1125,7 +1127,7 @@ set_rcvbuf:
                break;
                }
        case SO_INCOMING_CPU:
-               sk->sk_incoming_cpu = val;
+               WRITE_ONCE(sk->sk_incoming_cpu, val);
                break;
 
        case SO_CNX_ADVICE:
@@ -1474,7 +1476,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                break;
 
        case SO_INCOMING_CPU:
-               v.val = sk->sk_incoming_cpu;
+               v.val = READ_ONCE(sk->sk_incoming_cpu);
                break;
 
        case SO_MEMINFO:
@@ -1700,8 +1702,6 @@ static void __sk_destruct(struct rcu_head *head)
                sk_filter_uncharge(sk, filter);
                RCU_INIT_POINTER(sk->sk_filter, NULL);
        }
-       if (rcu_access_pointer(sk->sk_reuseport_cb))
-               reuseport_detach_sock(sk);
 
        sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
@@ -1728,7 +1728,14 @@ static void __sk_destruct(struct rcu_head *head)
 
 void sk_destruct(struct sock *sk)
 {
-       if (sock_flag(sk, SOCK_RCU_FREE))
+       bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
+
+       if (rcu_access_pointer(sk->sk_reuseport_cb)) {
+               reuseport_detach_sock(sk);
+               use_call_rcu = true;
+       }
+
+       if (use_call_rcu)
                call_rcu(&sk->sk_rcu, __sk_destruct);
        else
                __sk_destruct(&sk->sk_rcu);
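
The reordering above is deliberate: once a socket has been attached to a SO_REUSEPORT group, lockless lookups may still dereference it until an RCU grace period elapses, so detaching in sk_destruct() forces the call_rcu() path even for sockets without SOCK_RCU_FREE. A generic sketch of that deferred-free idiom, assuming kernel context (the struct and helpers here are illustrative, not from this diff):

struct foo {
	struct rcu_head rcu;
	/* ... payload ... */
};

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_release(struct foo *f, bool has_lockless_readers)
{
	if (has_lockless_readers)
		call_rcu(&f->rcu, foo_free_rcu);  /* free after grace period */
	else
		foo_free_rcu(&f->rcu);            /* no readers: free now */
}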
@@ -2083,8 +2090,10 @@ EXPORT_SYMBOL(sock_i_ino);
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
 {
-       if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+       if (force ||
+           refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
                struct sk_buff *skb = alloc_skb(size, priority);
+
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
@@ -2185,7 +2194,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-               if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+               if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
@@ -2220,7 +2229,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;
 
-               if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+               if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
                        break;
 
                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -2329,8 +2338,8 @@ static void sk_leave_memory_pressure(struct sock *sk)
        } else {
                unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
 
-               if (memory_pressure && *memory_pressure)
-                       *memory_pressure = 0;
+               if (memory_pressure && READ_ONCE(*memory_pressure))
+                       WRITE_ONCE(*memory_pressure, 0);
        }
 }
 
@@ -2801,7 +2810,7 @@ static void sock_def_write_space(struct sock *sk)
        /* Do not wake up a writer until he can make "significant"
         * progress.  --DaveM
         */
-       if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+       if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
                wq = rcu_dereference(sk->sk_wq);
                if (skwq_has_sleeper(wq))
                        wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
@@ -3199,13 +3208,13 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
        memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
 
        mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
-       mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+       mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
        mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
-       mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+       mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
        mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
-       mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+       mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
        mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-       mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+       mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
        mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
 }
 
@@ -3492,7 +3501,7 @@ static long sock_prot_memory_allocated(struct proto *proto)
        return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
 }
 
-static char *sock_prot_memory_pressure(struct proto *proto)
+static const char *sock_prot_memory_pressure(struct proto *proto)
 {
        return proto->memory_pressure != NULL ?
        proto_memory_pressure(proto) ? "yes" : "no" : "NI";
@@ -3591,7 +3600,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
 {
        struct sock *sk = p;
 
-       return !skb_queue_empty(&sk->sk_receive_queue) ||
+       return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
               sk_busy_loop_timeout(sk, start_time);
 }
 EXPORT_SYMBOL(sk_busy_loop_end);
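
The READ_ONCE()/WRITE_ONCE() annotations scattered through this file (and through the TCP hunks further below) all follow one pattern: fields such as sk_sndbuf, sk_rcvbuf, sk_rcvlowat and sk_incoming_cpu are written under the socket lock but read locklessly on fast paths, so every unlocked access is marked to prevent load/store tearing and to document the benign race for KCSAN. A self-contained analogue of the idiom, assuming a plain aligned scalar as in the kernel's usage:

#include <stdio.h>

/* Simplified forms of the kernel macros for an aligned scalar. */
#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)		(*(volatile const __typeof__(x) *)&(x))

static int sndbuf;			/* stands in for sk->sk_sndbuf */

int main(void)
{
	WRITE_ONCE(sndbuf, 212992);	/* writer side, normally under lock */
	int limit = READ_ONCE(sndbuf);	/* lockless reader: one single load */
	printf("limit=%d\n", limit);
	return 0;
}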
index b685bc8..0d8f782 100644 (file)
@@ -117,7 +117,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                                                    inet->inet_daddr,
                                                    inet->inet_sport,
                                                    inet->inet_dport);
-       inet->inet_id = dp->dccps_iss ^ jiffies;
+       inet->inet_id = prandom_u32();
 
        err = dccp_connect(sk);
        rt = NULL;
@@ -871,7 +871,7 @@ lookup:
 
        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
 
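Seeding inet_id from prandom_u32() rather than from dccps_iss ^ jiffies (and, in the later ipv4 and TCP hunks, from plain jiffies or write_seq ^ jiffies) removes a predictable IP ID stream: clock-derived IDs let an off-path observer infer kernel state and track or interfere with flows. A one-line kernel-context sketch of the new initialization (illustrative wrapper, not from this diff):

static void example_init_inet_id(struct inet_sock *inet)
{
	inet->inet_id = prandom_u32();	/* truncated to 16 bits on store */
}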
index 1b7381f..25aab67 100644 (file)
@@ -230,7 +230,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
-               err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
+               err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
+                              sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
@@ -284,7 +285,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
        dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(skb, dst);
-               ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
+               ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
                DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
                DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
                return;
index 0ea7528..3349ea8 100644 (file)
@@ -1205,7 +1205,7 @@ static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table  *wai
        struct dn_scp *scp = DN_SK(sk);
        __poll_t mask = datagram_poll(file, sock, wait);
 
-       if (!skb_queue_empty(&scp->other_receive_queue))
+       if (!skb_queue_empty_lockless(&scp->other_receive_queue))
                mask |= EPOLLRDBAND;
 
        return mask;
index 7300202..716d265 100644 (file)
@@ -46,7 +46,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
        dst->index = index;
 
        INIT_LIST_HEAD(&dst->list);
-       list_add_tail(&dsa_tree_list, &dst->list);
+       list_add_tail(&dst->list, &dsa_tree_list);
 
        kref_init(&dst->refcount);
 
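The one-line dsa2.c change above fixes swapped arguments: list_add_tail() takes the new entry first and the list head second, so the old call linked the global dsa_tree_list head onto the tree's own (empty) list instead of queueing the tree. A self-contained sketch of the convention, with a minimal re-implementation of the kernel's circular doubly linked list for illustration:

#include <assert.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

#define LIST_HEAD_INIT(name)	{ &(name), &(name) }

/* Insert 'entry' just before 'head', i.e. at the tail of the list. */
static void list_add_tail(struct list_head *entry, struct list_head *head)
{
	entry->prev = head->prev;
	entry->next = head;
	head->prev->next = entry;
	head->prev = entry;
}

struct tree { int index; struct list_head list; };

static struct list_head tree_list = LIST_HEAD_INIT(tree_list);

int main(void)
{
	struct tree t = { .index = 0 };

	list_add_tail(&t.list, &tree_list);	/* entry first, head second */
	assert(tree_list.next == &t.list && tree_list.prev == &t.list);
	puts("tree linked at tail");
	return 0;
}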
index a8e52c9..3255dfc 100644 (file)
@@ -310,8 +310,6 @@ static void dsa_master_reset_mtu(struct net_device *dev)
        rtnl_unlock();
 }
 
-static struct lock_class_key dsa_master_addr_list_lock_key;
-
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 {
        int ret;
@@ -325,9 +323,6 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
        wmb();
 
        dev->dsa_ptr = cpu_dp;
-       lockdep_set_class(&dev->addr_list_lock,
-                         &dsa_master_addr_list_lock_key);
-
        ret = dsa_master_ethtool_setup(dev);
        if (ret)
                return ret;
index 75d5822..028e65f 100644 (file)
@@ -1341,15 +1341,6 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
        return ret;
 }
 
-static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
-static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
-                                           struct netdev_queue *txq,
-                                           void *_unused)
-{
-       lockdep_set_class(&txq->_xmit_lock,
-                         &dsa_slave_netdev_xmit_lock_key);
-}
-
 int dsa_slave_suspend(struct net_device *slave_dev)
 {
        struct dsa_port *dp = dsa_slave_to_port(slave_dev);
@@ -1433,9 +1424,6 @@ int dsa_slave_create(struct dsa_port *port)
        slave_dev->max_mtu = ETH_MAX_MTU;
        SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
 
-       netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
-                                NULL);
-
        SET_NETDEV_DEV(slave_dev, port->ds->dev);
        slave_dev->dev.of_node = port->dn;
        slave_dev->vlan_features = master->vlan_features;
index 9c9aff3..63ef2a1 100644 (file)
@@ -156,7 +156,11 @@ static struct sk_buff
        /* Step 1: A timestampable frame was received.
         * Buffer it until we get its meta frame.
         */
-       if (is_link_local && sp->data->hwts_rx_en) {
+       if (is_link_local) {
+               if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
+                       /* Do normal processing. */
+                       return skb;
+
                spin_lock(&sp->data->meta_lock);
                /* Was this a link-local frame instead of the meta
                 * that we were expecting?
@@ -187,6 +191,12 @@ static struct sk_buff
        } else if (is_meta) {
                struct sk_buff *stampable_skb;
 
+               /* Drop the meta frame if we're not in the right state
+                * to process it.
+                */
+               if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
+                       return NULL;
+
                spin_lock(&sp->data->meta_lock);
 
                stampable_skb = sp->data->stampable_skb;
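
Replacing the plain hwts_rx_en flag with an atomic state bit lets the hot RX path observe timestamping being switched on or off mid-stream without locking: link-local frames fall back to normal processing, and orphan meta frames are dropped, whenever the bit is clear. A short sketch of the bitops idiom, assuming kernel context and <linux/bitops.h> (the names here are illustrative):

enum { EXAMPLE_HWTS_RX_EN = 0 };

static unsigned long example_state;

static void example_hwts_rx_set(bool on)
{
	if (on)
		set_bit(EXAMPLE_HWTS_RX_EN, &example_state);	/* atomic */
	else
		clear_bit(EXAMPLE_HWTS_RX_EN, &example_state);	/* atomic */
}

static bool example_hwts_rx_enabled(void)
{
	return test_bit(EXAMPLE_HWTS_RX_EN, &example_state);
}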
index 3297e7f..c0b107c 100644 (file)
@@ -58,13 +58,6 @@ static const struct header_ops lowpan_header_ops = {
        .create = lowpan_header_create,
 };
 
-static int lowpan_dev_init(struct net_device *ldev)
-{
-       netdev_lockdep_set_classes(ldev);
-
-       return 0;
-}
-
 static int lowpan_open(struct net_device *dev)
 {
        if (!open_count)
@@ -96,7 +89,6 @@ static int lowpan_get_iflink(const struct net_device *dev)
 }
 
 static const struct net_device_ops lowpan_netdev_ops = {
-       .ndo_init               = lowpan_dev_init,
        .ndo_start_xmit         = lowpan_xmit,
        .ndo_open               = lowpan_open,
        .ndo_stop               = lowpan_stop,
index badc5cf..d93d453 100644 (file)
@@ -1008,6 +1008,9 @@ static int ieee802154_create(struct net *net, struct socket *sock,
 
        switch (sock->type) {
        case SOCK_RAW:
+               rc = -EPERM;
+               if (!capable(CAP_NET_RAW))
+                       goto out;
                proto = &ieee802154_raw_prot;
                ops = &ieee802154_raw_ops;
                break;
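
The new check closes a privilege gap: any user could previously open an AF_IEEE802154 SOCK_RAW socket, while other address families demand CAP_NET_RAW for the same access. The gate follows the usual create-handler pattern, sketched here under the assumption of kernel context (capable() from <linux/capability.h>):

static int example_create(struct socket *sock)
{
	switch (sock->type) {
	case SOCK_RAW:
		if (!capable(CAP_NET_RAW))
			return -EPERM;	/* raw sockets need privilege */
		break;
	default:
		break;
	}
	return 0;
}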
index 6cd1f6d..bcf6505 100644 (file)
@@ -5,7 +5,7 @@
 
 menuconfig NET_IFE
        depends on NET
-        tristate "Inter-FE based on IETF ForCES InterFE LFB"
+       tristate "Inter-FE based on IETF ForCES InterFE LFB"
        default n
        help
          Say Y here to add support for the IFE encapsulation protocol
index 974de4d..03381f3 100644 (file)
@@ -492,8 +492,8 @@ config TCP_CONG_WESTWOOD
        wired networks and throughput over wireless links.
 
 config TCP_CONG_HTCP
-        tristate "H-TCP"
-        default m
+       tristate "H-TCP"
+       default m
        ---help---
        H-TCP is a send-side only modification of the TCP Reno
        protocol stack that optimizes the performance of TCP
index 9a0fe0c..4a8550c 100644 (file)
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
        reuseport_has_conns(sk, true);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
-       inet->inet_id = jiffies;
+       inet->inet_id = prandom_u32();
 
        sk_dst_set(sk, &rt->dst);
        err = 0;
index dde77f7..71c78d2 100644 (file)
@@ -1148,7 +1148,7 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
        if (!(dev->flags & IFF_UP) ||
            ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
            ipv4_is_zeronet(prefix) ||
-           prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+           (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32))
                return;
 
        /* add the new */
index f5c163d..eb30fc1 100644 (file)
@@ -560,7 +560,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
                goto no_route;
-       if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+       if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
                goto route_err;
        rcu_read_unlock();
        return &rt->dst;
@@ -598,7 +598,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
                goto no_route;
-       if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+       if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
                goto route_err;
        return &rt->dst;
 
@@ -906,7 +906,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
        percpu_counter_inc(sk->sk_prot->orphan_count);
 
        if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
-               BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+               BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
                BUG_ON(sk != req->rsk_listener);
 
                /* Paranoid, to prevent race condition if
@@ -915,7 +915,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
                 * Also to satisfy an assertion in
                 * tcp_v4_destroy_sock().
                 */
-               tcp_sk(child)->fastopen_rsk = NULL;
+               RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
        }
        inet_csk_destroy_sock(child);
 }
@@ -934,7 +934,7 @@ struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
                req->sk = child;
                req->dl_next = NULL;
                if (queue->rskq_accept_head == NULL)
-                       queue->rskq_accept_head = req;
+                       WRITE_ONCE(queue->rskq_accept_head, req);
                else
                        queue->rskq_accept_tail->dl_next = req;
                queue->rskq_accept_tail = req;
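
The WRITE_ONCE() on rskq_accept_head pairs with lockless readers that poll the accept queue without taking the listener's lock; annotating the store keeps the pointer from tearing. A hedged sketch of the reader side, simplified from the upstream reqsk_queue_empty() helper:

static inline bool example_accept_queue_empty(const struct request_sock_queue *q)
{
	return READ_ONCE(q->rskq_accept_head) == NULL;
}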
index bbb005e..7dc79b9 100644 (file)
@@ -193,7 +193,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
        if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
                struct inet_diag_meminfo minfo = {
                        .idiag_rmem = sk_rmem_alloc_get(sk),
-                       .idiag_wmem = sk->sk_wmem_queued,
+                       .idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
                        .idiag_fmem = sk->sk_forward_alloc,
                        .idiag_tmem = sk_wmem_alloc_get(sk),
                };
index 9782486..83fb001 100644 (file)
@@ -240,7 +240,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
                        return -1;
 
                score = sk->sk_family == PF_INET ? 2 : 1;
-               if (sk->sk_incoming_cpu == raw_smp_processor_id())
+               if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
                        score++;
        }
        return score;
index 06f6f28..00ec819 100644 (file)
@@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
 
        rt = skb_rtable(skb);
 
-       if (opt->is_strictroute && rt->rt_gw_family)
+       if (opt->is_strictroute && rt->rt_uses_gateway)
                goto sr_failed;
 
        IPCB(skb)->flags |= IPSKB_FORWARDED;
index a53a543..10636fb 100644 (file)
@@ -509,9 +509,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
        key = &tun_info->key;
        if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
                goto err_free_skb;
-       md = ip_tunnel_info_opts(tun_info);
-       if (!md)
+       if (tun_info->options_len < sizeof(*md))
                goto err_free_skb;
+       md = ip_tunnel_info_opts(tun_info);
 
        /* ERSPAN has fixed 8 byte GRE header */
        version = md->version;
@@ -1446,6 +1446,7 @@ static void erspan_setup(struct net_device *dev)
        struct ip_tunnel *t = netdev_priv(dev);
 
        ether_setup(dev);
+       dev->max_mtu = 0;
        dev->netdev_ops = &erspan_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
index 1e2392b..c59a78a 100644 (file)
@@ -199,7 +199,7 @@ resubmit:
                                kfree_skb(skb);
                                return;
                        }
-                       nf_reset(skb);
+                       nf_reset_ct(skb);
                }
                ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
                                      skb);
index 5eb7377..3d8baaa 100644 (file)
@@ -499,7 +499,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
        skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
-       if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
+       if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
                goto no_route;
 
        /* OK, we know where to send it, allocate and build IP header. */
@@ -645,11 +645,12 @@ void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
 EXPORT_SYMBOL(ip_fraglist_prepare);
 
 void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
-                 unsigned int ll_rs, unsigned int mtu,
+                 unsigned int ll_rs, unsigned int mtu, bool DF,
                  struct ip_frag_state *state)
 {
        struct iphdr *iph = ip_hdr(skb);
 
+       state->DF = DF;
        state->hlen = hlen;
        state->ll_rs = ll_rs;
        state->mtu = mtu;
@@ -668,9 +669,6 @@ static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
        /* Copy the flags to each fragment. */
        IPCB(to)->flags = IPCB(from)->flags;
 
-       if (IPCB(from)->flags & IPSKB_FRAG_PMTU)
-               state->iph->frag_off |= htons(IP_DF);
-
        /* ANK: dirty, but effective trick. Upgrade options only if
         * the segment to be fragmented was THE FIRST (otherwise,
         * options are already fixed) and make it ONCE
@@ -738,6 +736,8 @@ struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
         */
        iph = ip_hdr(skb2);
        iph->frag_off = htons((state->offset >> 3));
+       if (state->DF)
+               iph->frag_off |= htons(IP_DF);
 
        /*
         *      Added AC : If we are fragmenting a fragment that's not the
@@ -771,6 +771,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
        struct rtable *rt = skb_rtable(skb);
        unsigned int mtu, hlen, ll_rs;
        struct ip_fraglist_iter iter;
+       ktime_t tstamp = skb->tstamp;
        struct ip_frag_state state;
        int err = 0;
 
@@ -846,6 +847,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                                ip_fraglist_prepare(skb, &iter);
                        }
 
+                       skb->tstamp = tstamp;
                        err = output(net, sk, skb);
 
                        if (!err)
@@ -881,7 +883,8 @@ slow_path:
         *      Fragment the datagram.
         */
 
-       ip_frag_init(skb, hlen, ll_rs, mtu, &state);
+       ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU,
+                    &state);
 
        /*
         *      Keep copying data until we run out.
@@ -900,6 +903,7 @@ slow_path:
                /*
                 *      Put this fragment into the sending queue.
                 */
+               skb2->tstamp = tstamp;
                err = output(net, sk, skb2);
                if (err)
                        goto fail;
@@ -1694,7 +1698,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 
        inet_sk(sk)->tos = arg->tos;
 
-       sk->sk_priority = skb->priority;
        sk->sk_protocol = ip_hdr(skb)->protocol;
        sk->sk_bound_dev_if = arg->bound_dev_if;
        sk->sk_sndbuf = sysctl_wmem_default;
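
Two fixes are threaded through the fragmentation code above: the DF bit requested via IPSKB_FRAG_PMTU is now carried in struct ip_frag_state and set on every fragment in ip_frag_next(), rather than being patched into the first fragment's header only, and skb->tstamp is saved before fragmentation and restored on each fragment so transmit timestamps survive the split. A hedged sketch of the resulting slow-path caller pattern, assuming kernel context and that the post-change API behaves as upstream:

ktime_t tstamp = skb->tstamp;		/* save before slicing */
struct ip_frag_state state;
struct sk_buff *skb2;
int err = 0;

ip_frag_init(skb, hlen, ll_rs, mtu,
	     IPCB(skb)->flags & IPSKB_FRAG_PMTU,	/* propagate DF */
	     &state);

while (state.left > 0) {
	skb2 = ip_frag_next(skb, &state);
	if (IS_ERR(skb2)) {
		err = PTR_ERR(skb2);
		break;
	}
	skb2->tstamp = tstamp;		/* every fragment keeps the stamp */
	err = output(net, sk, skb2);
	if (err)
		break;
}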
index 313470f..716d547 100644 (file)
@@ -1794,7 +1794,7 @@ static void ip_encap(struct net *net, struct sk_buff *skb,
        ip_send_check(iph);
 
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-       nf_reset(skb);
+       nf_reset_ct(skb);
 }
 
 static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
@@ -2140,7 +2140,7 @@ int ip_mr_input(struct sk_buff *skb)
 
                        mroute_sk = rcu_dereference(mrt->mroute_sk);
                        if (mroute_sk) {
-                               nf_reset(skb);
+                               nf_reset_ct(skb);
                                raw_rcv(mroute_sk, skb);
                                return 0;
                        }
index af3fbf7..6cc5743 100644 (file)
@@ -65,7 +65,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
 
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
        /* Avoid counting cloned packets towards the original connection. */
-       nf_reset(skb);
+       nf_reset_ct(skb);
        nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
 #endif
        /*
index 80da5a6..3183413 100644 (file)
@@ -332,7 +332,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
                kfree_skb(skb);
                return NET_RX_DROP;
        }
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        skb_push(skb, skb->data - skb_network_header(skb));
 
index b6a6f18..621f834 100644 (file)
@@ -635,6 +635,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 
        if (fnhe->fnhe_gw) {
                rt->rt_flags |= RTCF_REDIRECTED;
+               rt->rt_uses_gateway = 1;
                rt->rt_gw_family = AF_INET;
                rt->rt_gw4 = fnhe->fnhe_gw;
        }
@@ -915,16 +916,15 @@ void ip_rt_send_redirect(struct sk_buff *skb)
        if (peer->rate_tokens == 0 ||
            time_after(jiffies,
                       (peer->rate_last +
-                       (ip_rt_redirect_load << peer->rate_tokens)))) {
+                       (ip_rt_redirect_load << peer->n_redirects)))) {
                __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
 
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
                peer->rate_last = jiffies;
-               ++peer->rate_tokens;
                ++peer->n_redirects;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
                if (log_martians &&
-                   peer->rate_tokens == ip_rt_redirect_number)
+                   peer->n_redirects == ip_rt_redirect_number)
                        net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
                                             &ip_hdr(skb)->saddr, inet_iif(skb),
                                             &ip_hdr(skb)->daddr, &gw);
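
The rate-limit fix above separates two counters that had been conflated: peer->rate_tokens is also consumed by general ICMP rate limiting, so using it both as the exponential-backoff shift and as the "stop logging" threshold could misfire; redirects now key both off peer->n_redirects, which counts redirects alone. A self-contained sketch of the backoff test, assuming jiffies-like monotonic time:

#include <stdbool.h>
#include <stdio.h>

/* Send another redirect only after an exponentially growing quiet period. */
static bool should_send_redirect(unsigned long now, unsigned long rate_last,
				 unsigned long load, unsigned int n_redirects)
{
	return now - rate_last > (load << n_redirects);
}

int main(void)
{
	/* With load == 4 ticks, the required gaps grow 8, 16, 32, ... */
	printf("%d %d\n",
	       should_send_redirect(100, 90, 4, 1),	/* gap 10 > 8  -> 1 */
	       should_send_redirect(100, 90, 4, 2));	/* gap 10 > 16 -> 0 */
	return 0;
}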
@@ -1313,7 +1313,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
        mtu = READ_ONCE(dst->dev->mtu);
 
        if (unlikely(ip_mtu_locked(dst))) {
-               if (rt->rt_gw_family && mtu > 576)
+               if (rt->rt_uses_gateway && mtu > 576)
                        mtu = 576;
        }
 
@@ -1482,7 +1482,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
        prev = cmpxchg(p, orig, rt);
        if (prev == orig) {
                if (orig) {
-                       dst_dev_put(&orig->dst);
+                       rt_add_uncached_list(orig);
                        dst_release(&orig->dst);
                }
        } else {
@@ -1569,6 +1569,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
                struct fib_nh_common *nhc = FIB_RES_NHC(*res);
 
                if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
+                       rt->rt_uses_gateway = 1;
                        rt->rt_gw_family = nhc->nhc_gw_family;
                        /* only INET and INET6 are supported */
                        if (likely(nhc->nhc_gw_family == AF_INET))
@@ -1634,6 +1635,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
                rt->rt_iif = 0;
                rt->rt_pmtu = 0;
                rt->rt_mtu_locked = 0;
+               rt->rt_uses_gateway = 0;
                rt->rt_gw_family = 0;
                rt->rt_gw4 = 0;
                INIT_LIST_HEAD(&rt->rt_uncached);
@@ -2468,14 +2470,17 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
        int orig_oif = fl4->flowi4_oif;
        unsigned int flags = 0;
        struct rtable *rth;
-       int err = -ENETUNREACH;
+       int err;
 
        if (fl4->saddr) {
-               rth = ERR_PTR(-EINVAL);
                if (ipv4_is_multicast(fl4->saddr) ||
                    ipv4_is_lbcast(fl4->saddr) ||
-                   ipv4_is_zeronet(fl4->saddr))
+                   ipv4_is_zeronet(fl4->saddr)) {
+                       rth = ERR_PTR(-EINVAL);
                        goto out;
+               }
+
+               rth = ERR_PTR(-ENETUNREACH);
 
                /* I removed check for oif == dev_out->oif here.
                   It was wrong for two reasons:
@@ -2694,6 +2699,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                rt->rt_genid = rt_genid_ipv4(net);
                rt->rt_flags = ort->rt_flags;
                rt->rt_type = ort->rt_type;
+               rt->rt_uses_gateway = ort->rt_uses_gateway;
                rt->rt_gw_family = ort->rt_gw_family;
                if (rt->rt_gw_family == AF_INET)
                        rt->rt_gw4 = ort->rt_gw4;
@@ -2778,21 +2784,23 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
                if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
                        goto nla_put_failure;
        }
-       if (rt->rt_gw_family == AF_INET &&
-           nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
-               goto nla_put_failure;
-       } else if (rt->rt_gw_family == AF_INET6) {
-               int alen = sizeof(struct in6_addr);
-               struct nlattr *nla;
-               struct rtvia *via;
-
-               nla = nla_reserve(skb, RTA_VIA, alen + 2);
-               if (!nla)
+       if (rt->rt_uses_gateway) {
+               if (rt->rt_gw_family == AF_INET &&
+                   nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
                        goto nla_put_failure;
-
-               via = nla_data(nla);
-               via->rtvia_family = AF_INET6;
-               memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+               } else if (rt->rt_gw_family == AF_INET6) {
+                       int alen = sizeof(struct in6_addr);
+                       struct nlattr *nla;
+                       struct rtvia *via;
+
+                       nla = nla_reserve(skb, RTA_VIA, alen + 2);
+                       if (!nla)
+                               goto nla_put_failure;
+
+                       via = nla_data(nla);
+                       via->rtvia_family = AF_INET6;
+                       memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+               }
        }
 
        expires = rt->dst.expires;
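
The new rt_uses_gateway bit restores a distinction that rt_gw_family alone cannot express: a cached route may carry a gateway address while traffic is actually delivered on-link. Strict source routing, the locked-MTU 576-byte clamp, and RTA_GATEWAY reporting now all test rt_uses_gateway, which is set only when the nexthop scope is link or a redirect installed a gateway. A minimal sketch of the consumer-side question, assuming the post-change struct rtable:

static bool example_really_via_gateway(const struct rtable *rt)
{
	/* rt_gw_family != 0 only says an address is recorded;
	 * rt_uses_gateway says the next hop really is that gateway.
	 */
	return rt->rt_uses_gateway;
}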
index 79c325a..d8876f0 100644 (file)
@@ -326,7 +326,7 @@ void tcp_enter_memory_pressure(struct sock *sk)
 {
        unsigned long val;
 
-       if (tcp_memory_pressure)
+       if (READ_ONCE(tcp_memory_pressure))
                return;
        val = jiffies;
 
@@ -341,7 +341,7 @@ void tcp_leave_memory_pressure(struct sock *sk)
 {
        unsigned long val;
 
-       if (!tcp_memory_pressure)
+       if (!READ_ONCE(tcp_memory_pressure))
                return;
        val = xchg(&tcp_memory_pressure, 0);
        if (val)
@@ -450,8 +450,8 @@ void tcp_init_sock(struct sock *sk)
 
        icsk->icsk_sync_mss = tcp_sync_mss;
 
-       sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
-       sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+       WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+       WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
 
        sk_sockets_allocated_inc(sk);
        sk->sk_route_forced_caps = NETIF_F_GSO;
@@ -477,7 +477,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
                                          int target, struct sock *sk)
 {
-       return (tp->rcv_nxt - tp->copied_seq >= target) ||
+       return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
                (sk->sk_prot->stream_memory_read ?
                sk->sk_prot->stream_memory_read(sk) : false);
 }
@@ -543,10 +543,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
        /* Connected or passive Fast Open socket? */
        if (state != TCP_SYN_SENT &&
-           (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+           (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
                int target = sock_rcvlowat(sk, 0, INT_MAX);
 
-               if (tp->urg_seq == tp->copied_seq &&
+               if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
                    !sock_flag(sk, SOCK_URGINLINE) &&
                    tp->urg_data)
                        target++;
@@ -584,7 +584,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
        }
        /* This barrier is coupled with smp_wmb() in tcp_reset() */
        smp_rmb();
-       if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+       if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
                mask |= EPOLLERR;
 
        return mask;
@@ -607,7 +607,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                unlock_sock_fast(sk, slow);
                break;
        case SIOCATMARK:
-               answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+               answ = tp->urg_data &&
+                      READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
                break;
        case SIOCOUTQ:
                if (sk->sk_state == TCP_LISTEN)
@@ -616,7 +617,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
                        answ = 0;
                else
-                       answ = tp->write_seq - tp->snd_una;
+                       answ = READ_ONCE(tp->write_seq) - tp->snd_una;
                break;
        case SIOCOUTQNSD:
                if (sk->sk_state == TCP_LISTEN)
@@ -625,7 +626,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
                        answ = 0;
                else
-                       answ = tp->write_seq - tp->snd_nxt;
+                       answ = READ_ONCE(tp->write_seq) -
+                              READ_ONCE(tp->snd_nxt);
                break;
        default:
                return -ENOIOCTLCMD;
@@ -657,7 +659,7 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb)
        tcb->sacked  = 0;
        __skb_header_release(skb);
        tcp_add_write_queue_tail(sk, skb);
-       sk->sk_wmem_queued += skb->truesize;
+       sk_wmem_queued_add(sk, skb->truesize);
        sk_mem_charge(sk, skb->truesize);
        if (tp->nonagle & TCP_NAGLE_PUSH)
                tp->nonagle &= ~TCP_NAGLE_PUSH;
@@ -1032,10 +1034,10 @@ new_segment:
                skb->len += copy;
                skb->data_len += copy;
                skb->truesize += copy;
-               sk->sk_wmem_queued += copy;
+               sk_wmem_queued_add(sk, copy);
                sk_mem_charge(sk, copy);
                skb->ip_summed = CHECKSUM_PARTIAL;
-               tp->write_seq += copy;
+               WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
                TCP_SKB_CB(skb)->end_seq += copy;
                tcp_skb_pcount_set(skb, 0);
 
@@ -1362,7 +1364,7 @@ new_segment:
                if (!copied)
                        TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
 
-               tp->write_seq += copy;
+               WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
                TCP_SKB_CB(skb)->end_seq += copy;
                tcp_skb_pcount_set(skb, 0);
 
@@ -1668,9 +1670,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                sk_eat_skb(sk, skb);
                if (!desc->count)
                        break;
-               tp->copied_seq = seq;
+               WRITE_ONCE(tp->copied_seq, seq);
        }
-       tp->copied_seq = seq;
+       WRITE_ONCE(tp->copied_seq, seq);
 
        tcp_rcv_space_adjust(sk);
 
@@ -1699,7 +1701,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
        else
                cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
        val = min(val, cap);
-       sk->sk_rcvlowat = val ? : 1;
+       WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 
        /* Check if we need to signal EPOLLIN right now */
        tcp_data_ready(sk);
@@ -1709,7 +1711,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 
        val <<= 1;
        if (val > sk->sk_rcvbuf) {
-               sk->sk_rcvbuf = val;
+               WRITE_ONCE(sk->sk_rcvbuf, val);
                tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
        }
        return 0;
@@ -1798,13 +1800,11 @@ static int tcp_zerocopy_receive(struct sock *sk,
                }
                if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
                        int remaining = zc->recv_skip_hint;
-                       int size = skb_frag_size(frags);
 
-                       while (remaining && (size != PAGE_SIZE ||
+                       while (remaining && (skb_frag_size(frags) != PAGE_SIZE ||
                                             skb_frag_off(frags))) {
-                               remaining -= size;
+                               remaining -= skb_frag_size(frags);
                                frags++;
-                               size = skb_frag_size(frags);
                        }
                        zc->recv_skip_hint -= remaining;
                        break;
@@ -1821,7 +1821,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
 out:
        up_read(&current->mm->mmap_sem);
        if (length) {
-               tp->copied_seq = seq;
+               WRITE_ONCE(tp->copied_seq, seq);
                tcp_rcv_space_adjust(sk);
 
                /* Clean up data we have read: This will do ACK frames. */
@@ -1964,7 +1964,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
        if (unlikely(flags & MSG_ERRQUEUE))
                return inet_recv_error(sk, msg, len, addr_len);
 
-       if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
+       if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
            (sk->sk_state == TCP_ESTABLISHED))
                sk_busy_loop(sk, nonblock);
 
@@ -2119,7 +2119,7 @@ found_ok_skb:
                        if (urg_offset < used) {
                                if (!urg_offset) {
                                        if (!sock_flag(sk, SOCK_URGINLINE)) {
-                                               ++*seq;
+                                               WRITE_ONCE(*seq, *seq + 1);
                                                urg_hole++;
                                                offset++;
                                                used--;
@@ -2141,7 +2141,7 @@ found_ok_skb:
                        }
                }
 
-               *seq += used;
+               WRITE_ONCE(*seq, *seq + used);
                copied += used;
                len -= used;
 
@@ -2168,7 +2168,7 @@ skip_copy:
 
 found_fin_ok:
                /* Process the FIN. */
-               ++*seq;
+               WRITE_ONCE(*seq, *seq + 1);
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
@@ -2489,7 +2489,10 @@ adjudge_to_death:
        }
 
        if (sk->sk_state == TCP_CLOSE) {
-               struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+               struct request_sock *req;
+
+               req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+                                               lockdep_sock_is_held(sk));
                /* We could get here with a non-NULL req if the socket is
                 * aborted (e.g., closed with unread data) before 3WHS
                 * finishes.
@@ -2561,6 +2564,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        int old_state = sk->sk_state;
+       u32 seq;
 
        if (old_state != TCP_CLOSE)
                tcp_set_state(sk, TCP_CLOSE);
@@ -2587,7 +2591,7 @@ int tcp_disconnect(struct sock *sk, int flags)
                __kfree_skb(sk->sk_rx_skb_cache);
                sk->sk_rx_skb_cache = NULL;
        }
-       tp->copied_seq = tp->rcv_nxt;
+       WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
        tp->urg_data = 0;
        tcp_write_queue_purge(sk);
        tcp_fastopen_active_disable_ofo_check(sk);
@@ -2603,9 +2607,12 @@ int tcp_disconnect(struct sock *sk, int flags)
        tp->srtt_us = 0;
        tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
        tp->rcv_rtt_last_tsecr = 0;
-       tp->write_seq += tp->max_window + 2;
-       if (tp->write_seq == 0)
-               tp->write_seq = 1;
+
+       seq = tp->write_seq + tp->max_window + 2;
+       if (!seq)
+               seq = 1;
+       WRITE_ONCE(tp->write_seq, seq);
+
        icsk->icsk_backoff = 0;
        tp->snd_cwnd = 2;
        icsk->icsk_probes_out = 0;
@@ -2932,9 +2939,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                if (sk->sk_state != TCP_CLOSE)
                        err = -EPERM;
                else if (tp->repair_queue == TCP_SEND_QUEUE)
-                       tp->write_seq = val;
+                       WRITE_ONCE(tp->write_seq, val);
                else if (tp->repair_queue == TCP_RECV_QUEUE)
-                       tp->rcv_nxt = val;
+                       WRITE_ONCE(tp->rcv_nxt, val);
                else
                        err = -EINVAL;
                break;
@@ -3833,7 +3840,13 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 void tcp_done(struct sock *sk)
 {
-       struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+       struct request_sock *req;
+
+       /* We might be called with a new socket, after
+        * inet_csk_prepare_forced_close() has been called
+        * so we can not use lockdep_sock_is_held(sk)
+        */
+       req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
 
        if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
                TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
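
Converting tp->fastopen_rsk into an RCU-managed pointer gives every access an explicit discipline, visible throughout the TCP hunks in this series: rcu_access_pointer() for pure existence tests, rcu_dereference() where only rcu_read_lock() protects the read (as in tcp_v4_err()), and rcu_dereference_protected() with lockdep_sock_is_held(sk) where the socket lock is held; tcp_done() passes a literal 1 because, as its comment explains, lockdep cannot express the guarantee there. A compact sketch of the three patterns, assuming kernel context:

static void example_fastopen_accessors(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req;

	if (rcu_access_pointer(tp->fastopen_rsk))	/* existence only */
		return;

	rcu_read_lock();
	req = rcu_dereference(tp->fastopen_rsk);	/* RCU reader */
	(void)req;
	rcu_read_unlock();

	req = rcu_dereference_protected(tp->fastopen_rsk,
					lockdep_sock_is_held(sk));
	(void)req;
}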
index 95b5954..32772d6 100644 (file)
@@ -388,7 +388,7 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
  * which allows 2 outstanding 2-packet sequences, to try to keep pipe
  * full even with ACK-every-other-packet delayed ACKs.
  */
-static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd, int gain)
+static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
 {
        struct bbr *bbr = inet_csk_ca(sk);
 
@@ -399,7 +399,7 @@ static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd, int gain)
        cwnd = (cwnd + 1) & ~1U;
 
        /* Ensure gain cycling gets inflight above BDP even for small BDPs. */
-       if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
+       if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0)
                cwnd += 2;
 
        return cwnd;
@@ -411,7 +411,7 @@ static u32 bbr_inflight(struct sock *sk, u32 bw, int gain)
        u32 inflight;
 
        inflight = bbr_bdp(sk, bw, gain);
-       inflight = bbr_quantization_budget(sk, inflight, gain);
+       inflight = bbr_quantization_budget(sk, inflight);
 
        return inflight;
 }
@@ -531,7 +531,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
         * due to aggregation (of data and/or ACKs) visible in the ACK stream.
         */
        target_cwnd += bbr_ack_aggregation_cwnd(sk);
-       target_cwnd = bbr_quantization_budget(sk, target_cwnd, gain);
+       target_cwnd = bbr_quantization_budget(sk, target_cwnd);
 
        /* If we're below target cwnd, slow start cwnd toward target cwnd. */
        if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
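
Dropping the gain argument from bbr_quantization_budget() moves the "keep the pipe slightly over-full while probing" decision inside BBR itself: the two extra packets are granted only when cycle_idx == 0, the highest-gain phase of the PROBE_BW cycle, whereas the old gain > BBR_UNIT test also fired whenever a caller happened to pass a gain above unity in other phases. A self-contained sketch of the budget arithmetic:

#include <stdio.h>

/* Round cwnd up to an even value so ACK-every-other-packet delayed ACKs
 * never strand an odd packet, and over-provision while probing.
 */
static unsigned int quantization_budget(unsigned int cwnd, int probing)
{
	cwnd = (cwnd + 1) & ~1U;	/* round up to even */
	if (probing)			/* highest-gain PROBE_BW phase */
		cwnd += 2;
	return cwnd;
}

int main(void)
{
	printf("%u %u\n", quantization_budget(5, 0), quantization_budget(5, 1));
	/* prints: 6 8 */
	return 0;
}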
index 81a8221..5495061 100644 (file)
@@ -26,8 +26,9 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
        } else if (sk->sk_type == SOCK_STREAM) {
                const struct tcp_sock *tp = tcp_sk(sk);
 
-               r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
-               r->idiag_wqueue = tp->write_seq - tp->snd_una;
+               r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+                                            READ_ONCE(tp->copied_seq), 0);
+               r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
        }
        if (info)
                tcp_get_info(sk, info);
index 3fd4512..a915ade 100644 (file)
@@ -253,7 +253,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
         */
        tp = tcp_sk(child);
 
-       tp->fastopen_rsk = req;
+       rcu_assign_pointer(tp->fastopen_rsk, req);
        tcp_rsk(req)->tfo_listener = true;
 
        /* RFC1323: The window in SYN & SYN/ACK segments is never
index 3578357..a2e52ad 100644 (file)
@@ -359,7 +359,8 @@ static void tcp_sndbuf_expand(struct sock *sk)
        sndmem *= nr_segs * per_mss;
 
        if (sk->sk_sndbuf < sndmem)
-               sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
+               WRITE_ONCE(sk->sk_sndbuf,
+                          min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -483,8 +484,9 @@ static void tcp_clamp_window(struct sock *sk)
            !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
            !tcp_under_memory_pressure(sk) &&
            sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
-               sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-                                   net->ipv4.sysctl_tcp_rmem[2]);
+               WRITE_ONCE(sk->sk_rcvbuf,
+                          min(atomic_read(&sk->sk_rmem_alloc),
+                              net->ipv4.sysctl_tcp_rmem[2]));
        }
        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
                tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -648,7 +650,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
                rcvbuf = min_t(u64, rcvwin * rcvmem,
                               sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
                if (rcvbuf > sk->sk_rcvbuf) {
-                       sk->sk_rcvbuf = rcvbuf;
+                       WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
 
                        /* Make the window clamp follow along.  */
                        tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
@@ -2666,7 +2668,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
        struct tcp_sock *tp = tcp_sk(sk);
        bool recovered = !before(tp->snd_una, tp->high_seq);
 
-       if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) &&
+       if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
            tcp_try_undo_loss(sk, false))
                return;
 
@@ -2990,7 +2992,7 @@ void tcp_rearm_rto(struct sock *sk)
        /* If the retrans timer is currently being used by Fast Open
         * for SYN-ACK retrans purpose, stay put.
         */
-       if (tp->fastopen_rsk)
+       if (rcu_access_pointer(tp->fastopen_rsk))
                return;
 
        if (!tp->packets_out) {
@@ -3362,7 +3364,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
 
        sock_owned_by_me((struct sock *)tp);
        tp->bytes_received += delta;
-       tp->rcv_nxt = seq;
+       WRITE_ONCE(tp->rcv_nxt, seq);
 }
 
 /* Update our send window.
@@ -5356,7 +5358,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
        }
 
        tp->urg_data = TCP_URG_NOTYET;
-       tp->urg_seq = ptr;
+       WRITE_ONCE(tp->urg_seq, ptr);
 
        /* Disable header prediction. */
        tp->pred_flags = 0;
@@ -5932,7 +5934,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                /* Ok.. it's good. Set up sequence numbers and
                 * move to established.
                 */
-               tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+               WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
                tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 
                /* RFC1323: The window in SYN & SYN/ACK segments is
@@ -5961,7 +5963,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                /* Remember, tcp_poll() does not lock socket!
                 * Change state from SYN-SENT only after copied_seq
                 * is initialized. */
-               tp->copied_seq = tp->rcv_nxt;
+               WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 
                smc_check_reset_syn(tp);
 
@@ -6035,8 +6037,8 @@ discard:
                        tp->tcp_header_len = sizeof(struct tcphdr);
                }
 
-               tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-               tp->copied_seq = tp->rcv_nxt;
+               WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+               WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
                tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 
                /* RFC1323: The window in SYN & SYN/ACK segments is
@@ -6087,6 +6089,8 @@ reset_and_undo:
 
 static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
 {
+       struct request_sock *req;
+
        tcp_try_undo_loss(sk, false);
 
        /* Reset rtx states to prevent spurious retransmits_timed_out() */
@@ -6096,7 +6100,9 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
        /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
         * we no longer need req so release it.
         */
-       reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false);
+       req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+                                       lockdep_sock_is_held(sk));
+       reqsk_fastopen_remove(sk, req, false);
 
        /* Re-arm the timer because data may have been sent out.
         * This is similar to the regular data transmission case
@@ -6171,7 +6177,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
        tcp_mstamp_refresh(tp);
        tp->rx_opt.saw_tstamp = 0;
-       req = tp->fastopen_rsk;
+       req = rcu_dereference_protected(tp->fastopen_rsk,
+                                       lockdep_sock_is_held(sk));
        if (req) {
                bool req_stolen;
 
@@ -6211,7 +6218,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                        tcp_try_undo_spurious_syn(sk);
                        tp->retrans_stamp = 0;
                        tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-                       tp->copied_seq = tp->rcv_nxt;
+                       WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
                }
                smp_mb();
                tcp_set_state(sk, TCP_ESTABLISHED);
index fd394ad..67b2dc7 100644 (file)
@@ -164,9 +164,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
                 * without appearing to create any others.
                 */
                if (likely(!tp->repair)) {
-                       tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-                       if (tp->write_seq == 0)
-                               tp->write_seq = 1;
+                       u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
+
+                       if (!seq)
+                               seq = 1;
+                       WRITE_ONCE(tp->write_seq, seq);
                        tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
                        tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                }
@@ -253,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                tp->rx_opt.ts_recent       = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                if (likely(!tp->repair))
-                       tp->write_seq      = 0;
+                       WRITE_ONCE(tp->write_seq, 0);
        }
 
        inet->inet_dport = usin->sin_port;
@@ -291,16 +293,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
        if (likely(!tp->repair)) {
                if (!tp->write_seq)
-                       tp->write_seq = secure_tcp_seq(inet->inet_saddr,
-                                                      inet->inet_daddr,
-                                                      inet->inet_sport,
-                                                      usin->sin_port);
+                       WRITE_ONCE(tp->write_seq,
+                                  secure_tcp_seq(inet->inet_saddr,
+                                                 inet->inet_daddr,
+                                                 inet->inet_sport,
+                                                 usin->sin_port));
                tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
                                                 inet->inet_saddr,
                                                 inet->inet_daddr);
        }
 
-       inet->inet_id = tp->write_seq ^ jiffies;
+       inet->inet_id = prandom_u32();
 
        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
@@ -478,7 +481,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
        icsk = inet_csk(sk);
        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
-       fastopen = tp->fastopen_rsk;
+       fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
@@ -771,6 +774,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        if (sk) {
                ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                                   inet_twsk(sk)->tw_mark : sk->sk_mark;
+               ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+                                  inet_twsk(sk)->tw_priority : sk->sk_priority;
                transmit_time = tcp_transmit_time(sk);
        }
        ip_send_unicast_reply(ctl_sk,
@@ -866,6 +871,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
        ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
        ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                           inet_twsk(sk)->tw_mark : sk->sk_mark;
+       ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+                          inet_twsk(sk)->tw_priority : sk->sk_priority;
        transmit_time = tcp_transmit_time(sk);
        ip_send_unicast_reply(ctl_sk,
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -1443,7 +1450,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
-       newinet->inet_id = newtp->write_seq ^ jiffies;
+       newinet->inet_id = prandom_u32();
 
        if (!dst) {
                dst = inet_csk_route_child_sock(sk, newsk, req);
@@ -1640,7 +1647,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-       u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+       u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
        struct skb_shared_info *shinfo;
        const struct tcphdr *th;
        struct tcphdr *thtail;
@@ -1912,7 +1919,7 @@ process:
        if (tcp_v4_inbound_md5_hash(sk, skb))
                goto discard_and_relse;
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        if (tcp_filter(sk, skb))
                goto discard_and_relse;
@@ -2117,7 +2124,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
        if (inet_csk(sk)->icsk_bind_hash)
                inet_put_port(sk);
 
-       BUG_ON(tp->fastopen_rsk);
+       BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
 
        /* If socket is aborted during connect operation */
        tcp_free_fastopen_req(tp);
@@ -2451,12 +2458,13 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
                /* Because we don't lock the socket,
                 * we might find a transient negative value.
                 */
-               rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+               rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+                                     READ_ONCE(tp->copied_seq), 0);
 
        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
                        "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
                i, src, srcp, dest, destp, state,
-               tp->write_seq - tp->snd_una,
+               READ_ONCE(tp->write_seq) - tp->snd_una,
                rx_queue,
                timer_active,
                jiffies_delta_to_clock_t(timer_expires - jiffies),
@@ -2673,7 +2681,7 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
        net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
 
-       net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
+       net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
        net->ipv4.sysctl_tcp_sack = 1;
        net->ipv4.sysctl_tcp_window_scaling = 1;
        net->ipv4.sysctl_tcp_timestamps = 1;
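
The tcp_ipv4.c hunks above share one theme: fields that are written under the socket lock but read without it (tcp_add_backlog(), the /proc/net/tcp dumper) gain READ_ONCE()/WRITE_ONCE() annotations, and the IP ID seed moves from the guessable write_seq ^ jiffies to prandom_u32(). A minimal sketch of the annotation idiom, with hypothetical names, assuming only <linux/compiler.h>:

        /* Writer: runs with the socket lock held. The marked store keeps
         * the compiler from tearing or duplicating the write. */
        static void advance_seq(struct tcp_sock *tp, u32 end_seq)
        {
                WRITE_ONCE(tp->write_seq, end_seq);
        }

        /* Reader: runs with no lock at all (e.g. a /proc dump); the
         * marked load pairs with the store above. */
        static u32 peek_seq(const struct tcp_sock *tp)
        {
                return READ_ONCE(tp->write_seq);
        }
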
index 8bcaf25..c802bc8 100644 (file)
@@ -266,6 +266,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 
                tw->tw_transparent      = inet->transparent;
                tw->tw_mark             = sk->sk_mark;
+               tw->tw_priority         = sk->sk_priority;
                tw->tw_rcv_wscale       = tp->rx_opt.rcv_wscale;
                tcptw->tw_rcv_nxt       = tp->rcv_nxt;
                tcptw->tw_snd_nxt       = tp->snd_nxt;
@@ -461,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
        struct tcp_request_sock *treq = tcp_rsk(req);
        struct inet_connection_sock *newicsk;
        struct tcp_sock *oldtp, *newtp;
+       u32 seq;
 
        if (!newsk)
                return NULL;
@@ -474,12 +476,16 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
        /* Now setup tcp_sock */
        newtp->pred_flags = 0;
 
-       newtp->rcv_wup = newtp->copied_seq =
-       newtp->rcv_nxt = treq->rcv_isn + 1;
+       seq = treq->rcv_isn + 1;
+       newtp->rcv_wup = seq;
+       WRITE_ONCE(newtp->copied_seq, seq);
+       WRITE_ONCE(newtp->rcv_nxt, seq);
        newtp->segs_in = 1;
 
-       newtp->snd_sml = newtp->snd_una =
-       newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
+       seq = treq->snt_isn + 1;
+       newtp->snd_sml = newtp->snd_una = seq;
+       WRITE_ONCE(newtp->snd_nxt, seq);
+       newtp->snd_up = seq;
 
        INIT_LIST_HEAD(&newtp->tsq_node);
        INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
@@ -494,7 +500,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
        newtp->total_retrans = req->num_retrans;
 
        tcp_init_xmit_timers(newsk);
-       newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
+       WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
 
        if (sock_flag(newsk, SOCK_KEEPOPEN))
                inet_csk_reset_keepalive_timer(newsk,
@@ -540,7 +546,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
        newtp->rx_opt.mss_clamp = req->mss;
        tcp_ecn_openreq_child(newtp, req);
        newtp->fastopen_req = NULL;
-       newtp->fastopen_rsk = NULL;
+       RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
 
        __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
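
tcp_create_openreq_child() gets the same treatment: the chained assignments are split so each field with a lockless reader receives its own WRITE_ONCE(), and fastopen_rsk becomes an __rcu pointer. RCU_INIT_POINTER() suffices at this point because the child socket is not yet visible to any reader; a sketch of that rule, with hypothetical names:

        struct conn {
                struct request_sock __rcu *fastopen_rsk;
        };

        static void conn_init(struct conn *c)
        {
                /* No reader can see 'c' yet, so plain initialization
                 * without the publish barrier of rcu_assign_pointer()
                 * is enough. */
                RCU_INIT_POINTER(c->fastopen_rsk, NULL);
        }

        static struct request_sock *conn_peek(struct conn *c)
        {
                /* Lockless read side; caller holds rcu_read_lock(). */
                return rcu_dereference(c->fastopen_rsk);
        }
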
 
index fec6d67..0488607 100644 (file)
@@ -67,7 +67,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
        struct tcp_sock *tp = tcp_sk(sk);
        unsigned int prior_packets = tp->packets_out;
 
-       tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+       WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq);
 
        __skb_unlink(skb, &sk->sk_write_queue);
        tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
@@ -1196,10 +1196,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
        struct tcp_sock *tp = tcp_sk(sk);
 
        /* Advance write_seq and place onto the write_queue. */
-       tp->write_seq = TCP_SKB_CB(skb)->end_seq;
+       WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
        __skb_header_release(skb);
        tcp_add_write_queue_tail(sk, skb);
-       sk->sk_wmem_queued += skb->truesize;
+       sk_wmem_queued_add(sk, skb->truesize);
        sk_mem_charge(sk, skb->truesize);
 }
 
@@ -1333,7 +1333,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
                return -ENOMEM; /* We'll just try again later. */
        skb_copy_decrypted(buff, skb);
 
-       sk->sk_wmem_queued += buff->truesize;
+       sk_wmem_queued_add(sk, buff->truesize);
        sk_mem_charge(sk, buff->truesize);
        nlen = skb->len - len - nsize;
        buff->truesize += nlen;
@@ -1443,7 +1443,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 
        if (delta_truesize) {
                skb->truesize      -= delta_truesize;
-               sk->sk_wmem_queued -= delta_truesize;
+               sk_wmem_queued_add(sk, -delta_truesize);
                sk_mem_uncharge(sk, delta_truesize);
                sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
        }
@@ -1888,7 +1888,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
                return -ENOMEM;
        skb_copy_decrypted(buff, skb);
 
-       sk->sk_wmem_queued += buff->truesize;
+       sk_wmem_queued_add(sk, buff->truesize);
        sk_mem_charge(sk, buff->truesize);
        buff->truesize += nlen;
        skb->truesize -= nlen;
@@ -2152,7 +2152,7 @@ static int tcp_mtu_probe(struct sock *sk)
        nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
        if (!nskb)
                return -1;
-       sk->sk_wmem_queued += nskb->truesize;
+       sk_wmem_queued_add(sk, nskb->truesize);
        sk_mem_charge(sk, nskb->truesize);
 
        skb = tcp_send_head(sk);
@@ -2482,7 +2482,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
        /* Don't do any loss probe on a Fast Open connection before 3WHS
         * finishes.
         */
-       if (tp->fastopen_rsk)
+       if (rcu_access_pointer(tp->fastopen_rsk))
                return false;
 
        early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
@@ -3142,7 +3142,7 @@ void tcp_send_fin(struct sock *sk)
                         * if FIN had been sent. This is because retransmit path
                         * does not change tp->snd_nxt.
                         */
-                       tp->snd_nxt++;
+                       WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1);
                        return;
                }
        } else {
@@ -3222,7 +3222,7 @@ int tcp_send_synack(struct sock *sk)
                        tcp_rtx_queue_unlink_and_free(skb, sk);
                        __skb_header_release(nskb);
                        tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
-                       sk->sk_wmem_queued += nskb->truesize;
+                       sk_wmem_queued_add(sk, nskb->truesize);
                        sk_mem_charge(sk, nskb->truesize);
                        skb = nskb;
                }
@@ -3426,14 +3426,14 @@ static void tcp_connect_init(struct sock *sk)
        tp->snd_una = tp->write_seq;
        tp->snd_sml = tp->write_seq;
        tp->snd_up = tp->write_seq;
-       tp->snd_nxt = tp->write_seq;
+       WRITE_ONCE(tp->snd_nxt, tp->write_seq);
 
        if (likely(!tp->repair))
                tp->rcv_nxt = 0;
        else
                tp->rcv_tstamp = tcp_jiffies32;
        tp->rcv_wup = tp->rcv_nxt;
-       tp->copied_seq = tp->rcv_nxt;
+       WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 
        inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
        inet_csk(sk)->icsk_retransmits = 0;
@@ -3447,9 +3447,9 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
 
        tcb->end_seq += skb->len;
        __skb_header_release(skb);
-       sk->sk_wmem_queued += skb->truesize;
+       sk_wmem_queued_add(sk, skb->truesize);
        sk_mem_charge(sk, skb->truesize);
-       tp->write_seq = tcb->end_seq;
+       WRITE_ONCE(tp->write_seq, tcb->end_seq);
        tp->packets_out += tcp_skb_pcount(skb);
 }
 
@@ -3586,11 +3586,11 @@ int tcp_connect(struct sock *sk)
        /* We change tp->snd_nxt after the tcp_transmit_skb() call
         * in order to make this packet get counted in tcpOutSegs.
         */
-       tp->snd_nxt = tp->write_seq;
+       WRITE_ONCE(tp->snd_nxt, tp->write_seq);
        tp->pushed_seq = tp->write_seq;
        buff = tcp_send_head(sk);
        if (unlikely(buff)) {
-               tp->snd_nxt     = TCP_SKB_CB(buff)->seq;
+               WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq);
                tp->pushed_seq  = TCP_SKB_CB(buff)->seq;
        }
        TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
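
Every open-coded sk->sk_wmem_queued += ... in tcp_output.c becomes sk_wmem_queued_add(). Presumably the helper just wraps the read-modify-write in the same WRITE_ONCE() idiom shown earlier, so lockless readers of sk_wmem_queued never see a torn value; a sketch under that assumption:

        static inline void sk_wmem_queued_add(struct sock *sk, int val)
        {
                /* Single writer (socket lock held); the marked store is
                 * for lockless readers such as diag and memory dumps. */
                WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
        }
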
index dbd9d2d..dd5a631 100644 (file)
@@ -198,8 +198,13 @@ static bool retransmits_timed_out(struct sock *sk,
                return false;
 
        start_ts = tcp_sk(sk)->retrans_stamp;
-       if (likely(timeout == 0))
-               timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN);
+       if (likely(timeout == 0)) {
+               unsigned int rto_base = TCP_RTO_MIN;
+
+               if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+                       rto_base = tcp_timeout_init(sk);
+               timeout = tcp_model_timeout(sk, boundary, rto_base);
+       }
 
        return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
 }
@@ -210,7 +215,7 @@ static int tcp_write_timeout(struct sock *sk)
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
-       bool expired, do_reset;
+       bool expired = false, do_reset;
        int retry_until;
 
        if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
@@ -242,9 +247,10 @@ static int tcp_write_timeout(struct sock *sk)
                        if (tcp_out_of_resources(sk, do_reset))
                                return 1;
                }
+       }
+       if (!expired)
                expired = retransmits_timed_out(sk, retry_until,
                                                icsk->icsk_user_timeout);
-       }
        tcp_fastopen_active_detect_blackhole(sk, expired);
 
        if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -380,15 +386,13 @@ abort:            tcp_write_err(sk);
  *     Timer for Fast Open socket to retransmit SYNACK. Note that the
  *     sk here is the child socket, not the parent (listener) socket.
  */
-static void tcp_fastopen_synack_timer(struct sock *sk)
+static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        int max_retries = icsk->icsk_syn_retries ? :
            sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
        struct tcp_sock *tp = tcp_sk(sk);
-       struct request_sock *req;
 
-       req = tcp_sk(sk)->fastopen_rsk;
        req->rsk_ops->syn_ack_timeout(req);
 
        if (req->num_timeout >= max_retries) {
@@ -429,11 +433,14 @@ void tcp_retransmit_timer(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct request_sock *req;
 
-       if (tp->fastopen_rsk) {
+       req = rcu_dereference_protected(tp->fastopen_rsk,
+                                       lockdep_sock_is_held(sk));
+       if (req) {
                WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
                             sk->sk_state != TCP_FIN_WAIT1);
-               tcp_fastopen_synack_timer(sk);
+               tcp_fastopen_synack_timer(sk, req);
                /* Before we receive ACK to our SYN-ACK don't retransmit
                 * anything else (e.g., data or FIN segments).
                 */
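
Two independent fixes land in tcp_timer.c: retransmits_timed_out() now derives its base RTO from tcp_timeout_init() while the socket is still in a SYN state (where TCP_RTO_MIN is not the effective base), and the expired computation moves out of the branch so SYN-state sockets are checked too. A simplified model of the backoff sum being compared against (an assumption; the in-tree tcp_model_timeout() uses a closed form):

        /* Total time after 'boundary' retransmissions with doubling
         * RTO, starting at rto_base and capped at TCP_RTO_MAX. */
        static unsigned int model_timeout(unsigned int rto_base, int boundary)
        {
                unsigned int timeout = 0, rto = rto_base;
                int i;

                for (i = 0; i < boundary; i++) {
                        timeout += rto;
                        rto = min_t(unsigned int, rto << 1, TCP_RTO_MAX);
                }
                return timeout;
        }
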
index cf75515..1d58ce8 100644 (file)
@@ -388,7 +388,7 @@ static int compute_score(struct sock *sk, struct net *net,
                return -1;
        score += 4;
 
-       if (sk->sk_incoming_cpu == raw_smp_processor_id())
+       if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
                score++;
        return score;
 }
@@ -821,6 +821,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
        int is_udplite = IS_UDPLITE(sk);
        int offset = skb_transport_offset(skb);
        int len = skb->len - offset;
+       int datalen = len - sizeof(*uh);
        __wsum csum = 0;
 
        /*
@@ -854,10 +855,12 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
                        return -EIO;
                }
 
-               skb_shinfo(skb)->gso_size = cork->gso_size;
-               skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
-               skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(len - sizeof(uh),
-                                                        cork->gso_size);
+               if (datalen > cork->gso_size) {
+                       skb_shinfo(skb)->gso_size = cork->gso_size;
+                       skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+                       skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
+                                                                cork->gso_size);
+               }
                goto csum_partial;
        }
 
@@ -1313,6 +1316,20 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
                scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
 }
 
+static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
+{
+       /* We come here after udp_lib_checksum_complete() returned 0.
+        * This means that __skb_checksum_complete() might have
+        * set skb->csum_valid to 1.
+        * On 64bit platforms, we can set csum_unnecessary
+        * to true, but only if the skb is not shared.
+        */
+#if BITS_PER_LONG == 64
+       if (!skb_shared(skb))
+               udp_skb_scratch(skb)->csum_unnecessary = true;
+#endif
+}
+
 static int udp_skb_truesize(struct sk_buff *skb)
 {
        return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
@@ -1547,10 +1564,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk,
                        *total += skb->truesize;
                        kfree_skb(skb);
                } else {
-                       /* the csum related bits could be changed, refresh
-                        * the scratch area
-                        */
-                       udp_set_dev_scratch(skb);
+                       udp_skb_csum_unnecessary_set(skb);
                        break;
                }
        }
@@ -1574,7 +1588,7 @@ static int first_packet_length(struct sock *sk)
 
        spin_lock_bh(&rcvq->lock);
        skb = __first_packet_length(sk, rcvq, &total);
-       if (!skb && !skb_queue_empty(sk_queue)) {
+       if (!skb && !skb_queue_empty_lockless(sk_queue)) {
                spin_lock(&sk_queue->lock);
                skb_queue_splice_tail_init(sk_queue, rcvq);
                spin_unlock(&sk_queue->lock);
@@ -1647,7 +1661,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
                                return skb;
                        }
 
-                       if (skb_queue_empty(sk_queue)) {
+                       if (skb_queue_empty_lockless(sk_queue)) {
                                spin_unlock_bh(&queue->lock);
                                goto busy_check;
                        }
@@ -1673,7 +1687,7 @@ busy_check:
                                break;
 
                        sk_busy_loop(sk, flags & MSG_DONTWAIT);
-               } while (!skb_queue_empty(sk_queue));
+               } while (!skb_queue_empty_lockless(sk_queue));
 
                /* sk_queue is empty, reader_queue may contain peeked packets */
        } while (timeo &&
@@ -1969,7 +1983,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
         */
        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto drop;
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
                int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
@@ -2298,7 +2312,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto drop;
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        /* No socket. Drop packet silently, if checksum is wrong */
        if (udp_lib_checksum_complete(skb))
@@ -2709,7 +2723,7 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
        __poll_t mask = datagram_poll(file, sock, wait);
        struct sock *sk = sock->sk;
 
-       if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
+       if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        /* Check for false positives due to checksum errors */
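
The udp.c changes fall into three themes: skb_queue_empty() becomes skb_queue_empty_lockless() wherever the queue lock is not held, the scratch-area refresh in __first_packet_length() narrows to the one bit that can legitimately change, and GSO metadata is attached only when the payload actually spans more than one segment. That last guard, restated in isolation (helper name hypothetical):

        /* Mark the skb for segmentation only when more than one
         * gso_size worth of payload is present; a short datagram
         * goes out as a plain UDP packet. */
        static void udp_mark_gso(struct sk_buff *skb, int datalen, u16 gso_size)
        {
                if (datalen <= gso_size)
                        return;
                skb_shinfo(skb)->gso_size = gso_size;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
                skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, gso_size);
        }
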
index cdef8f9..35b84b5 100644 (file)
@@ -85,6 +85,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
        xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
                                              RTCF_LOCAL);
        xdst->u.rt.rt_type = rt->rt_type;
+       xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
        xdst->u.rt.rt_gw_family = rt->rt_gw_family;
        if (rt->rt_gw_family == AF_INET)
                xdst->u.rt.rt_gw4 = rt->rt_gw4;
index 6a576ff..34ccef1 100644 (file)
@@ -5964,13 +5964,20 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
        switch (event) {
        case RTM_NEWADDR:
                /*
-                * If the address was optimistic
-                * we inserted the route at the start of
-                * our DAD process, so we don't need
-                * to do it again
+                * If the address was optimistic we inserted the route at the
+                * start of our DAD process, so we don't need to do it again.
+                * If the device was taken down in the middle of the DAD
+                * cycle there is a race where we could get here without a
+                * host route, so nothing to insert. That will be fixed when
+                * the device is brought up.
                 */
-               if (!rcu_access_pointer(ifp->rt->fib6_node))
+               if (ifp->rt && !rcu_access_pointer(ifp->rt->fib6_node)) {
                        ip6_ins_rt(net, ifp->rt);
+               } else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) {
+                       pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n",
+                               &ifp->addr, ifp->idev->dev->name);
+               }
+
                if (ifp->idev->cnf.forwarding)
                        addrconf_join_anycast(ifp);
                if (!ipv6_addr_any(&ifp->peer_addr))
index 783f3c1..2fc0792 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <net/ipv6.h>
 #include <net/ipv6_stubs.h>
+#include <net/addrconf.h>
 #include <net/ip.h>
 
 /* if ipv6 module registers this function is used by xfrm to force all
index d22b6c1..f9e8fe3 100644 (file)
@@ -287,7 +287,8 @@ static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
        return false;
 
 suppress_route:
-       ip6_rt_put(rt);
+       if (!(arg->flags & FIB_LOOKUP_NOREF))
+               ip6_rt_put(rt);
        return true;
 }
 
index 4da24aa..0a0945a 100644 (file)
@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
        fl6.daddr = sk->sk_v6_daddr;
 
        res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
-                      np->tclass);
+                      np->tclass, sk->sk_priority);
        rcu_read_unlock();
        return res;
 }
index cf60fae..fbe9d42 100644 (file)
@@ -105,7 +105,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
                        return -1;
 
                score = 1;
-               if (sk->sk_incoming_cpu == raw_smp_processor_id())
+               if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
                        score++;
        }
        return score;
index 87f47bc..6e2af41 100644 (file)
@@ -318,7 +318,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
        if (rt->dst.error == -EAGAIN) {
                ip6_rt_put_flags(rt, flags);
                rt = net->ipv6.ip6_null_entry;
-               if (!(flags | RT6_LOOKUP_F_DST_NOREF))
+               if (!(flags & RT6_LOOKUP_F_DST_NOREF))
                        dst_hold(&rt->dst);
        }
 
index d5779d6..923034c 100644 (file)
@@ -980,9 +980,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                dsfield = key->tos;
                if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
                        goto tx_err;
-               md = ip_tunnel_info_opts(tun_info);
-               if (!md)
+               if (tun_info->options_len < sizeof(*md))
                        goto tx_err;
+               md = ip_tunnel_info_opts(tun_info);
 
                tun_id = tunnel_id_to_key32(key->tun_id);
                if (md->version == 1) {
@@ -2192,6 +2192,7 @@ static void ip6erspan_tap_setup(struct net_device *dev)
 {
        ether_setup(dev);
 
+       dev->max_mtu = 0;
        dev->netdev_ops = &ip6erspan_netdev_ops;
        dev->needs_free_netdev = true;
        dev->priv_destructor = ip6gre_dev_free;
index d432d00..3d71c7d 100644 (file)
@@ -223,6 +223,16 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
        if (ipv6_addr_is_multicast(&hdr->saddr))
                goto err;
 
+       /* While RFC4291 is not explicit about v4mapped addresses
+        * in IPv6 headers, it seems clear the Linux dual-stack
+        * model cannot deal properly with these.
+        * Security models could be fooled by ::ffff:127.0.0.1, for example.
+        *
+        * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02
+        */
+       if (ipv6_addr_v4mapped(&hdr->saddr))
+               goto err;
+
        skb->transport_header = skb->network_header + sizeof(*hdr);
        IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
@@ -371,7 +381,7 @@ resubmit_final:
                        /* Free reference early: we don't need it any more,
                           and it may hold ip_conntrack module loaded
                           indefinitely. */
-                       nf_reset(skb);
+                       nf_reset_ct(skb);
 
                        skb_postpull_rcsum(skb, skb_network_header(skb),
                                           skb_network_header_len(skb));
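
ip6_rcv_core() now drops packets whose IPv6 source address is v4-mapped, since ::ffff:127.0.0.1 and friends can fool address-based security checks on a dual-stack host. The predicate is roughly the following (a sketch; the in-tree ipv6_addr_v4mapped() also has a 64-bit fast path):

        /* ::ffff:a.b.c.d - first 80 bits zero, next 16 bits all ones. */
        static bool addr_is_v4mapped(const struct in6_addr *a)
        {
                return (a->s6_addr32[0] | a->s6_addr32[1]) == 0 &&
                        a->s6_addr32[2] == htonl(0x0000ffff);
        }
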
index 89a4c7c..71827b5 100644 (file)
@@ -193,7 +193,7 @@ bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
  * which are using proper atomic operations or spinlocks.
  */
 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
-            __u32 mark, struct ipv6_txoptions *opt, int tclass)
+            __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
 {
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -258,7 +258,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        hdr->daddr = *first_hop;
 
        skb->protocol = htons(ETH_P_IPV6);
-       skb->priority = sk->sk_priority;
+       skb->priority = priority;
        skb->mark = mark;
 
        mtu = dst_mtu(dst);
@@ -768,6 +768,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
+       ktime_t tstamp = skb->tstamp;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;
@@ -855,6 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);
 
+                       skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -913,6 +915,7 @@ slow_path:
                /*
                 *      Put this fragment into the sending queue.
                 */
+               frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;
index a9bff55..409e79b 100644 (file)
@@ -119,6 +119,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                                  struct sk_buff *))
 {
        int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+       ktime_t tstamp = skb->tstamp;
        struct ip6_frag_state state;
        u8 *prevhdr, nexthdr = 0;
        unsigned int mtu, hlen;
@@ -183,6 +184,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);
 
+                       skb->tstamp = tstamp;
                        err = output(net, sk, data, skb);
                        if (err || !iter.frag)
                                break;
@@ -215,6 +217,7 @@ slow_path:
                        goto blackhole;
                }
 
+               skb2->tstamp = tstamp;
                err = output(net, sk, data, skb2);
                if (err)
                        goto blackhole;
index 6120a78..69443e9 100644 (file)
@@ -170,13 +170,13 @@ config IP6_NF_MATCH_RT
          To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_SRH
-        tristate '"srh" Segment Routing header match support'
-        depends on NETFILTER_ADVANCED
-        help
-          srh matching allows you to match packets based on the segment
+       tristate '"srh" Segment Routing header match support'
+       depends on NETFILTER_ADVANCED
+       help
+         srh matching allows you to match packets based on the segment
          routing header of the packet.
 
-          To compile it as a module, choose M here.  If unsure, say N.
+         To compile it as a module, choose M here.  If unsure, say N.
 
 # The targets
 config IP6_NF_TARGET_HL
@@ -249,10 +249,10 @@ config IP6_NF_SECURITY
        depends on SECURITY
        depends on NETFILTER_ADVANCED
        help
-         This option adds a `security' table to iptables, for use
-         with Mandatory Access Control (MAC) policy.
+        This option adds a `security' table to iptables, for use
+        with Mandatory Access Control (MAC) policy.
 
-         If unsure, say N.
+        If unsure, say N.
 
 config IP6_NF_NAT
        tristate "ip6tables NAT support"
index e6c9da9..a0a2de3 100644 (file)
@@ -54,7 +54,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
                return;
 
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-       nf_reset(skb);
+       nf_reset_ct(skb);
        nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
 #endif
        if (hooknum == NF_INET_PRE_ROUTING ||
index 6e1888e..a77f6b7 100644 (file)
@@ -215,7 +215,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 
                        /* Not releasing hash table! */
                        if (clone) {
-                               nf_reset(clone);
+                               nf_reset_ct(clone);
                                rawv6_rcv(sk, clone);
                        }
                }
index 87f44d3..4804b6d 100644 (file)
@@ -215,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
-               tp->write_seq = 0;
+               WRITE_ONCE(tp->write_seq, 0);
        }
 
        sk->sk_v6_daddr = usin->sin6_addr;
@@ -311,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
        if (likely(!tp->repair)) {
                if (!tp->write_seq)
-                       tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
-                                                        sk->sk_v6_daddr.s6_addr32,
-                                                        inet->inet_sport,
-                                                        inet->inet_dport);
+                       WRITE_ONCE(tp->write_seq,
+                                  secure_tcpv6_seq(np->saddr.s6_addr32,
+                                                   sk->sk_v6_daddr.s6_addr32,
+                                                   inet->inet_sport,
+                                                   inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
@@ -406,7 +407,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
-       fastopen = tp->fastopen_rsk;
+       fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
@@ -512,7 +513,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
-               err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
+               err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
+                              sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
@@ -803,7 +805,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
-                                u8 tclass, __be32 label)
+                                u8 tclass, __be32 label, u32 priority)
 {
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
@@ -907,7 +909,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
        dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
-               ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
+               ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
+                        priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -930,6 +933,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
        struct sock *sk1 = NULL;
 #endif
        __be32 label = 0;
+       u32 priority = 0;
        struct net *net;
        int oif = 0;
 
@@ -990,16 +994,19 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
+                       priority = sk->sk_priority;
                }
-               if (sk->sk_state == TCP_TIME_WAIT)
+               if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
+                       priority = inet_twsk(sk)->tw_priority;
+               }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }
 
        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
-                            label);
+                            label, priority);
 
 #ifdef CONFIG_TCP_MD5SIG
 out:
@@ -1010,10 +1017,10 @@ out:
 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
-                           __be32 label)
+                           __be32 label, u32 priority)
 {
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
-                            tclass, label);
+                            tclass, label, priority);
 }
 
 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1025,7 +1032,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
-                       tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
+                       tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
 
        inet_twsk_put(tw);
 }
@@ -1048,7 +1055,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
-                       0, 0);
+                       0, 0, sk->sk_priority);
 }
 
 
@@ -1889,7 +1896,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
                /* Because we don't lock the socket,
                 * we might find a transient negative value.
                 */
-               rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+               rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+                                     READ_ONCE(tp->copied_seq), 0);
 
        seq_printf(seq,
                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
@@ -1900,7 +1908,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
                   state,
-                  tp->write_seq - tp->snd_una,
+                  READ_ONCE(tp->write_seq) - tp->snd_una,
                   rx_queue,
                   timer_active,
                   jiffies_delta_to_clock_t(timer_expires - jiffies),
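
The tcp_ipv6.c side mirrors the IPv4 priority plumbing: ip6_xmit() grows a priority argument, and every reply path selects it with the same state-dependent expression used in tcp_v4_send_reset()/tcp_v4_send_ack():

        u32 priority = (sk->sk_state == TCP_TIME_WAIT) ?
                        inet_twsk(sk)->tw_priority : sk->sk_priority;

tw_priority itself is captured from sk_priority in tcp_time_wait() (see the tcp_minisocks.c hunk above), so RST/ACK replies sent on behalf of a TIME_WAIT socket keep the traffic class the full socket had.
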
index aae4938..9fec580 100644 (file)
@@ -135,7 +135,7 @@ static int compute_score(struct sock *sk, struct net *net,
                return -1;
        score++;
 
-       if (sk->sk_incoming_cpu == raw_smp_processor_id())
+       if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
                score++;
 
        return score;
@@ -1109,6 +1109,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
        __wsum csum = 0;
        int offset = skb_transport_offset(skb);
        int len = skb->len - offset;
+       int datalen = len - sizeof(*uh);
 
        /*
         * Create a UDP header
@@ -1141,8 +1142,12 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
                        return -EIO;
                }
 
-               skb_shinfo(skb)->gso_size = cork->gso_size;
-               skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+               if (datalen > cork->gso_size) {
+                       skb_shinfo(skb)->gso_size = cork->gso_size;
+                       skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+                       skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
+                                                                cork->gso_size);
+               }
                goto csum_partial;
        }
 
index 8f12f5c..ea9e734 100644 (file)
@@ -378,8 +378,12 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
 {
        struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
        struct bpf_prog *prog = psock->bpf_prog;
+       int res;
 
-       return BPF_PROG_RUN(prog, skb);
+       preempt_disable();
+       res = BPF_PROG_RUN(prog, skb);
+       preempt_enable();
+       return res;
 }
 
 static int kcm_read_sock_done(struct strparser *strp, int err)
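
The kcm fix pins the CPU while the parser program runs: BPF programs rely on not migrating mid-run (per-CPU maps, per-CPU scratch buffers), but strparser may invoke this callback from a preemptible context. The shape of the fix as a standalone helper (name hypothetical):

        static int kcm_run_prog(struct bpf_prog *prog, struct sk_buff *skb)
        {
                int res;

                /* Disable preemption so the program cannot be migrated
                 * to another CPU while it executes. */
                preempt_disable();
                res = BPF_PROG_RUN(prog, skb);
                preempt_enable();
                return res;
        }
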
index 105e5a7..f82ea12 100644 (file)
@@ -1078,7 +1078,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        bh_lock_sock(sk);
        if (sock_owned_by_user(sk)) {
index bd3f393..d3b520b 100644 (file)
@@ -56,7 +56,6 @@ static int l2tp_eth_dev_init(struct net_device *dev)
 {
        eth_hw_addr_random(dev);
        eth_broadcast_addr(dev->broadcast);
-       netdev_lockdep_set_classes(dev);
 
        return 0;
 }
@@ -151,7 +150,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
        skb->ip_summed = CHECKSUM_NONE;
 
        skb_dst_drop(skb);
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        rcu_read_lock();
        dev = rcu_dereference(spriv->dev);
index 6228333..0d7c887 100644 (file)
@@ -193,7 +193,7 @@ pass_up:
        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_put;
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        return sk_receive_skb(sk, skb, 1);
 
index 687e23a..802f19a 100644 (file)
@@ -206,7 +206,7 @@ pass_up:
        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_put;
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        return sk_receive_skb(sk, skb, 1);
 
index 2017b7d..c74f44d 100644 (file)
@@ -113,22 +113,26 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
  *
  *     Send data via reliable llc2 connection.
  *     Returns 0 upon success, non-zero if action did not succeed.
+ *
+ *     This function always consumes a reference to the skb.
  */
 static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
 {
        struct llc_sock* llc = llc_sk(sk);
-       int rc = 0;
 
        if (unlikely(llc_data_accept_state(llc->state) ||
                     llc->remote_busy_flag ||
                     llc->p_flag)) {
                long timeout = sock_sndtimeo(sk, noblock);
+               int rc;
 
                rc = llc_ui_wait_for_busy_core(sk, timeout);
+               if (rc) {
+                       kfree_skb(skb);
+                       return rc;
+               }
        }
-       if (unlikely(!rc))
-               rc = llc_build_and_send_pkt(sk, skb);
-       return rc;
+       return llc_build_and_send_pkt(sk, skb);
 }
 
 static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
@@ -899,7 +903,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
        int flags = msg->msg_flags;
        int noblock = flags & MSG_DONTWAIT;
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
        size_t size = 0;
        int rc = -EINVAL, copied = 0, hdrlen;
 
@@ -908,10 +912,10 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        lock_sock(sk);
        if (addr) {
                if (msg->msg_namelen < sizeof(*addr))
-                       goto release;
+                       goto out;
        } else {
                if (llc_ui_addr_null(&llc->addr))
-                       goto release;
+                       goto out;
                addr = &llc->addr;
        }
        /* must bind connection to sap if user hasn't done it. */
@@ -919,7 +923,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
                /* bind to sap with null dev, exclusive. */
                rc = llc_ui_autobind(sock, addr);
                if (rc)
-                       goto release;
+                       goto out;
        }
        hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
        size = hdrlen + len;
@@ -928,12 +932,12 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        copied = size - hdrlen;
        rc = -EINVAL;
        if (copied < 0)
-               goto release;
+               goto out;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (!skb)
-               goto release;
+               goto out;
        skb->dev      = llc->dev;
        skb->protocol = llc_proto_type(addr->sllc_arphrd);
        skb_reserve(skb, hdrlen);
@@ -943,29 +947,31 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) {
                llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac,
                                          addr->sllc_sap);
+               skb = NULL;
                goto out;
        }
        if (addr->sllc_test) {
                llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac,
                                            addr->sllc_sap);
+               skb = NULL;
                goto out;
        }
        if (addr->sllc_xid) {
                llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac,
                                           addr->sllc_sap);
+               skb = NULL;
                goto out;
        }
        rc = -ENOPROTOOPT;
        if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua))
                goto out;
        rc = llc_ui_send_data(sk, skb, noblock);
+       skb = NULL;
 out:
-       if (rc) {
-               kfree_skb(skb);
-release:
+       kfree_skb(skb);
+       if (rc)
                dprintk("%s: failed sending from %02X to %02X: %d\n",
                        __func__, llc->laddr.lsap, llc->daddr.lsap, rc);
-       }
        release_sock(sk);
        return rc ? : copied;
 }
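
The af_llc.c rework settles the skb ownership rule the remaining LLC patches enforce: once a build-and-send helper is called, it consumes the reference in every outcome, so the caller NULLs its local pointer and funnels all exits through a single kfree_skb(), which is a no-op on NULL. The convention, sketched with hypothetical helpers:

        /* Callee: consumes the skb on success and on every error path. */
        static int xmit_consumes(struct sock *sk, struct sk_buff *skb)
        {
                if (sk->sk_state == TCP_CLOSE) {
                        kfree_skb(skb);         /* error path frees */
                        return -ENOTCONN;
                }
                return dev_queue_xmit(skb);     /* xmit always consumes */
        }

        static int send_one(struct sock *sk, struct sk_buff *skb)
        {
                int rc = xmit_consumes(sk, skb);

                skb = NULL;             /* ownership passed: never reuse */
                kfree_skb(skb);         /* common exit; no-op on NULL */
                return rc;
        }
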
index 4d78375..647c055 100644 (file)
@@ -372,6 +372,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb)
        llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR);
        rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
        if (likely(!rc)) {
+               skb_get(skb);
                llc_conn_send_pdu(sk, skb);
                llc_conn_ac_inc_vs_by_1(sk, skb);
        }
@@ -389,7 +390,8 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb)
        llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
        rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
        if (likely(!rc)) {
-               rc = llc_conn_send_pdu(sk, skb);
+               skb_get(skb);
+               llc_conn_send_pdu(sk, skb);
                llc_conn_ac_inc_vs_by_1(sk, skb);
        }
        return rc;
@@ -406,6 +408,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
        llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
        rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
        if (likely(!rc)) {
+               skb_get(skb);
                llc_conn_send_pdu(sk, skb);
                llc_conn_ac_inc_vs_by_1(sk, skb);
        }
@@ -916,7 +919,8 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
        llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
        rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
        if (likely(!rc)) {
-               rc = llc_conn_send_pdu(sk, skb);
+               skb_get(skb);
+               llc_conn_send_pdu(sk, skb);
                llc_conn_ac_inc_vs_by_1(sk, skb);
        }
        return rc;
index 4ff89cb..7b620ac 100644 (file)
@@ -30,7 +30,7 @@
 #endif
 
 static int llc_find_offset(int state, int ev_type);
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
+static void llc_conn_send_pdus(struct sock *sk);
 static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
 static int llc_exec_conn_trans_actions(struct sock *sk,
                                       struct llc_conn_state_trans *trans,
@@ -55,6 +55,8 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ;
  *     (executing it's actions and changing state), upper layer will be
  *     indicated or confirmed, if needed. Returns 0 for success, 1 for
  *     failure. The socket lock has to be held before calling this function.
+ *
+ *     This function always consumes a reference to the skb.
  */
 int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
 {
@@ -62,12 +64,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
        struct llc_sock *llc = llc_sk(skb->sk);
        struct llc_conn_state_ev *ev = llc_conn_ev(skb);
 
-       /*
-        * We have to hold the skb, because llc_conn_service will kfree it in
-        * the sending path and we need to look at the skb->cb, where we encode
-        * llc_conn_state_ev.
-        */
-       skb_get(skb);
        ev->ind_prim = ev->cfm_prim = 0;
        /*
         * Send event to state machine
@@ -75,21 +71,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
        rc = llc_conn_service(skb->sk, skb);
        if (unlikely(rc != 0)) {
                printk(KERN_ERR "%s: llc_conn_service failed\n", __func__);
-               goto out_kfree_skb;
-       }
-
-       if (unlikely(!ev->ind_prim && !ev->cfm_prim)) {
-               /* indicate or confirm not required */
-               if (!skb->next)
-                       goto out_kfree_skb;
                goto out_skb_put;
        }
 
-       if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */
-               skb_get(skb);
-
        switch (ev->ind_prim) {
        case LLC_DATA_PRIM:
+               skb_get(skb);
                llc_save_primitive(sk, skb, LLC_DATA_PRIM);
                if (unlikely(sock_queue_rcv_skb(sk, skb))) {
                        /*
@@ -106,6 +93,7 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
                 * skb->sk pointing to the newly created struct sock in
                 * llc_conn_handler. -acme
                 */
+               skb_get(skb);
                skb_queue_tail(&sk->sk_receive_queue, skb);
                sk->sk_state_change(sk);
                break;
@@ -121,7 +109,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
                                sk->sk_state_change(sk);
                        }
                }
-               kfree_skb(skb);
                sock_put(sk);
                break;
        case LLC_RESET_PRIM:
@@ -130,14 +117,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
                 * RESET is not being notified to upper layers for now
                 */
                printk(KERN_INFO "%s: received a reset ind!\n", __func__);
-               kfree_skb(skb);
                break;
        default:
-               if (ev->ind_prim) {
+               if (ev->ind_prim)
                        printk(KERN_INFO "%s: received unknown %d prim!\n",
                                __func__, ev->ind_prim);
-                       kfree_skb(skb);
-               }
                /* No indication */
                break;
        }
@@ -179,25 +163,22 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
                printk(KERN_INFO "%s: received a reset conf!\n", __func__);
                break;
        default:
-               if (ev->cfm_prim) {
+               if (ev->cfm_prim)
                        printk(KERN_INFO "%s: received unknown %d prim!\n",
                                        __func__, ev->cfm_prim);
-                       break;
-               }
-               goto out_skb_put; /* No confirmation */
+               /* No confirmation */
+               break;
        }
-out_kfree_skb:
-       kfree_skb(skb);
 out_skb_put:
        kfree_skb(skb);
        return rc;
 }
 
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
 {
        /* queue PDU to send to MAC layer */
        skb_queue_tail(&sk->sk_write_queue, skb);
-       return llc_conn_send_pdus(sk, skb);
+       llc_conn_send_pdus(sk);
 }
 
 /**
@@ -255,7 +236,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit)
        if (howmany_resend > 0)
                llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
        /* any PDUs to re-send are queued up; start sending to MAC */
-       llc_conn_send_pdus(sk, NULL);
+       llc_conn_send_pdus(sk);
 out:;
 }
 
@@ -296,7 +277,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit)
        if (howmany_resend > 0)
                llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
        /* any PDUs to re-send are queued up; start sending to MAC */
-       llc_conn_send_pdus(sk, NULL);
+       llc_conn_send_pdus(sk);
 out:;
 }
 
@@ -340,16 +321,12 @@ out:
 /**
  *     llc_conn_send_pdus - Sends queued PDUs
  *     @sk: active connection
- *     @hold_skb: the skb held by caller, or NULL if does not care
  *
- *     Sends queued pdus to MAC layer for transmission. When @hold_skb is
- *     NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
- *     successfully, or 1 for failure.
+ *     Sends queued pdus to MAC layer for transmission.
  */
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
+static void llc_conn_send_pdus(struct sock *sk)
 {
        struct sk_buff *skb;
-       int ret = 0;
 
        while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
                struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
@@ -361,20 +338,10 @@ static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
                        skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
                        if (!skb2)
                                break;
-                       dev_queue_xmit(skb2);
-               } else {
-                       bool is_target = skb == hold_skb;
-                       int rc;
-
-                       if (is_target)
-                               skb_get(skb);
-                       rc = dev_queue_xmit(skb);
-                       if (is_target)
-                               ret = rc;
+                       skb = skb2;
                }
+               dev_queue_xmit(skb);
        }
-
-       return ret;
 }
 
 /**
@@ -846,7 +813,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
        else {
                dprintk("%s: adding to backlog...\n", __func__);
                llc_set_backlog_type(skb, LLC_PACKET);
-               if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+               if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
                        goto drop_unlock;
        }
 out:
index 8db03c2..ad65477 100644 (file)
@@ -38,6 +38,8 @@
  *     closed and -EBUSY when sending data is not permitted in this state or
  *     LLC has sent an I pdu with p bit set to 1 and is waiting for its
  *     response.
+ *
+ *     This function always consumes a reference to the skb.
  */
 int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
 {
@@ -46,20 +48,22 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
        struct llc_sock *llc = llc_sk(sk);
 
        if (unlikely(llc->state == LLC_CONN_STATE_ADM))
-               goto out;
+               goto out_free;
        rc = -EBUSY;
        if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */
                     llc->p_flag)) {
                llc->failed_data_req = 1;
-               goto out;
+               goto out_free;
        }
        ev = llc_conn_ev(skb);
        ev->type      = LLC_CONN_EV_TYPE_PRIM;
        ev->prim      = LLC_DATA_PRIM;
        ev->prim_type = LLC_PRIM_TYPE_REQ;
        skb->dev      = llc->dev;
-       rc = llc_conn_state_process(sk, skb);
-out:
+       return llc_conn_state_process(sk, skb);
+
+out_free:
+       kfree_skb(skb);
        return rc;
 }
 
index a94bd56..7ae4cc6 100644 (file)
@@ -58,8 +58,10 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb)
                            ev->daddr.lsap, LLC_PDU_CMD);
        llc_pdu_init_as_ui_cmd(skb);
        rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
-       if (likely(!rc))
+       if (likely(!rc)) {
+               skb_get(skb);
                rc = dev_queue_xmit(skb);
+       }
        return rc;
 }
 
@@ -81,8 +83,10 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
                            ev->daddr.lsap, LLC_PDU_CMD);
        llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
        rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
-       if (likely(!rc))
+       if (likely(!rc)) {
+               skb_get(skb);
                rc = dev_queue_xmit(skb);
+       }
        return rc;
 }
 
@@ -135,8 +139,10 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb)
                            ev->daddr.lsap, LLC_PDU_CMD);
        llc_pdu_init_as_test_cmd(skb);
        rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
-       if (likely(!rc))
+       if (likely(!rc)) {
+               skb_get(skb);
                rc = dev_queue_xmit(skb);
+       }
        return rc;
 }
 
index a7f7b8f..be41906 100644 (file)
@@ -197,29 +197,22 @@ out:
  *     After executing actions of the event, upper layer will be indicated
  *     if needed (on receiving a UI frame). sk can be null for the
  *     datalink_proto case.
+ *
+ *     This function always consumes a reference to the skb.
  */
 static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb)
 {
        struct llc_sap_state_ev *ev = llc_sap_ev(skb);
 
-       /*
-        * We have to hold the skb, because llc_sap_next_state
-        * will kfree it in the sending path and we need to
-        * look at the skb->cb, where we encode llc_sap_state_ev.
-        */
-       skb_get(skb);
        ev->ind_cfm_flag = 0;
        llc_sap_next_state(sap, skb);
-       if (ev->ind_cfm_flag == LLC_IND) {
-               if (skb->sk->sk_state == TCP_LISTEN)
-                       kfree_skb(skb);
-               else {
-                       llc_save_primitive(skb->sk, skb, ev->prim);
 
-                       /* queue skb to the user. */
-                       if (sock_queue_rcv_skb(skb->sk, skb))
-                               kfree_skb(skb);
-               }
+       if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) {
+               llc_save_primitive(skb->sk, skb, ev->prim);
+
+               /* queue skb to the user. */
+               if (sock_queue_rcv_skb(skb->sk, skb) == 0)
+                       return;
        }
        kfree_skb(skb);
 }
index b1438fd..64b544a 100644 (file)
@@ -487,9 +487,14 @@ static ssize_t ieee80211_if_fmt_aqm(
        const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
 {
        struct ieee80211_local *local = sdata->local;
-       struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+       struct txq_info *txqi;
        int len;
 
+       if (!sdata->vif.txq)
+               return 0;
+
+       txqi = to_txq_info(sdata->vif.txq);
+
        spin_lock_bh(&local->fq.lock);
        rcu_read_lock();
 
@@ -658,7 +663,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata)
        DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz);
        DEBUGFS_ADD(hw_queues);
 
-       if (sdata->local->ops->wake_tx_queue)
+       if (sdata->local->ops->wake_tx_queue &&
+           sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+           sdata->vif.type != NL80211_IFTYPE_NAN)
                DEBUGFS_ADD(aqm);
 }
 
index 26a2f49..54dd884 100644 (file)
@@ -2633,7 +2633,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
 
        rcu_read_lock();
        ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID);
-       if (WARN_ON_ONCE(ssid == NULL))
+       if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN,
+                     "invalid SSID element (len=%d)", ssid ? ssid[1] : -1))
                ssid_len = 0;
        else
                ssid_len = ssid[1];
@@ -5233,7 +5234,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 
        rcu_read_lock();
        ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID);
-       if (!ssidie) {
+       if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) {
                rcu_read_unlock();
                kfree(assoc_data);
                return -EINVAL;
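
Both mac80211 hunks above validate the SSID element's length byte against the destination buffer before trusting it. A small, self-contained sketch of the same rule for any length-prefixed element (the TLV layout and names here are illustrative, not mac80211's):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define MAX_SSID_LEN 32

/* elem points at a TLV: elem[0] = id, elem[1] = len, elem[2..] = data.
 * Validate len against both the remaining input and the destination
 * before copying; never trust the length byte alone. */
static int copy_ssid(const uint8_t *elem, size_t avail,
		     uint8_t dst[MAX_SSID_LEN], uint8_t *out_len)
{
	if (avail < 2)
		return -1;
	uint8_t len = elem[1];
	if (len > MAX_SSID_LEN || (size_t)len + 2 > avail)
		return -1;             /* malformed or oversized element */
	memcpy(dst, elem + 2, len);
	*out_len = len;
	return 0;
}

int main(void) {
	uint8_t elem[] = { 0x00, 4, 't', 'e', 's', 't' };
	uint8_t ssid[MAX_SSID_LEN], n;
	if (copy_ssid(elem, sizeof(elem), ssid, &n) == 0)
		printf("ssid len %u\n", n);
	return 0;
}
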
index 768d14c..0e05ff0 100644 (file)
@@ -3467,9 +3467,18 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
        case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP):
                /* process for all: mesh, mlme, ibss */
                break;
+       case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
+               if (is_multicast_ether_addr(mgmt->da) &&
+                   !is_broadcast_ether_addr(mgmt->da))
+                       return RX_DROP_MONITOR;
+
+               /* process only for station/IBSS */
+               if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+                   sdata->vif.type != NL80211_IFTYPE_ADHOC)
+                       return RX_DROP_MONITOR;
+               break;
        case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP):
        case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP):
-       case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
        case cpu_to_le16(IEEE80211_STYPE_DISASSOC):
                if (is_multicast_ether_addr(mgmt->da) &&
                    !is_broadcast_ether_addr(mgmt->da))
index adf94ba..4d31d96 100644 (file)
@@ -520,10 +520,33 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
        return 0;
 }
 
+static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
+{
+       struct ieee80211_local *local = sdata->local;
+       struct ieee80211_sub_if_data *sdata_iter;
+
+       if (!ieee80211_is_radar_required(local))
+               return true;
+
+       if (!regulatory_pre_cac_allowed(local->hw.wiphy))
+               return false;
+
+       mutex_lock(&local->iflist_mtx);
+       list_for_each_entry(sdata_iter, &local->interfaces, list) {
+               if (sdata_iter->wdev.cac_started) {
+                       mutex_unlock(&local->iflist_mtx);
+                       return false;
+               }
+       }
+       mutex_unlock(&local->iflist_mtx);
+
+       return true;
+}
+
 static bool ieee80211_can_scan(struct ieee80211_local *local,
                               struct ieee80211_sub_if_data *sdata)
 {
-       if (ieee80211_is_radar_required(local))
+       if (!__ieee80211_can_leave_ch(sdata))
                return false;
 
        if (!list_empty(&local->roc_list))
@@ -630,7 +653,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 
        lockdep_assert_held(&local->mtx);
 
-       if (local->scan_req || ieee80211_is_radar_required(local))
+       if (local->scan_req)
+               return -EBUSY;
+
+       if (!__ieee80211_can_leave_ch(sdata))
                return -EBUSY;
 
        if (!ieee80211_can_scan(local, sdata)) {
index 051a02d..32a7a53 100644 (file)
@@ -247,7 +247,8 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
        struct sta_info *sta;
        int i;
 
-       spin_lock_bh(&fq->lock);
+       local_bh_disable();
+       spin_lock(&fq->lock);
 
        if (sdata->vif.type == NL80211_IFTYPE_AP)
                ps = &sdata->bss->ps;
@@ -273,9 +274,9 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
                                                &txqi->flags))
                                continue;
 
-                       spin_unlock_bh(&fq->lock);
+                       spin_unlock(&fq->lock);
                        drv_wake_tx_queue(local, txqi);
-                       spin_lock_bh(&fq->lock);
+                       spin_lock(&fq->lock);
                }
        }
 
@@ -288,12 +289,14 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
            (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
                goto out;
 
-       spin_unlock_bh(&fq->lock);
+       spin_unlock(&fq->lock);
 
        drv_wake_tx_queue(local, txqi);
+       local_bh_enable();
        return;
 out:
-       spin_unlock_bh(&fq->lock);
+       spin_unlock(&fq->lock);
+       local_bh_enable();
 }
 
 static void
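
The hunk above splits spin_lock_bh() into local_bh_disable() plus spin_lock() so that bottom halves stay disabled across the windows where the lock is dropped to call drv_wake_tx_queue(). There is no direct userspace equivalent; as a loose analogue, blocking signals plays the role of disabling BHs in this sketch:

#include <pthread.h>
#include <signal.h>
#include <stdio.h>

static pthread_mutex_t fq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Analogue of spin_lock_bh(): blocking signals pairs the "lock" with
 * "no interruptions".  When the lock must be dropped temporarily but
 * interruptions must stay off, split the two operations, as the
 * mac80211 fix does with local_bh_disable()/spin_lock(). */
static void wake_queues(void)
{
	sigset_t all, old;
	sigfillset(&all);
	pthread_sigmask(SIG_BLOCK, &all, &old);   /* ~ local_bh_disable() */
	pthread_mutex_lock(&fq_lock);             /* ~ spin_lock()        */

	for (int i = 0; i < 3; i++) {
		pthread_mutex_unlock(&fq_lock);   /* drop lock for callback */
		printf("wake txq %d, signals still blocked\n", i);
		pthread_mutex_lock(&fq_lock);
	}

	pthread_mutex_unlock(&fq_lock);
	pthread_sigmask(SIG_SETMASK, &old, NULL); /* ~ local_bh_enable()  */
}

int main(void) { wake_queues(); return 0; }
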
index 0b3f067..ad3fd7f 100644 (file)
@@ -264,9 +264,7 @@ enum {
        ncsi_dev_state_config_ev,
        ncsi_dev_state_config_sma,
        ncsi_dev_state_config_ebf,
-#if IS_ENABLED(CONFIG_IPV6)
-       ncsi_dev_state_config_egmf,
-#endif
+       ncsi_dev_state_config_dgmf,
        ncsi_dev_state_config_ecnt,
        ncsi_dev_state_config_ec,
        ncsi_dev_state_config_ae,
@@ -295,9 +293,6 @@ struct ncsi_dev_priv {
 #define NCSI_DEV_RESET         8            /* Reset state of NC          */
        unsigned int        gma_flag;        /* OEM GMA flag               */
        spinlock_t          lock;            /* Protect the NCSI device    */
-#if IS_ENABLED(CONFIG_IPV6)
-       unsigned int        inet6_addr_num;  /* Number of IPv6 addresses   */
-#endif
        unsigned int        package_probe_id;/* Current ID during probe    */
        unsigned int        package_num;     /* Number of packages         */
        struct list_head    packages;        /* List of packages           */
index 755aab6..70fe026 100644 (file)
@@ -14,7 +14,6 @@
 #include <net/sock.h>
 #include <net/addrconf.h>
 #include <net/ipv6.h>
-#include <net/if_inet6.h>
 #include <net/genetlink.h>
 
 #include "internal.h"
@@ -978,9 +977,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
        case ncsi_dev_state_config_ev:
        case ncsi_dev_state_config_sma:
        case ncsi_dev_state_config_ebf:
-#if IS_ENABLED(CONFIG_IPV6)
-       case ncsi_dev_state_config_egmf:
-#endif
+       case ncsi_dev_state_config_dgmf:
        case ncsi_dev_state_config_ecnt:
        case ncsi_dev_state_config_ec:
        case ncsi_dev_state_config_ae:
@@ -1033,23 +1030,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
                } else if (nd->state == ncsi_dev_state_config_ebf) {
                        nca.type = NCSI_PKT_CMD_EBF;
                        nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap;
-                       if (ncsi_channel_is_tx(ndp, nc))
+                       /* If multicast global filtering is supported,
+                        * disable it so that all multicast packets are
+                        * forwarded to the management controller.
+                        */
+                       if (nc->caps[NCSI_CAP_GENERIC].cap &
+                           NCSI_CAP_GENERIC_MC)
+                               nd->state = ncsi_dev_state_config_dgmf;
+                       else if (ncsi_channel_is_tx(ndp, nc))
                                nd->state = ncsi_dev_state_config_ecnt;
                        else
                                nd->state = ncsi_dev_state_config_ec;
-#if IS_ENABLED(CONFIG_IPV6)
-                       if (ndp->inet6_addr_num > 0 &&
-                           (nc->caps[NCSI_CAP_GENERIC].cap &
-                            NCSI_CAP_GENERIC_MC))
-                               nd->state = ncsi_dev_state_config_egmf;
-               } else if (nd->state == ncsi_dev_state_config_egmf) {
-                       nca.type = NCSI_PKT_CMD_EGMF;
-                       nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
+               } else if (nd->state == ncsi_dev_state_config_dgmf) {
+                       nca.type = NCSI_PKT_CMD_DGMF;
                        if (ncsi_channel_is_tx(ndp, nc))
                                nd->state = ncsi_dev_state_config_ecnt;
                        else
                                nd->state = ncsi_dev_state_config_ec;
-#endif /* CONFIG_IPV6 */
                } else if (nd->state == ncsi_dev_state_config_ecnt) {
                        if (np->preferred_channel &&
                            nc != np->preferred_channel)
@@ -1483,70 +1480,6 @@ out:
        return -ENODEV;
 }
 
-#if IS_ENABLED(CONFIG_IPV6)
-static int ncsi_inet6addr_event(struct notifier_block *this,
-                               unsigned long event, void *data)
-{
-       struct inet6_ifaddr *ifa = data;
-       struct net_device *dev = ifa->idev->dev;
-       struct ncsi_dev *nd = ncsi_find_dev(dev);
-       struct ncsi_dev_priv *ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
-       struct ncsi_package *np;
-       struct ncsi_channel *nc;
-       struct ncsi_cmd_arg nca;
-       bool action;
-       int ret;
-
-       if (!ndp || (ipv6_addr_type(&ifa->addr) &
-           (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)))
-               return NOTIFY_OK;
-
-       switch (event) {
-       case NETDEV_UP:
-               action = (++ndp->inet6_addr_num) == 1;
-               nca.type = NCSI_PKT_CMD_EGMF;
-               break;
-       case NETDEV_DOWN:
-               action = (--ndp->inet6_addr_num == 0);
-               nca.type = NCSI_PKT_CMD_DGMF;
-               break;
-       default:
-               return NOTIFY_OK;
-       }
-
-       /* We might not have active channel or packages. The IPv6
-        * required multicast will be enabled when active channel
-        * or packages are chosen.
-        */
-       np = ndp->active_package;
-       nc = ndp->active_channel;
-       if (!action || !np || !nc)
-               return NOTIFY_OK;
-
-       /* We needn't enable or disable it if the function isn't supported */
-       if (!(nc->caps[NCSI_CAP_GENERIC].cap & NCSI_CAP_GENERIC_MC))
-               return NOTIFY_OK;
-
-       nca.ndp = ndp;
-       nca.req_flags = 0;
-       nca.package = np->id;
-       nca.channel = nc->id;
-       nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
-       ret = ncsi_xmit_cmd(&nca);
-       if (ret) {
-               netdev_warn(dev, "Fail to %s global multicast filter (%d)\n",
-                           (event == NETDEV_UP) ? "enable" : "disable", ret);
-               return NOTIFY_DONE;
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block ncsi_inet6addr_notifier = {
-       .notifier_call = ncsi_inet6addr_event,
-};
-#endif /* CONFIG_IPV6 */
-
 static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
 {
        struct ncsi_dev *nd = &ndp->ndev;
@@ -1725,11 +1658,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
        }
 
        spin_lock_irqsave(&ncsi_dev_lock, flags);
-#if IS_ENABLED(CONFIG_IPV6)
-       ndp->inet6_addr_num = 0;
-       if (list_empty(&ncsi_dev_list))
-               register_inet6addr_notifier(&ncsi_inet6addr_notifier);
-#endif
        list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
        spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
@@ -1896,10 +1824,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
 
        spin_lock_irqsave(&ncsi_dev_lock, flags);
        list_del_rcu(&ndp->node);
-#if IS_ENABLED(CONFIG_IPV6)
-       if (list_empty(&ncsi_dev_list))
-               unregister_inet6addr_notifier(&ncsi_inet6addr_notifier);
-#endif
        spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
        ncsi_unregister_netlink(nd->dev);
index 34ec7af..91efae8 100644 (file)
@@ -697,7 +697,7 @@ config NF_FLOW_TABLE_INET
        tristate "Netfilter flow table mixed IPv4/IPv6 module"
        depends on NF_FLOW_TABLE
        help
-          This option adds the flow table mixed IPv4/IPv6 support.
+         This option adds the flow table mixed IPv4/IPv6 support.
 
          To compile it as a module, choose M here.
 
index f6f1a0d..5b672e0 100644 (file)
@@ -135,7 +135,7 @@ config      IP_VS_WRR
          module, choose M here. If unsure, say N.
 
 config IP_VS_LC
-        tristate "least-connection scheduling"
+       tristate "least-connection scheduling"
        ---help---
          The least-connection scheduling algorithm directs network
          connections to the server with the least number of active 
@@ -145,7 +145,7 @@ config      IP_VS_LC
          module, choose M here. If unsure, say N.
 
 config IP_VS_WLC
-        tristate "weighted least-connection scheduling"
+       tristate "weighted least-connection scheduling"
        ---help---
          The weighted least-connection scheduling algorithm directs network
          connections to the server with the least active connections
@@ -333,7 +333,7 @@ config      IP_VS_NFCT
 
 config IP_VS_PE_SIP
        tristate "SIP persistence engine"
-        depends on IP_VS_PROTO_UDP
+       depends on IP_VS_PROTO_UDP
        depends on NF_CONNTRACK_SIP
        ---help---
          Allow persistence based on the SIP Call-ID
index 4515056..f9b16f2 100644 (file)
@@ -193,21 +193,29 @@ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *
 
        mutex_lock(&__ip_vs_app_mutex);
 
+       /* increase the module use count */
+       if (!ip_vs_use_count_inc()) {
+               err = -ENOENT;
+               goto out_unlock;
+       }
+
        list_for_each_entry(a, &ipvs->app_list, a_list) {
                if (!strcmp(app->name, a->name)) {
                        err = -EEXIST;
+                       /* decrease the module use count */
+                       ip_vs_use_count_dec();
                        goto out_unlock;
                }
        }
        a = kmemdup(app, sizeof(*app), GFP_KERNEL);
        if (!a) {
                err = -ENOMEM;
+               /* decrease the module use count */
+               ip_vs_use_count_dec();
                goto out_unlock;
        }
        INIT_LIST_HEAD(&a->incs_list);
        list_add(&a->a_list, &ipvs->app_list);
-       /* increase the module use count */
-       ip_vs_use_count_inc();
 
 out_unlock:
        mutex_unlock(&__ip_vs_app_mutex);
index 8b48e7c..3cccc88 100644 (file)
@@ -93,7 +93,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net,
 static void update_defense_level(struct netns_ipvs *ipvs)
 {
        struct sysinfo i;
-       static int old_secure_tcp = 0;
        int availmem;
        int nomem;
        int to_change = -1;
@@ -174,35 +173,35 @@ static void update_defense_level(struct netns_ipvs *ipvs)
        spin_lock(&ipvs->securetcp_lock);
        switch (ipvs->sysctl_secure_tcp) {
        case 0:
-               if (old_secure_tcp >= 2)
+               if (ipvs->old_secure_tcp >= 2)
                        to_change = 0;
                break;
        case 1:
                if (nomem) {
-                       if (old_secure_tcp < 2)
+                       if (ipvs->old_secure_tcp < 2)
                                to_change = 1;
                        ipvs->sysctl_secure_tcp = 2;
                } else {
-                       if (old_secure_tcp >= 2)
+                       if (ipvs->old_secure_tcp >= 2)
                                to_change = 0;
                }
                break;
        case 2:
                if (nomem) {
-                       if (old_secure_tcp < 2)
+                       if (ipvs->old_secure_tcp < 2)
                                to_change = 1;
                } else {
-                       if (old_secure_tcp >= 2)
+                       if (ipvs->old_secure_tcp >= 2)
                                to_change = 0;
                        ipvs->sysctl_secure_tcp = 1;
                }
                break;
        case 3:
-               if (old_secure_tcp < 2)
+               if (ipvs->old_secure_tcp < 2)
                        to_change = 1;
                break;
        }
-       old_secure_tcp = ipvs->sysctl_secure_tcp;
+       ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp;
        if (to_change >= 0)
                ip_vs_protocol_timeout_change(ipvs,
                                              ipvs->sysctl_secure_tcp > 1);
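
The hunk above moves old_secure_tcp from a function-static into the per-netns ipvs structure: a static would be shared by every namespace and corrupt each one's defense-level history. A toy illustration of keeping such history per instance (struct and field names invented):

#include <stdio.h>

/* Per-instance state lives in the instance, not in a function-static. */
struct netns {
	int secure_tcp;       /* sysctl value: 0..3 */
	int old_secure_tcp;   /* last value seen by the defense logic */
};

static void update_defense(struct netns *ns, int nomem)
{
	/* Compare against *this* namespace's history, not a global. */
	if (ns->secure_tcp == 1 && nomem && ns->old_secure_tcp < 2)
		ns->secure_tcp = 2;
	ns->old_secure_tcp = ns->secure_tcp;
}

int main(void)
{
	struct netns a = { .secure_tcp = 1 }, b = { .secure_tcp = 0 };
	update_defense(&a, 1);   /* a escalates... */
	update_defense(&b, 0);   /* ...without touching b's history */
	printf("a=%d/%d b=%d/%d\n", a.secure_tcp, a.old_secure_tcp,
	       b.secure_tcp, b.old_secure_tcp);
	return 0;
}
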
@@ -1275,7 +1274,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
        struct ip_vs_service *svc = NULL;
 
        /* increase the module use count */
-       ip_vs_use_count_inc();
+       if (!ip_vs_use_count_inc())
+               return -ENOPROTOOPT;
 
        /* Lookup the scheduler by 'u->sched_name' */
        if (strcmp(u->sched_name, "none")) {
@@ -2435,9 +2435,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
        if (copy_from_user(arg, user, len) != 0)
                return -EFAULT;
 
-       /* increase the module use count */
-       ip_vs_use_count_inc();
-
        /* Handle daemons since they have another lock */
        if (cmd == IP_VS_SO_SET_STARTDAEMON ||
            cmd == IP_VS_SO_SET_STOPDAEMON) {
@@ -2450,13 +2447,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                        ret = -EINVAL;
                        if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
                                    sizeof(cfg.mcast_ifn)) <= 0)
-                               goto out_dec;
+                               return ret;
                        cfg.syncid = dm->syncid;
                        ret = start_sync_thread(ipvs, &cfg, dm->state);
                } else {
                        ret = stop_sync_thread(ipvs, dm->state);
                }
-               goto out_dec;
+               return ret;
        }
 
        mutex_lock(&__ip_vs_mutex);
@@ -2551,10 +2548,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
   out_unlock:
        mutex_unlock(&__ip_vs_mutex);
-  out_dec:
-       /* decrease the module use count */
-       ip_vs_use_count_dec();
-
        return ret;
 }
 
index 8e104df..166c669 100644 (file)
@@ -68,7 +68,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
        struct ip_vs_pe *tmp;
 
        /* increase the module use count */
-       ip_vs_use_count_inc();
+       if (!ip_vs_use_count_inc())
+               return -ENOENT;
 
        mutex_lock(&ip_vs_pe_mutex);
        /* Make sure that the pe with this name doesn't exist
index 2f9d5cd..d490372 100644 (file)
@@ -179,7 +179,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
        }
 
        /* increase the module use count */
-       ip_vs_use_count_inc();
+       if (!ip_vs_use_count_inc())
+               return -ENOENT;
 
        mutex_lock(&ip_vs_sched_mutex);
 
index a4a78c4..8dc892a 100644 (file)
@@ -1762,6 +1762,10 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
        IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
                  sizeof(struct ip_vs_sync_conn_v0));
 
+       /* increase the module use count */
+       if (!ip_vs_use_count_inc())
+               return -ENOPROTOOPT;
+
        /* Do not hold one mutex and then block on another */
        for (;;) {
                rtnl_lock();
@@ -1892,9 +1896,6 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
        mutex_unlock(&ipvs->sync_mutex);
        rtnl_unlock();
 
-       /* increase the module use count */
-       ip_vs_use_count_inc();
-
        return 0;
 
 out:
@@ -1924,11 +1925,17 @@ out:
                }
                kfree(ti);
        }
+
+       /* decrease the module use count */
+       ip_vs_use_count_dec();
        return result;
 
 out_early:
        mutex_unlock(&ipvs->sync_mutex);
        rtnl_unlock();
+
+       /* decrease the module use count */
+       ip_vs_use_count_dec();
        return result;
 }
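
Across these ipvs hunks, ip_vs_use_count_inc() becomes a fallible "pin the module first" operation: it is taken before any other work and released on every error path. A userspace sketch of the underlying get-unless-zero idiom (roughly what try_module_get() provides):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int use_count = 1;   /* 0 would mean "going away" */

/* Succeed only while the count is nonzero, like try_module_get(). */
static bool use_count_inc(void)
{
	int old = atomic_load(&use_count);
	while (old != 0)
		if (atomic_compare_exchange_weak(&use_count, &old, old + 1))
			return true;
	return false;
}
static void use_count_dec(void) { atomic_fetch_sub(&use_count, 1); }

static int register_thing(bool will_fail)
{
	if (!use_count_inc())     /* pin the owner before any other work */
		return -1;
	if (will_fail) {
		use_count_dec();  /* every error path drops the pin */
		return -2;
	}
	return 0;                 /* pin released later, at unregister */
}

int main(void)
{
	printf("%d %d\n", register_thing(false), register_thing(true));
	use_count_dec();          /* matches the successful registration */
	return 0;
}
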
 
index 9c464d2..888d306 100644 (file)
@@ -613,7 +613,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
        if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
                ret = ip_vs_confirm_conntrack(skb);
        if (ret == NF_ACCEPT) {
-               nf_reset(skb);
+               nf_reset_ct(skb);
                skb_forward_csum(skb);
        }
        return ret;
index 0c63120..5cd610b 100644 (file)
@@ -1792,8 +1792,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
        if (nf_ct_is_confirmed(ct))
                extra_jiffies += nfct_time_stamp;
 
-       if (ct->timeout != extra_jiffies)
-               ct->timeout = extra_jiffies;
+       if (READ_ONCE(ct->timeout) != extra_jiffies)
+               WRITE_ONCE(ct->timeout, extra_jiffies);
 acct:
        if (do_acct)
                nf_ct_acct_update(ct, ctinfo, skb->len);
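
The hunk above wraps the lockless ct->timeout accesses in READ_ONCE()/WRITE_ONCE() so the compiler cannot tear, fuse, or re-read them. C11 relaxed atomics give the equivalent guarantee in portable code; a sketch:

#include <stdatomic.h>
#include <stdio.h>

/* A field updated and read without a lock.  Relaxed atomics give the
 * same guarantee READ_ONCE()/WRITE_ONCE() give in the kernel: each
 * access is a single, untorn load or store. */
static _Atomic unsigned long timeout;

static void refresh(unsigned long extra_jiffies)
{
	/* Avoid dirtying the cache line when nothing changed. */
	if (atomic_load_explicit(&timeout, memory_order_relaxed)
	    != extra_jiffies)
		atomic_store_explicit(&timeout, extra_jiffies,
				      memory_order_relaxed);
}

int main(void)
{
	refresh(600);
	refresh(600);   /* second call skips the store */
	printf("%lu\n",
	       atomic_load_explicit(&timeout, memory_order_relaxed));
	return 0;
}
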
index 132f522..128245e 100644 (file)
@@ -202,6 +202,8 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 {
        int err;
 
+       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+
        err = rhashtable_insert_fast(&flow_table->rhashtable,
                                     &flow->tuplehash[0].node,
                                     nf_flow_offload_rhash_params);
@@ -218,7 +220,6 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
                return err;
        }
 
-       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
        return 0;
 }
 EXPORT_SYMBOL_GPL(flow_offload_add);
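
The hunk above initializes flow->timeout before the rhashtable insert: once inserted, the flow is visible to other CPUs, so a field set afterwards could briefly read as zero and the entry be reaped at once. The general init-then-publish rule, sketched with a release/acquire pointer:

#include <stdatomic.h>
#include <stdio.h>

struct flow { unsigned long timeout; };

static _Atomic(struct flow *) published;

/* Fully initialize the object, then publish it; after the release
 * store, other threads may read every field, so nothing can be
 * filled in afterwards. */
static void add_flow(struct flow *f, unsigned long now)
{
	f->timeout = now + 30;                       /* init first */
	atomic_store_explicit(&published, f,
			      memory_order_release); /* then publish */
}

int main(void)
{
	static struct flow f;
	add_flow(&f, 1000);
	struct flow *p = atomic_load_explicit(&published,
					      memory_order_acquire);
	printf("timeout=%lu\n", p->timeout);
	return 0;
}
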
index e4a68dc..d481f9b 100644 (file)
@@ -1715,7 +1715,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                goto err2;
        }
 
-       nft_trans_chain_policy(trans) = -1;
+       nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
        if (nft_is_base_chain(chain))
                nft_trans_chain_policy(trans) = policy;
 
@@ -3562,8 +3562,11 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
                              NFT_SET_OBJECT))
                        return -EINVAL;
                /* Only one of these operations is supported */
-               if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ==
-                            (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT))
+               if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
+                            (NFT_SET_MAP | NFT_SET_OBJECT))
+                       return -EOPNOTSUPP;
+               if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
+                            (NFT_SET_EVAL | NFT_SET_OBJECT))
                        return -EOPNOTSUPP;
        }
 
@@ -5595,6 +5598,22 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
 }
 EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
 
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+                                   struct nft_flowtable *flowtable,
+                                   enum nft_trans_phase phase)
+{
+       switch (phase) {
+       case NFT_TRANS_PREPARE:
+       case NFT_TRANS_ABORT:
+       case NFT_TRANS_RELEASE:
+               flowtable->use--;
+               /* fall through */
+       default:
+               return;
+       }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable);
+
 static struct nft_flowtable *
 nft_flowtable_lookup_byhandle(const struct nft_table *table,
                              const struct nlattr *nla, u8 genmask)
index 21bb772..ad783f4 100644 (file)
@@ -313,7 +313,7 @@ static int nft_flow_offload_chain(struct nft_chain *chain,
        policy = ppolicy ? *ppolicy : basechain->policy;
 
        /* Only default policy to accept is supported for now. */
-       if (cmd == FLOW_BLOCK_BIND && policy != -1 && policy != NF_ACCEPT)
+       if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
                return -EOPNOTSUPP;
 
        if (dev->netdev_ops->ndo_setup_tc)
@@ -347,7 +347,7 @@ int nft_flow_rule_offload_commit(struct net *net)
 
                        policy = nft_trans_chain_policy(trans);
                        err = nft_flow_offload_chain(trans->ctx.chain, &policy,
-                                                    FLOW_BLOCK_BIND);
+                                                    FLOW_BLOCK_UNBIND);
                        break;
                case NFT_MSG_NEWRULE:
                        if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
index af1497a..69d6173 100644 (file)
@@ -218,8 +218,13 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 {
        struct nft_connlimit *priv = nft_expr_priv(expr);
+       bool ret;
 
-       return nf_conncount_gc_list(net, &priv->list);
+       local_bh_disable();
+       ret = nf_conncount_gc_list(net, &priv->list);
+       local_bh_enable();
+
+       return ret;
 }
 
 static struct nft_expr_type nft_connlimit_type;
index 22cf236..f29bbc7 100644 (file)
@@ -177,6 +177,23 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
        return nf_ct_netns_get(ctx->net, ctx->family);
 }
 
+static void nft_flow_offload_deactivate(const struct nft_ctx *ctx,
+                                       const struct nft_expr *expr,
+                                       enum nft_trans_phase phase)
+{
+       struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+       nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase);
+}
+
+static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+                                     const struct nft_expr *expr)
+{
+       struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+       priv->flowtable->use++;
+}
+
 static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
                                     const struct nft_expr *expr)
 {
@@ -205,6 +222,8 @@ static const struct nft_expr_ops nft_flow_offload_ops = {
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
        .eval           = nft_flow_offload_eval,
        .init           = nft_flow_offload_init,
+       .activate       = nft_flow_offload_activate,
+       .deactivate     = nft_flow_offload_deactivate,
        .destroy        = nft_flow_offload_destroy,
        .validate       = nft_flow_offload_validate,
        .dump           = nft_flow_offload_dump,
index c0560bf..660bad6 100644 (file)
@@ -73,9 +73,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
        if (IS_ERR(set))
                return PTR_ERR(set);
 
-       if (set->flags & NFT_SET_EVAL)
-               return -EOPNOTSUPP;
-
        priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
        err = nft_validate_register_load(priv->sreg, set->klen);
        if (err < 0)
index 22a80eb..5cb2d89 100644 (file)
@@ -161,13 +161,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct ethhdr, h_source):
+               if (priv->len != ETH_ALEN)
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
                                  src, ETH_ALEN, reg);
                break;
        case offsetof(struct ethhdr, h_dest):
+               if (priv->len != ETH_ALEN)
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
                                  dst, ETH_ALEN, reg);
                break;
+       default:
+               return -EOPNOTSUPP;
        }
 
        return 0;
@@ -181,14 +189,23 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct iphdr, saddr):
+               if (priv->len != sizeof(struct in_addr))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src,
                                  sizeof(struct in_addr), reg);
                break;
        case offsetof(struct iphdr, daddr):
+               if (priv->len != sizeof(struct in_addr))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst,
                                  sizeof(struct in_addr), reg);
                break;
        case offsetof(struct iphdr, protocol):
+               if (priv->len != sizeof(__u8))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
                                  sizeof(__u8), reg);
                nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
@@ -208,14 +225,23 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct ipv6hdr, saddr):
+               if (priv->len != sizeof(struct in6_addr))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src,
                                  sizeof(struct in6_addr), reg);
                break;
        case offsetof(struct ipv6hdr, daddr):
+               if (priv->len != sizeof(struct in6_addr))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst,
                                  sizeof(struct in6_addr), reg);
                break;
        case offsetof(struct ipv6hdr, nexthdr):
+               if (priv->len != sizeof(__u8))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
                                  sizeof(__u8), reg);
                nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
@@ -255,10 +281,16 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct tcphdr, source):
+               if (priv->len != sizeof(__be16))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
                                  sizeof(__be16), reg);
                break;
        case offsetof(struct tcphdr, dest):
+               if (priv->len != sizeof(__be16))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
                                  sizeof(__be16), reg);
                break;
@@ -277,10 +309,16 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct udphdr, source):
+               if (priv->len != sizeof(__be16))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
                                  sizeof(__be16), reg);
                break;
        case offsetof(struct udphdr, dest):
+               if (priv->len != sizeof(__be16))
+                       return -EOPNOTSUPP;
+
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
                                  sizeof(__be16), reg);
                break;
index c4f54ad..58d5373 100644 (file)
@@ -63,28 +63,6 @@ static DEFINE_SPINLOCK(nr_list_lock);
 
 static const struct proto_ops nr_proto_ops;
 
-/*
- * NETROM network devices are virtual network devices encapsulating NETROM
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key nr_netdev_xmit_lock_key;
-static struct lock_class_key nr_netdev_addr_lock_key;
-
-static void nr_set_lockdep_one(struct net_device *dev,
-                              struct netdev_queue *txq,
-                              void *_unused)
-{
-       lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key);
-}
-
-static void nr_set_lockdep_key(struct net_device *dev)
-{
-       lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key);
-       netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
-}
-
 /*
  *     Socket removal during an interrupt is now safe.
  */
@@ -1414,7 +1392,6 @@ static int __init nr_proto_init(void)
                        free_netdev(dev);
                        goto fail;
                }
-               nr_set_lockdep_key(dev);
                dev_nr[i] = dev;
        }
 
index 9b87429..2860441 100644 (file)
@@ -107,9 +107,14 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
        llcp_sock->service_name = kmemdup(llcp_addr.service_name,
                                          llcp_sock->service_name_len,
                                          GFP_KERNEL);
-
+       if (!llcp_sock->service_name) {
+               ret = -ENOMEM;
+               goto put_dev;
+       }
        llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
        if (llcp_sock->ssap == LLCP_SAP_MAX) {
+               kfree(llcp_sock->service_name);
+               llcp_sock->service_name = NULL;
                ret = -EADDRINUSE;
                goto put_dev;
        }
@@ -549,11 +554,11 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
        if (sk->sk_state == LLCP_LISTEN)
                return llcp_accept_poll(sk);
 
-       if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+       if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
                mask |= EPOLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        if (sk->sk_state == LLCP_CLOSED)
@@ -1004,10 +1009,13 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
            sock->type != SOCK_RAW)
                return -ESOCKTNOSUPPORT;
 
-       if (sock->type == SOCK_RAW)
+       if (sock->type == SOCK_RAW) {
+               if (!capable(CAP_NET_RAW))
+                       return -EPERM;
                sock->ops = &llcp_rawsock_ops;
-       else
+       } else {
                sock->ops = &llcp_sock_ops;
+       }
 
        sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern);
        if (sk == NULL)
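
The bind hunk above adds the missing kmemdup() failure check and frees service_name again when the later SSAP reservation fails, so a failed bind leaves the socket unchanged. A sketch of that unwind discipline (names illustrative):

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct sock { char *service_name; int ssap; };

/* Each step that can fail undoes everything the call did so far,
 * leaving *sk unchanged on error. */
static int bind_sock(struct sock *sk, const char *name, int free_ssap)
{
	sk->service_name = strdup(name);
	if (!sk->service_name)
		return -1;                  /* ENOMEM */
	if (free_ssap < 0) {                /* reservation failed */
		free(sk->service_name);
		sk->service_name = NULL;    /* no stale pointer left */
		return -2;                  /* EADDRINUSE */
	}
	sk->ssap = free_ssap;
	return 0;
}

int main(void)
{
	struct sock sk = { 0 };
	printf("%d ", bind_sock(&sk, "svc", -1));  /* fails, sk untouched */
	printf("%d\n", bind_sock(&sk, "svc", 4));  /* succeeds */
	free(sk.service_name);
	return 0;
}
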
index 3572e11..1c77f52 100644 (file)
@@ -165,7 +165,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 {
        int err;
 
-       err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype);
+       err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype,
+                           skb->mac_len);
        if (err)
                return err;
 
@@ -178,7 +179,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 {
        int err;
 
-       err = skb_mpls_pop(skb, ethertype);
+       err = skb_mpls_pop(skb, ethertype, skb->mac_len);
        if (err)
                return err;
 
index dde9d76..d8c364d 100644 (file)
@@ -1881,7 +1881,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   struct net *net, u32 portid, u32 seq,
-                                  u32 flags, u8 cmd)
+                                  u32 flags, u8 cmd, gfp_t gfp)
 {
        struct ovs_header *ovs_header;
        struct ovs_vport_stats vport_stats;
@@ -1902,7 +1902,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                goto nla_put_failure;
 
        if (!net_eq(net, dev_net(vport->dev))) {
-               int id = peernet2id_alloc(net, dev_net(vport->dev));
+               int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
 
                if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
                        goto nla_put_failure;
@@ -1943,11 +1943,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
        struct sk_buff *skb;
        int retval;
 
-       skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+       skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);
 
-       retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
+       retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+                                        GFP_KERNEL);
        BUG_ON(retval < 0);
 
        return skb;
@@ -2089,7 +2090,7 @@ restart:
 
        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
                                      info->snd_portid, info->snd_seq, 0,
-                                     OVS_VPORT_CMD_NEW);
+                                     OVS_VPORT_CMD_NEW, GFP_KERNEL);
 
        new_headroom = netdev_get_fwd_headroom(vport->dev);
 
@@ -2150,7 +2151,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 
        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
                                      info->snd_portid, info->snd_seq, 0,
-                                     OVS_VPORT_CMD_SET);
+                                     OVS_VPORT_CMD_SET, GFP_KERNEL);
        BUG_ON(err < 0);
 
        ovs_unlock();
@@ -2190,7 +2191,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 
        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
                                      info->snd_portid, info->snd_seq, 0,
-                                     OVS_VPORT_CMD_DEL);
+                                     OVS_VPORT_CMD_DEL, GFP_KERNEL);
        BUG_ON(err < 0);
 
        /* the vport deletion may trigger dp headroom update */
@@ -2237,7 +2238,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
                goto exit_unlock_free;
        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
                                      info->snd_portid, info->snd_seq, 0,
-                                     OVS_VPORT_CMD_GET);
+                                     OVS_VPORT_CMD_GET, GFP_ATOMIC);
        BUG_ON(err < 0);
        rcu_read_unlock();
 
@@ -2273,7 +2274,8 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
                                                    NETLINK_CB(cb->skb).portid,
                                                    cb->nlh->nlmsg_seq,
                                                    NLM_F_MULTI,
-                                                   OVS_VPORT_CMD_GET) < 0)
+                                                   OVS_VPORT_CMD_GET,
+                                                   GFP_ATOMIC) < 0)
                                goto out;
 
                        j++;
@@ -2294,7 +2296,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
-       [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+       [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
        [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
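
The openvswitch hunks thread a gfp_t argument down to ovs_vport_cmd_fill_info() and peernet2id_alloc(), because the RCU-protected get/dump paths must not sleep while the process-context paths may use GFP_KERNEL. A loose userspace analogue of letting the outermost caller decide whether an allocation may block:

#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

/* The caller knows its context; the helper cannot guess it, so the
 * "may this allocation block?" decision is passed all the way down. */
static char reserve[256];          /* toy pool for non-blocking callers */

static void *ctx_alloc(size_t n, bool may_block)
{
	if (may_block)
		return malloc(n);  /* ~ GFP_KERNEL */
	return n <= sizeof(reserve) ? reserve : NULL;  /* ~ GFP_ATOMIC */
}

static int fill_info(bool may_block)
{
	void *buf = ctx_alloc(128, may_block);
	if (!buf)
		return -1;
	/* ... build the message in buf ... */
	if (may_block)
		free(buf);
	return 0;
}

int main(void)
{
	printf("%d %d\n", fill_info(true),   /* process context  */
			  fill_info(false)); /* "atomic" context */
	return 0;
}
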
index d2437b5..58a7b83 100644 (file)
@@ -137,7 +137,7 @@ static void do_setup(struct net_device *netdev)
        netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
                              IFF_NO_QUEUE;
        netdev->needs_free_netdev = true;
-       netdev->priv_destructor = internal_dev_destructor;
+       netdev->priv_destructor = NULL;
        netdev->ethtool_ops = &internal_dev_ethtool_ops;
        netdev->rtnl_link_ops = &internal_dev_link_ops;
 
@@ -159,7 +159,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
        struct internal_dev *internal_dev;
        struct net_device *dev;
        int err;
-       bool free_vport = true;
 
        vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
        if (IS_ERR(vport)) {
@@ -190,10 +189,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
 
        rtnl_lock();
        err = register_netdevice(vport->dev);
-       if (err) {
-               free_vport = false;
+       if (err)
                goto error_unlock;
-       }
+       vport->dev->priv_destructor = internal_dev_destructor;
 
        dev_set_promiscuity(vport->dev, 1);
        rtnl_unlock();
@@ -207,8 +205,7 @@ error_unlock:
 error_free_netdev:
        free_netdev(dev);
 error_free_vport:
-       if (free_vport)
-               ovs_vport_free(vport);
+       ovs_vport_free(vport);
 error:
        return ERR_PTR(err);
 }
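
The hunk above assigns priv_destructor only after register_netdevice() succeeds: before that point the error path still owns and frees the vport itself, which removes the free_vport flag and the double-free risk it papered over. The ownership-handoff pattern in miniature:

#include <stdlib.h>
#include <stdio.h>

struct vport { int id; };
struct netdev {
	struct vport *priv;
	void (*priv_destructor)(struct netdev *);
};

static void destroy_priv(struct netdev *d) { free(d->priv); }

/* Install the destructor only once registration succeeds; until then
 * the error path owns the object, so it is freed exactly once. */
static struct netdev *create(int fail_register)
{
	struct netdev *d = calloc(1, sizeof(*d));
	if (!d) return NULL;
	d->priv = calloc(1, sizeof(*d->priv));
	if (!d->priv || fail_register) {
		free(d->priv);             /* we still own it here */
		free(d);
		return NULL;
	}
	d->priv_destructor = destroy_priv; /* ownership handed over */
	return d;
}

int main(void)
{
	struct netdev *d = create(0);
	printf("%s\n", d ? "registered" : "failed");
	if (d) { d->priv_destructor(d); free(d); }
	printf("%s\n", create(1) ? "registered" : "failed");
	return 0;
}
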
@@ -237,7 +234,7 @@ static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
        }
 
        skb_dst_drop(skb);
-       nf_reset(skb);
+       nf_reset_ct(skb);
        secpath_reset(skb);
 
        skb->pkt_type = PACKET_HOST;
index e2742b0..82a50e8 100644 (file)
@@ -1821,7 +1821,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
        skb_dst_drop(skb);
 
        /* drop conntrack reference */
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        spkt = &PACKET_SKB_CB(skb)->sa.pkt;
 
@@ -2121,7 +2121,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
        skb_dst_drop(skb);
 
        /* drop conntrack reference */
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.stats1.tp_packets++;
index 96ea9f2..76d499f 100644 (file)
@@ -338,9 +338,9 @@ static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
 
        if (sk->sk_state == TCP_CLOSE)
                return EPOLLERR;
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
-       if (!skb_queue_empty(&pn->ctrlreq_queue))
+       if (!skb_queue_empty_lockless(&pn->ctrlreq_queue))
                mask |= EPOLLPRI;
        if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
                return EPOLLHUP;
index 6c8b0f6..88f98f2 100644 (file)
@@ -150,6 +150,7 @@ static void __qrtr_node_release(struct kref *kref)
        list_del(&node->item);
        mutex_unlock(&qrtr_node_lock);
 
+       cancel_work_sync(&node->work);
        skb_queue_purge(&node->rx_queue);
        kfree(node);
 }
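
The qrtr hunk adds cancel_work_sync() before the final kfree() so a still-pending worker cannot run against freed memory. A userspace analogue using a joinable thread in place of a work item:

#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

struct node { pthread_t worker; int busy; char data[32]; };

static void *work(void *arg)
{
	struct node *n = arg;
	n->data[0] = 1;          /* touches the node */
	return NULL;
}

/* Wait for any outstanding work before freeing the object it
 * references, as cancel_work_sync() does for a work_struct. */
static void node_release(struct node *n)
{
	if (n->busy)
		pthread_join(n->worker, NULL);  /* ~ cancel_work_sync() */
	free(n);                 /* now nothing can touch n */
}

int main(void)
{
	struct node *n = calloc(1, sizeof(*n));
	if (!n) return 1;
	n->busy = pthread_create(&n->worker, NULL, work, n) == 0;
	node_release(n);
	return 0;
}
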
index 38ea7f0..c64e154 100644 (file)
@@ -23,6 +23,6 @@ config RDS_TCP
          This transport does not support RDMA operations.
 
 config RDS_DEBUG
-        bool "RDS debugging messages"
+       bool "RDS debugging messages"
        depends on RDS
-        default n
+       default n
index 20c156a..5b5fb4c 100644 (file)
@@ -244,7 +244,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         */
        if (rs->rs_transport) {
                trans = rs->rs_transport;
-               if (trans->laddr_check(sock_net(sock->sk),
+               if (!trans->laddr_check ||
+                   trans->laddr_check(sock_net(sock->sk),
                                       binding_addr, scope_id) != 0) {
                        ret = -ENOPROTOOPT;
                        goto out;
@@ -263,6 +264,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
        sock_set_flag(sk, SOCK_RCU_FREE);
        ret = rds_add_bound(rs, binding_addr, &port, scope_id);
+       if (ret)
+               rs->rs_transport = NULL;
 
 out:
        release_sock(sk);
index 45acab2..9de2ae2 100644 (file)
@@ -143,6 +143,9 @@ static void rds_ib_add_one(struct ib_device *device)
        refcount_set(&rds_ibdev->refcount, 1);
        INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
 
+       INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
+       INIT_LIST_HEAD(&rds_ibdev->conn_list);
+
        rds_ibdev->max_wrs = device->attrs.max_qp_wr;
        rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
 
@@ -203,9 +206,6 @@ static void rds_ib_add_one(struct ib_device *device)
                device->name,
                rds_ibdev->use_fastreg ? "FRMR" : "FMR");
 
-       INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
-       INIT_LIST_HEAD(&rds_ibdev->conn_list);
-
        down_write(&rds_ib_devices_lock);
        list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
        up_write(&rds_ib_devices_lock);
index f0e9ccf..6a0df7c 100644 (file)
@@ -64,28 +64,6 @@ static const struct proto_ops rose_proto_ops;
 
 ax25_address rose_callsign;
 
-/*
- * ROSE network devices are virtual network devices encapsulating ROSE
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key rose_netdev_xmit_lock_key;
-static struct lock_class_key rose_netdev_addr_lock_key;
-
-static void rose_set_lockdep_one(struct net_device *dev,
-                                struct netdev_queue *txq,
-                                void *_unused)
-{
-       lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key);
-}
-
-static void rose_set_lockdep_key(struct net_device *dev)
-{
-       lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key);
-       netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
-}
-
 /*
  *     Convert a ROSE address into text.
  */
@@ -1533,7 +1511,6 @@ static int __init rose_proto_init(void)
                        free_netdev(dev);
                        goto fail;
                }
-               rose_set_lockdep_key(dev);
                dev_rose[i] = dev;
        }
 
index 1091bf3..7c7d10f 100644 (file)
@@ -556,6 +556,7 @@ struct rxrpc_call {
        struct rxrpc_peer       *peer;          /* Peer record for remote address */
        struct rxrpc_sock __rcu *socket;        /* socket responsible */
        struct rxrpc_net        *rxnet;         /* Network namespace to which call belongs */
+       const struct rxrpc_security *security;  /* applied security module */
        struct mutex            user_mutex;     /* User access mutex */
        unsigned long           ack_at;         /* When deferred ACK needs to happen */
        unsigned long           ack_lost_at;    /* When ACK is figured as lost */
@@ -600,6 +601,7 @@ struct rxrpc_call {
        int                     debug_id;       /* debug ID for printks */
        unsigned short          rx_pkt_offset;  /* Current recvmsg packet offset */
        unsigned short          rx_pkt_len;     /* Current recvmsg packet len */
+       bool                    rx_pkt_last;    /* Current recvmsg packet is last */
 
        /* Rx/Tx circular buffer, depending on phase.
         *
index 00c095d..135bf5c 100644 (file)
@@ -84,7 +84,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
                smp_store_release(&b->conn_backlog_head,
                                  (head + 1) & (size - 1));
 
-               trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+               trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
                                 atomic_read(&conn->usage), here);
        }
 
@@ -97,7 +97,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
        call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
        call->state = RXRPC_CALL_SERVER_PREALLOC;
 
-       trace_rxrpc_call(call, rxrpc_call_new_service,
+       trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
                         atomic_read(&call->usage),
                         here, (const void *)user_call_ID);
 
@@ -307,6 +307,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 
        rxrpc_see_call(call);
        call->conn = conn;
+       call->security = conn->security;
        call->peer = rxrpc_get_peer(conn->params.peer);
        call->cong_cwnd = call->peer->cong_cwnd;
        return call;
index 32d8dc6..a31c18c 100644 (file)
@@ -240,7 +240,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
        if (p->intr)
                __set_bit(RXRPC_CALL_IS_INTR, &call->flags);
        call->tx_total_len = p->tx_total_len;
-       trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
+       trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
+                        atomic_read(&call->usage),
                         here, (const void *)p->user_call_ID);
 
        /* We need to protect a partially set up call against the user as we
@@ -290,8 +291,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
        if (ret < 0)
                goto error;
 
-       trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
-                        here, NULL);
+       trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
+                        atomic_read(&call->usage), here, NULL);
 
        rxrpc_start_call_timer(call);
 
@@ -313,8 +314,8 @@ error_dup_user_ID:
 error:
        __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
                                    RX_CALL_DEAD, ret);
-       trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
-                        here, ERR_PTR(ret));
+       trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+                        atomic_read(&call->usage), here, ERR_PTR(ret));
        rxrpc_release_call(rx, call);
        mutex_unlock(&call->user_mutex);
        rxrpc_put_call(call, rxrpc_call_put);
@@ -376,7 +377,8 @@ bool rxrpc_queue_call(struct rxrpc_call *call)
        if (n == 0)
                return false;
        if (rxrpc_queue_work(&call->processor))
-               trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL);
+               trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
+                                here, NULL);
        else
                rxrpc_put_call(call, rxrpc_call_put_noqueue);
        return true;
@@ -391,7 +393,8 @@ bool __rxrpc_queue_call(struct rxrpc_call *call)
        int n = atomic_read(&call->usage);
        ASSERTCMP(n, >=, 1);
        if (rxrpc_queue_work(&call->processor))
-               trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL);
+               trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
+                                here, NULL);
        else
                rxrpc_put_call(call, rxrpc_call_put_noqueue);
        return true;
@@ -406,7 +409,8 @@ void rxrpc_see_call(struct rxrpc_call *call)
        if (call) {
                int n = atomic_read(&call->usage);
 
-               trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL);
+               trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
+                                here, NULL);
        }
 }
 
@@ -418,7 +422,7 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
        const void *here = __builtin_return_address(0);
        int n = atomic_inc_return(&call->usage);
 
-       trace_rxrpc_call(call, op, n, here, NULL);
+       trace_rxrpc_call(call->debug_id, op, n, here, NULL);
 }
 
 /*
@@ -445,7 +449,8 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 
        _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
 
-       trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+       trace_rxrpc_call(call->debug_id, rxrpc_call_release,
+                        atomic_read(&call->usage),
                         here, (const void *)call->flags);
 
        ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -488,10 +493,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 
        _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
-       if (conn) {
+       if (conn)
                rxrpc_disconnect_call(call);
-               conn->security->free_call_crypto(call);
-       }
+       if (call->security)
+               call->security->free_call_crypto(call);
 
        rxrpc_cleanup_ring(call);
        _leave("");
@@ -534,12 +539,13 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 {
        struct rxrpc_net *rxnet = call->rxnet;
        const void *here = __builtin_return_address(0);
+       unsigned int debug_id = call->debug_id;
        int n;
 
        ASSERT(call != NULL);
 
        n = atomic_dec_return(&call->usage);
-       trace_rxrpc_call(call, op, n, here, NULL);
+       trace_rxrpc_call(debug_id, op, n, here, NULL);
        ASSERTCMP(n, >=, 0);
        if (n == 0) {
                _debug("call %d dead", call->debug_id);
index 3f1da1b..376370c 100644 (file)
@@ -212,7 +212,8 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
        rxrpc_get_local(conn->params.local);
        key_get(conn->params.key);
 
-       trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage),
+       trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
+                        atomic_read(&conn->usage),
                         __builtin_return_address(0));
        trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
        _leave(" = %p", conn);
@@ -352,6 +353,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
 
        if (cp->exclusive) {
                call->conn = candidate;
+               call->security = candidate->security;
                call->security_ix = candidate->security_ix;
                call->service_id = candidate->service_id;
                _leave(" = 0 [exclusive %d]", candidate->debug_id);
@@ -403,6 +405,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
 candidate_published:
        set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
        call->conn = candidate;
+       call->security = candidate->security;
        call->security_ix = candidate->security_ix;
        call->service_id = candidate->service_id;
        spin_unlock(&local->client_conns_lock);
@@ -425,6 +428,7 @@ found_extant_conn:
 
        spin_lock(&conn->channel_lock);
        call->conn = conn;
+       call->security = conn->security;
        call->security_ix = conn->security_ix;
        call->service_id = conn->service_id;
        list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
@@ -985,11 +989,12 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
 void rxrpc_put_client_conn(struct rxrpc_connection *conn)
 {
        const void *here = __builtin_return_address(0);
+       unsigned int debug_id = conn->debug_id;
        int n;
 
        do {
                n = atomic_dec_return(&conn->usage);
-               trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here);
+               trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
                if (n > 0)
                        return;
                ASSERTCMP(n, >=, 0);
index ed05b69..38d718e 100644 (file)
@@ -269,7 +269,7 @@ bool rxrpc_queue_conn(struct rxrpc_connection *conn)
        if (n == 0)
                return false;
        if (rxrpc_queue_work(&conn->processor))
-               trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here);
+               trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
        else
                rxrpc_put_connection(conn);
        return true;
@@ -284,7 +284,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
        if (conn) {
                int n = atomic_read(&conn->usage);
 
-               trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here);
+               trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
        }
 }
 
@@ -296,7 +296,7 @@ void rxrpc_get_connection(struct rxrpc_connection *conn)
        const void *here = __builtin_return_address(0);
        int n = atomic_inc_return(&conn->usage);
 
-       trace_rxrpc_conn(conn, rxrpc_conn_got, n, here);
+       trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
 }
 
 /*
@@ -310,7 +310,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
        if (conn) {
                int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
                if (n > 0)
-                       trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
+                       trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
                else
                        conn = NULL;
        }
@@ -333,10 +333,11 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
 void rxrpc_put_service_conn(struct rxrpc_connection *conn)
 {
        const void *here = __builtin_return_address(0);
+       unsigned int debug_id = conn->debug_id;
        int n;
 
        n = atomic_dec_return(&conn->usage);
-       trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
+       trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
        ASSERTCMP(n, >=, 0);
        if (n == 1)
                rxrpc_set_service_reap_timer(conn->params.local->rxnet,
@@ -420,7 +421,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
                 */
                if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
                        continue;
-               trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL);
+               trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
 
                if (rxrpc_conn_is_client(conn))
                        BUG();
index b30e13f..123d6ce 100644 (file)
@@ -134,7 +134,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
                list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
                write_unlock(&rxnet->conn_lock);
 
-               trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+               trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
                                 atomic_read(&conn->usage),
                                 __builtin_return_address(0));
        }
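
The hunks above switch trace_rxrpc_conn() from taking the connection pointer to taking a copied debug_id. Once atomic_dec_return() drops our reference, another CPU may release the last one and free the object before the tracepoint fires, so the tracepoint must not dereference it. A minimal sketch of the pattern, with the names used in the hunks:

    /* Snapshot what the tracepoint needs while we still hold a reference. */
    unsigned int debug_id = conn->debug_id;
    int n = atomic_dec_return(&conn->usage);  /* conn may be freed from here on */

    trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
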
index c97ebdc..48f67a9 100644 (file)
@@ -147,10 +147,16 @@ void rxrpc_error_report(struct sock *sk)
 {
        struct sock_exterr_skb *serr;
        struct sockaddr_rxrpc srx;
-       struct rxrpc_local *local = sk->sk_user_data;
+       struct rxrpc_local *local;
        struct rxrpc_peer *peer;
        struct sk_buff *skb;
 
+       rcu_read_lock();
+       local = rcu_dereference_sk_user_data(sk);
+       if (unlikely(!local)) {
+               rcu_read_unlock();
+               return;
+       }
        _enter("%p{%d}", sk, local->debug_id);
 
        /* Clear the outstanding error value on the socket so that it doesn't
@@ -160,6 +166,7 @@ void rxrpc_error_report(struct sock *sk)
 
        skb = sock_dequeue_err_skb(sk);
        if (!skb) {
+               rcu_read_unlock();
                _leave("UDP socket errqueue empty");
                return;
        }
@@ -167,11 +174,11 @@ void rxrpc_error_report(struct sock *sk)
        serr = SKB_EXT_ERR(skb);
        if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
                _leave("UDP empty message");
+               rcu_read_unlock();
                rxrpc_free_skb(skb, rxrpc_skb_freed);
                return;
        }
 
-       rcu_read_lock();
        peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
        if (peer && !rxrpc_get_peer_maybe(peer))
                peer = NULL;
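
rxrpc_error_report() used to read sk->sk_user_data with a plain load; the local endpoint can be detached concurrently, so the hunk moves the whole function under rcu_read_lock(), fetches the pointer with rcu_dereference_sk_user_data(), and bails out early if it is already NULL. The cost of widening the read-side section is that every early return needs a matching rcu_read_unlock(), as the added unlocks in the empty-queue paths show. The entry check, as a sketch:

    rcu_read_lock();
    local = rcu_dereference_sk_user_data(sk);
    if (unlikely(!local)) {          /* endpoint already being torn down */
        rcu_read_unlock();
        return;
    }
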
index 9c3ac96..64830d8 100644 (file)
@@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
        peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
        if (peer) {
                atomic_set(&peer->usage, 1);
-               peer->local = local;
+               peer->local = rxrpc_get_local(local);
                INIT_HLIST_HEAD(&peer->error_targets);
                peer->service_conns = RB_ROOT;
                seqlock_init(&peer->service_conn_lock);
@@ -307,7 +307,6 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
        unsigned long hash_key;
 
        hash_key = rxrpc_peer_hash_key(local, &peer->srx);
-       peer->local = local;
        rxrpc_init_peer(rx, peer, hash_key);
 
        spin_lock(&rxnet->peer_hash_lock);
@@ -382,7 +381,7 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
        int n;
 
        n = atomic_inc_return(&peer->usage);
-       trace_rxrpc_peer(peer, rxrpc_peer_got, n, here);
+       trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
        return peer;
 }
 
@@ -396,7 +395,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
        if (peer) {
                int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
                if (n > 0)
-                       trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
+                       trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
                else
                        peer = NULL;
        }
@@ -417,6 +416,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
        list_del_init(&peer->keepalive_link);
        spin_unlock_bh(&rxnet->peer_hash_lock);
 
+       rxrpc_put_local(peer->local);
        kfree_rcu(peer, rcu);
 }
 
@@ -426,11 +426,13 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
 void rxrpc_put_peer(struct rxrpc_peer *peer)
 {
        const void *here = __builtin_return_address(0);
+       unsigned int debug_id;
        int n;
 
        if (peer) {
+               debug_id = peer->debug_id;
                n = atomic_dec_return(&peer->usage);
-               trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+               trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
                if (n == 0)
                        __rxrpc_put_peer(peer);
        }
@@ -443,13 +445,15 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
 void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
 {
        const void *here = __builtin_return_address(0);
+       unsigned int debug_id = peer->debug_id;
        int n;
 
        n = atomic_dec_return(&peer->usage);
-       trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+       trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
        if (n == 0) {
                hash_del_rcu(&peer->hash_link);
                list_del_init(&peer->keepalive_link);
+               rxrpc_put_local(peer->local);
                kfree_rcu(peer, rcu);
        }
 }
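
A peer previously stored a borrowed pointer to its local endpoint; these hunks make the peer own a reference instead. rxrpc_alloc_peer() takes it with rxrpc_get_local(), the now-redundant re-assignment in rxrpc_new_incoming_peer() goes away, and both destruction paths drop the reference with rxrpc_put_local() before the RCU-deferred free. The ownership rule in two lines:

    peer->local = rxrpc_get_local(local);  /* at allocation: take a ref */
    ...
    rxrpc_put_local(peer->local);          /* at teardown: release it   */
    kfree_rcu(peer, rcu);
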
index 3b0becb..8578c39 100644 (file)
@@ -251,8 +251,8 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
                seq += subpacket;
        }
 
-       return call->conn->security->verify_packet(call, skb, offset, len,
-                                                  seq, cksum);
+       return call->security->verify_packet(call, skb, offset, len,
+                                            seq, cksum);
 }
 
 /*
@@ -267,11 +267,13 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
  */
 static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
                             u8 *_annotation,
-                            unsigned int *_offset, unsigned int *_len)
+                            unsigned int *_offset, unsigned int *_len,
+                            bool *_last)
 {
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        unsigned int offset = sizeof(struct rxrpc_wire_header);
        unsigned int len;
+       bool last = false;
        int ret;
        u8 annotation = *_annotation;
        u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET;
@@ -281,6 +283,8 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
        len = skb->len - offset;
        if (subpacket < sp->nr_subpackets - 1)
                len = RXRPC_JUMBO_DATALEN;
+       else if (sp->rx_flags & RXRPC_SKB_INCL_LAST)
+               last = true;
 
        if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
                ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
@@ -291,7 +295,8 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
 
        *_offset = offset;
        *_len = len;
-       call->conn->security->locate_data(call, skb, _offset, _len);
+       *_last = last;
+       call->security->locate_data(call, skb, _offset, _len);
        return 0;
 }
 
@@ -309,7 +314,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
        rxrpc_serial_t serial;
        rxrpc_seq_t hard_ack, top, seq;
        size_t remain;
-       bool last;
+       bool rx_pkt_last;
        unsigned int rx_pkt_offset, rx_pkt_len;
        int ix, copy, ret = -EAGAIN, ret2;
 
@@ -319,6 +324,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
 
        rx_pkt_offset = call->rx_pkt_offset;
        rx_pkt_len = call->rx_pkt_len;
+       rx_pkt_last = call->rx_pkt_last;
 
        if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
                seq = call->rx_hard_ack;
@@ -329,6 +335,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
        /* Barriers against rxrpc_input_data(). */
        hard_ack = call->rx_hard_ack;
        seq = hard_ack + 1;
+
        while (top = smp_load_acquire(&call->rx_top),
               before_eq(seq, top)
               ) {
@@ -356,7 +363,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
                if (rx_pkt_offset == 0) {
                        ret2 = rxrpc_locate_data(call, skb,
                                                 &call->rxtx_annotations[ix],
-                                                &rx_pkt_offset, &rx_pkt_len);
+                                                &rx_pkt_offset, &rx_pkt_len,
+                                                &rx_pkt_last);
                        trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq,
                                            rx_pkt_offset, rx_pkt_len, ret2);
                        if (ret2 < 0) {
@@ -396,13 +404,12 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
                }
 
                /* The whole packet has been transferred. */
-               last = sp->hdr.flags & RXRPC_LAST_PACKET;
                if (!(flags & MSG_PEEK))
                        rxrpc_rotate_rx_window(call);
                rx_pkt_offset = 0;
                rx_pkt_len = 0;
 
-               if (last) {
+               if (rx_pkt_last) {
                        ASSERTCMP(seq, ==, READ_ONCE(call->rx_top));
                        ret = 1;
                        goto out;
@@ -415,6 +422,7 @@ out:
        if (!(flags & MSG_PEEK)) {
                call->rx_pkt_offset = rx_pkt_offset;
                call->rx_pkt_len = rx_pkt_len;
+               call->rx_pkt_last = rx_pkt_last;
        }
 done:
        trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
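
Two things change in the recvmsg path. First, the security ops are reached through call->security rather than call->conn->security, matching the sendmsg hunk below. Second, with jumbo packets one sk_buff carries several subpackets, so testing sp->hdr.flags & RXRPC_LAST_PACKET after the copy no longer identifies the end of the call reliably; rxrpc_locate_data() now reports whether the final subpacket carries RXRPC_SKB_INCL_LAST, and that bool is persisted in call->rx_pkt_last next to rx_pkt_offset/rx_pkt_len so an interrupted read resumes with the right answer:

    if (!(flags & MSG_PEEK)) {
        call->rx_pkt_offset = rx_pkt_offset;
        call->rx_pkt_len    = rx_pkt_len;
        call->rx_pkt_last   = rx_pkt_last;  /* survives partial reads */
    }
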
index 6a1547b..813fd68 100644 (file)
@@ -419,7 +419,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
                                 call->tx_winsize)
                                sp->hdr.flags |= RXRPC_MORE_PACKETS;
 
-                       ret = conn->security->secure_packet(
+                       ret = call->security->secure_packet(
                                call, skb, skb->mark, skb->head);
                        if (ret < 0)
                                goto out;
@@ -661,6 +661,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
                case RXRPC_CALL_SERVER_PREALLOC:
                case RXRPC_CALL_SERVER_SECURING:
                case RXRPC_CALL_SERVER_ACCEPTING:
+                       rxrpc_put_call(call, rxrpc_call_put);
                        ret = -EBUSY;
                        goto error_release_sock;
                default:
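
Two independent fixes in sendmsg: secure_packet() is invoked via call->security for the same lifetime reason as above, and the -EBUSY branch of rxrpc_do_sendmsg() now drops the call reference it obtained earlier instead of leaking it on the way out:

    case RXRPC_CALL_SERVER_ACCEPTING:
        rxrpc_put_call(call, rxrpc_call_put);  /* balance the earlier lookup */
        ret = -EBUSY;
        goto error_release_sock;
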
index b3faafe..2985509 100644 (file)
@@ -324,7 +324,7 @@ config NET_SCH_CAKE
        tristate "Common Applications Kept Enhanced (CAKE)"
        help
          Say Y here if you want to use the Common Applications Kept Enhanced
-          (CAKE) queue management algorithm.
+         (CAKE) queue management algorithm.
 
          To compile this driver as a module, choose M here: the module
          will be called sch_cake.
@@ -730,8 +730,8 @@ config NET_CLS_ACT
 
 config NET_ACT_POLICE
        tristate "Traffic Policing"
-        depends on NET_CLS_ACT
-        ---help---
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here if you want to do traffic policing, i.e. strict
          bandwidth limiting. This action replaces the existing policing
          module.
@@ -740,9 +740,9 @@ config NET_ACT_POLICE
          module will be called act_police.
 
 config NET_ACT_GACT
-        tristate "Generic actions"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "Generic actions"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to take generic actions such as dropping and
          accepting packets.
 
@@ -750,15 +750,15 @@ config NET_ACT_GACT
          module will be called act_gact.
 
 config GACT_PROB
-        bool "Probability support"
-        depends on NET_ACT_GACT
-        ---help---
+       bool "Probability support"
+       depends on NET_ACT_GACT
+       ---help---
          Say Y here to use the generic action randomly or deterministically.
 
 config NET_ACT_MIRRED
-        tristate "Redirecting and Mirroring"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "Redirecting and Mirroring"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to allow packets to be mirrored or redirected to
          other devices.
 
@@ -766,10 +766,10 @@ config NET_ACT_MIRRED
          module will be called act_mirred.
 
 config NET_ACT_SAMPLE
-        tristate "Traffic Sampling"
-        depends on NET_CLS_ACT
-        select PSAMPLE
-        ---help---
+       tristate "Traffic Sampling"
+       depends on NET_CLS_ACT
+       select PSAMPLE
+       ---help---
          Say Y here to allow packet sampling tc action. The packet sample
          action consists of statistically choosing packets and sampling
          them using the psample module.
@@ -778,9 +778,9 @@ config NET_ACT_SAMPLE
          module will be called act_sample.
 
 config NET_ACT_IPT
-        tristate "IPtables targets"
-        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
-        ---help---
+       tristate "IPtables targets"
+       depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+       ---help---
          Say Y here to be able to invoke iptables targets after successful
          classification.
 
@@ -788,9 +788,9 @@ config NET_ACT_IPT
          module will be called act_ipt.
 
 config NET_ACT_NAT
-        tristate "Stateless NAT"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "Stateless NAT"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to do stateless NAT on IPv4 packets.  You should use
          netfilter for NAT unless you know what you are doing.
 
@@ -798,18 +798,18 @@ config NET_ACT_NAT
          module will be called act_nat.
 
 config NET_ACT_PEDIT
-        tristate "Packet Editing"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "Packet Editing"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here if you want to mangle the content of packets.
 
          To compile this code as a module, choose M here: the
          module will be called act_pedit.
 
 config NET_ACT_SIMP
-        tristate "Simple Example (Debug)"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "Simple Example (Debug)"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to add a simple action for demonstration purposes.
          It is meant as an example and for debugging purposes. It will
          print a configured policy string followed by the packet count
@@ -821,9 +821,9 @@ config NET_ACT_SIMP
          module will be called act_simple.
 
 config NET_ACT_SKBEDIT
-        tristate "SKB Editing"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "SKB Editing"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to change skb priority or queue_mapping settings.
 
          If unsure, say N.
@@ -832,10 +832,10 @@ config NET_ACT_SKBEDIT
          module will be called act_skbedit.
 
 config NET_ACT_CSUM
-        tristate "Checksum Updating"
-        depends on NET_CLS_ACT && INET
-        select LIBCRC32C
-        ---help---
+       tristate "Checksum Updating"
+       depends on NET_CLS_ACT && INET
+       select LIBCRC32C
+       ---help---
          Say Y here to update some common checksum after some direct
          packet alterations.
 
@@ -854,9 +854,9 @@ config NET_ACT_MPLS
          module will be called act_mpls.
 
 config NET_ACT_VLAN
-        tristate "Vlan manipulation"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "Vlan manipulation"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to push or pop vlan headers.
 
          If unsure, say N.
@@ -865,9 +865,9 @@ config NET_ACT_VLAN
          module will be called act_vlan.
 
 config NET_ACT_BPF
-        tristate "BPF based action"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "BPF based action"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to execute BPF code on packets. The BPF code will decide
          if the packet should be dropped or not.
 
@@ -877,10 +877,10 @@ config NET_ACT_BPF
          module will be called act_bpf.
 
 config NET_ACT_CONNMARK
-        tristate "Netfilter Connection Mark Retriever"
-        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
-        depends on NF_CONNTRACK && NF_CONNTRACK_MARK
-        ---help---
+       tristate "Netfilter Connection Mark Retriever"
+       depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+       depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+       ---help---
          Say Y here to allow retrieving of conn mark
 
          If unsure, say N.
@@ -889,10 +889,10 @@ config NET_ACT_CONNMARK
          module will be called act_connmark.
 
 config NET_ACT_CTINFO
-        tristate "Netfilter Connection Mark Actions"
-        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
-        depends on NF_CONNTRACK && NF_CONNTRACK_MARK
-        help
+       tristate "Netfilter Connection Mark Actions"
+       depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+       depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+       help
          Say Y here to allow transfer of a connmark stored information.
          Current actions transfer connmark stored DSCP into
          ipv4/v6 diffserv and/or to transfer connmark to packet
@@ -906,21 +906,21 @@ config NET_ACT_CTINFO
          module will be called act_ctinfo.
 
 config NET_ACT_SKBMOD
-        tristate "skb data modification action"
-        depends on NET_CLS_ACT
-        ---help---
-         Say Y here to allow modification of skb data
+       tristate "skb data modification action"
+       depends on NET_CLS_ACT
+       ---help---
+        Say Y here to allow modification of skb data
 
-         If unsure, say N.
+        If unsure, say N.
 
-         To compile this code as a module, choose M here: the
-         module will be called act_skbmod.
+        To compile this code as a module, choose M here: the
+        module will be called act_skbmod.
 
 config NET_ACT_IFE
-        tristate "Inter-FE action based on IETF ForCES InterFE LFB"
-        depends on NET_CLS_ACT
-        select NET_IFE
-        ---help---
+       tristate "Inter-FE action based on IETF ForCES InterFE LFB"
+       depends on NET_CLS_ACT
+       select NET_IFE
+       ---help---
          Say Y here to allow for sourcing and terminating metadata
          For details refer to netdev01 paper:
          "Distributing Linux Traffic Control Classifier-Action Subsystem"
@@ -930,9 +930,9 @@ config NET_ACT_IFE
          module will be called act_ife.
 
 config NET_ACT_TUNNEL_KEY
-        tristate "IP tunnel metadata manipulation"
-        depends on NET_CLS_ACT
-        ---help---
+       tristate "IP tunnel metadata manipulation"
+       depends on NET_CLS_ACT
+       ---help---
          Say Y here to set/release ip tunnel metadata.
 
          If unsure, say N.
@@ -941,9 +941,9 @@ config NET_ACT_TUNNEL_KEY
          module will be called act_tunnel_key.
 
 config NET_ACT_CT
-        tristate "connection tracking tc action"
-        depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
-        help
+       tristate "connection tracking tc action"
+       depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
+       help
          Say Y here to allow sending the packets to conntrack module.
 
          If unsure, say N.
@@ -952,21 +952,20 @@ config NET_ACT_CT
          module will be called act_ct.
 
 config NET_IFE_SKBMARK
-        tristate "Support to encoding decoding skb mark on IFE action"
-        depends on NET_ACT_IFE
+       tristate "Support to encoding decoding skb mark on IFE action"
+       depends on NET_ACT_IFE
 
 config NET_IFE_SKBPRIO
-        tristate "Support to encoding decoding skb prio on IFE action"
-        depends on NET_ACT_IFE
+       tristate "Support to encoding decoding skb prio on IFE action"
+       depends on NET_ACT_IFE
 
 config NET_IFE_SKBTCINDEX
-        tristate "Support to encoding decoding skb tcindex on IFE action"
-        depends on NET_ACT_IFE
+       tristate "Support to encoding decoding skb tcindex on IFE action"
+       depends on NET_ACT_IFE
 
 config NET_TC_SKB_EXT
        bool "TC recirculation support"
        depends on NET_CLS_ACT
-       default y if NET_CLS_ACT
        select SKB_EXTENSIONS
 
        help
index 3397122..69d4676 100644 (file)
@@ -831,6 +831,14 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
        return c;
 }
 
+static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
+       [TCA_ACT_KIND]          = { .type = NLA_STRING },
+       [TCA_ACT_INDEX]         = { .type = NLA_U32 },
+       [TCA_ACT_COOKIE]        = { .type = NLA_BINARY,
+                                   .len = TC_COOKIE_MAX_SIZE },
+       [TCA_ACT_OPTIONS]       = { .type = NLA_NESTED },
+};
+
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
                                    char *name, int ovr, int bind,
@@ -846,8 +854,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        int err;
 
        if (name == NULL) {
-               err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL,
-                                                 extack);
+               err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+                                                 tcf_action_policy, extack);
                if (err < 0)
                        goto err_out;
                err = -EINVAL;
@@ -861,13 +869,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                        goto err_out;
                }
                if (tb[TCA_ACT_COOKIE]) {
-                       int cklen = nla_len(tb[TCA_ACT_COOKIE]);
-
-                       if (cklen > TC_COOKIE_MAX_SIZE) {
-                               NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
-                               goto err_out;
-                       }
-
                        cookie = nla_memdup_cookie(tb);
                        if (!cookie) {
                                NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
@@ -1098,7 +1099,8 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
        int index;
        int err;
 
-       err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack);
+       err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+                                         tcf_action_policy, extack);
        if (err < 0)
                goto err_out;
 
@@ -1152,7 +1154,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 
        b = skb_tail_pointer(skb);
 
-       err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack);
+       err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+                                         tcf_action_policy, extack);
        if (err < 0)
                goto err_out;
 
@@ -1350,11 +1353,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
                          struct netlink_ext_ack *extack)
 {
        size_t attr_size = 0;
-       int ret = 0;
+       int loop, ret;
        struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
-       ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
-                             &attr_size, true, extack);
+       for (loop = 0; loop < 10; loop++) {
+               ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
+                                     actions, &attr_size, true, extack);
+               if (ret != -EAGAIN)
+                       break;
+       }
+
        if (ret < 0)
                return ret;
        ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
@@ -1404,11 +1412,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
                 */
                if (n->nlmsg_flags & NLM_F_REPLACE)
                        ovr = 1;
-replay:
                ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
                                     extack);
-               if (ret == -EAGAIN)
-                       goto replay;
                break;
        case RTM_DELACTION:
                ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
@@ -1440,7 +1445,7 @@ static struct nlattr *find_dump_kind(struct nlattr **nla)
 
        if (tb[1] == NULL)
                return NULL;
-       if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0)
+       if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], tcf_action_policy, NULL) < 0)
                return NULL;
        kind = tb2[TCA_ACT_KIND];
 
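
The new tcf_action_policy lets nla_parse_nested_deprecated() validate attribute types and the cookie length in one place, replacing the open-coded nla_len() check in tcf_action_init_1() and, importantly, covering the get/flush/dump paths that previously parsed with a NULL policy and so accepted oversized cookies. The same section also bounds the replay on -EAGAIN in tcf_action_add() to ten attempts instead of looping indefinitely. Declaring and applying such a policy looks like this (abridged from the hunk above):

    static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
        [TCA_ACT_KIND]   = { .type = NLA_STRING },
        [TCA_ACT_COOKIE] = { .type = NLA_BINARY,
                             .len  = TC_COOKIE_MAX_SIZE },
    };

    err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
                                      tcf_action_policy, extack);
    if (err < 0)        /* malformed attributes rejected up front */
        return err;
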
index 9ce073a..08923b2 100644 (file)
@@ -484,7 +484,11 @@ static int __init mirred_init_module(void)
                return err;
 
        pr_info("Mirror/redirect action on\n");
-       return tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+       err = tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+       if (err)
+               unregister_netdevice_notifier(&mirred_device_notifier);
+
+       return err;
 }
 
 static void __exit mirred_cleanup_module(void)
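
mirred_init_module() registers a netdevice notifier before registering the action ops (the notifier registration sits just above the context shown); if the second step fails, the notifier must be unregistered again or a stale callback outlives the failed module load. The standard init-unwind shape, as a sketch:

    err = register_netdevice_notifier(&mirred_device_notifier);
    if (err)
        return err;

    err = tcf_register_action(&act_mirred_ops, &mirred_net_ops);
    if (err)
        unregister_netdevice_notifier(&mirred_device_notifier);
    return err;
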
index e168df0..4cf6c55 100644 (file)
@@ -55,7 +55,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
        struct tcf_mpls *m = to_mpls(a);
        struct tcf_mpls_params *p;
        __be32 new_lse;
-       int ret;
+       int ret, mac_len;
 
        tcf_lastuse_update(&m->tcf_tm);
        bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
@@ -63,8 +63,12 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
        /* Ensure 'data' points at mac_header prior calling mpls manipulating
         * functions.
         */
-       if (skb_at_tc_ingress(skb))
+       if (skb_at_tc_ingress(skb)) {
                skb_push_rcsum(skb, skb->mac_len);
+               mac_len = skb->mac_len;
+       } else {
+               mac_len = skb_network_header(skb) - skb_mac_header(skb);
+       }
 
        ret = READ_ONCE(m->tcf_action);
 
@@ -72,12 +76,12 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
 
        switch (p->tcfm_action) {
        case TCA_MPLS_ACT_POP:
-               if (skb_mpls_pop(skb, p->tcfm_proto))
+               if (skb_mpls_pop(skb, p->tcfm_proto, mac_len))
                        goto drop;
                break;
        case TCA_MPLS_ACT_PUSH:
                new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol));
-               if (skb_mpls_push(skb, new_lse, p->tcfm_proto))
+               if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len))
                        goto drop;
                break;
        case TCA_MPLS_ACT_MODIFY:
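
skb_mpls_pop() and skb_mpls_push() now take the MAC header length explicitly so they move the right number of bytes at egress, where skb->data no longer points at the MAC header. At ingress the skb is pushed back to the MAC header and skb->mac_len is authoritative; at egress the length is recovered from the header offsets:

    if (skb_at_tc_ingress(skb)) {
        skb_push_rcsum(skb, skb->mac_len);  /* data -> MAC header */
        mac_len = skb->mac_len;
    } else {
        mac_len = skb_network_header(skb) - skb_mac_header(skb);
    }
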
index 692c4c9..514456a 100644 (file)
@@ -146,6 +146,7 @@ static bool tcf_sample_dev_ok_push(struct net_device *dev)
        case ARPHRD_TUNNEL6:
        case ARPHRD_SIT:
        case ARPHRD_IPGRE:
+       case ARPHRD_IP6GRE:
        case ARPHRD_VOID:
        case ARPHRD_NONE:
                return false;
index 32577c2..8717c0b 100644 (file)
@@ -162,11 +162,22 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
        return TC_H_MAJ(first);
 }
 
+static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
+{
+       if (kind)
+               return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
+       memset(name, 0, IFNAMSIZ);
+       return false;
+}
+
 static bool tcf_proto_is_unlocked(const char *kind)
 {
        const struct tcf_proto_ops *ops;
        bool ret;
 
+       if (strlen(kind) == 0)
+               return false;
+
        ops = tcf_proto_lookup_ops(kind, false, NULL);
        /* On error return false to take rtnl lock. Proto lookup/create
         * functions will perform lookup again and properly handle errors.
@@ -1843,6 +1854,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 {
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_MAX + 1];
+       char name[IFNAMSIZ];
        struct tcmsg *t;
        u32 protocol;
        u32 prio;
@@ -1899,13 +1911,19 @@ replay:
        if (err)
                return err;
 
+       if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+               NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+               err = -EINVAL;
+               goto errout;
+       }
+
        /* Take rtnl mutex if rtnl_held was set to true on previous iteration,
         * block is shared (no qdisc found), qdisc is not unlocked, classifier
         * type is not specified, classifier is not unlocked.
         */
        if (rtnl_held ||
            (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
-           !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+           !tcf_proto_is_unlocked(name)) {
                rtnl_held = true;
                rtnl_lock();
        }
@@ -2063,6 +2081,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 {
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_MAX + 1];
+       char name[IFNAMSIZ];
        struct tcmsg *t;
        u32 protocol;
        u32 prio;
@@ -2102,13 +2121,18 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        if (err)
                return err;
 
+       if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+               NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+               err = -EINVAL;
+               goto errout;
+       }
        /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
         * found), qdisc is not unlocked, classifier type is not specified,
         * classifier is not unlocked.
         */
        if (!prio ||
            (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
-           !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+           !tcf_proto_is_unlocked(name)) {
                rtnl_held = true;
                rtnl_lock();
        }
@@ -2216,6 +2240,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 {
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_MAX + 1];
+       char name[IFNAMSIZ];
        struct tcmsg *t;
        u32 protocol;
        u32 prio;
@@ -2252,12 +2277,17 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        if (err)
                return err;
 
+       if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+               NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+               err = -EINVAL;
+               goto errout;
+       }
        /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
         * unlocked, classifier type is not specified, classifier is not
         * unlocked.
         */
        if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
-           !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+           !tcf_proto_is_unlocked(name)) {
                rtnl_held = true;
                rtnl_lock();
        }
@@ -2894,8 +2924,10 @@ out:
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
-       kfree(exts->actions);
+       if (exts->actions) {
+               tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
+               kfree(exts->actions);
+       }
        exts->nr_actions = 0;
 #endif
 }
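
tcf_proto_check_kind() copies TCA_KIND into a fixed IFNAMSIZ buffer and rejects anything that would truncate, so the rtnl-lock decisions below operate on a bounded, NUL-terminated string instead of raw attribute payload; a missing TCA_KIND yields an empty name, which tcf_proto_is_unlocked() now maps to "take the rtnl lock". The final hunk additionally guards tcf_exts_destroy() against exts->actions being NULL when initialization failed early. The validation helper, as added above:

    static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
    {
        if (kind)  /* >= IFNAMSIZ means the string was truncated: refuse it */
            return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
        memset(name, 0, IFNAMSIZ);
        return false;
    }
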
index bf10bda..8229ed4 100644 (file)
@@ -162,16 +162,20 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
        cls_bpf.name = obj->bpf_name;
        cls_bpf.exts_integrated = obj->exts_integrated;
 
-       if (oldprog)
+       if (oldprog && prog)
                err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
                                          skip_sw, &oldprog->gen_flags,
                                          &oldprog->in_hw_count,
                                          &prog->gen_flags, &prog->in_hw_count,
                                          true);
-       else
+       else if (prog)
                err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
                                      skip_sw, &prog->gen_flags,
                                      &prog->in_hw_count, true);
+       else
+               err = tc_setup_cb_destroy(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+                                         skip_sw, &oldprog->gen_flags,
+                                         &oldprog->in_hw_count, true);
 
        if (prog && err) {
                cls_bpf_offload_cmd(tp, oldprog, prog, extack);
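
cls_bpf_offload_cmd() can be called with a new prog, an old prog, or both, but the old code only distinguished replace from add and would dereference prog on the destroy path, where prog is NULL. The dispatch is now explicit; schematically:

    if (oldprog && prog)   /* swap the offloaded program */
        err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, ...);
    else if (prog)         /* first offload of this program */
        err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, ...);
    else                   /* prog == NULL: tear the old one down */
        err = tc_setup_cb_destroy(block, tp, TC_SETUP_CLSBPF, ...);
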
index 82bd14e..3177dcb 100644 (file)
@@ -446,7 +446,7 @@ META_COLLECTOR(int_sk_wmem_queued)
                *err = -1;
                return;
        }
-       dst->value = sk->sk_wmem_queued;
+       dst->value = READ_ONCE(sk->sk_wmem_queued);
 }
 
 META_COLLECTOR(int_sk_fwd_alloc)
@@ -554,7 +554,7 @@ META_COLLECTOR(int_sk_rcvlowat)
                *err = -1;
                return;
        }
-       dst->value = sk->sk_rcvlowat;
+       dst->value = READ_ONCE(sk->sk_rcvlowat);
 }
 
 META_COLLECTOR(int_sk_rcvtimeo)
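
The meta collectors read socket fields without holding the socket lock, so the loads are wrapped in READ_ONCE() to get a single untorn read and to mark the access as intentionally lockless for tools like KCSAN. The annotation only helps if writers pair with it:

    dst->value = READ_ONCE(sk->sk_rcvlowat);  /* lockless reader */

    /* ...while the writer elsewhere uses: */
    WRITE_ONCE(sk->sk_rcvlowat, val);
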
index 06c7a2d..39b427d 100644 (file)
@@ -1127,6 +1127,33 @@ static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
        [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
 };
 
+static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
+                        struct nlattr *opt,
+                        struct netlink_ext_ack *extack)
+{
+       int err;
+
+       if (!opt) {
+               NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
+               return -EINVAL;
+       }
+
+       err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
+                                         cbq_policy, extack);
+       if (err < 0)
+               return err;
+
+       if (tb[TCA_CBQ_WRROPT]) {
+               const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
+
+               if (wrr->priority > TC_CBQ_MAXPRIO) {
+                       NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
+                       err = -EINVAL;
+               }
+       }
+       return err;
+}
+
 static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
 {
@@ -1139,13 +1166,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
        hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
        q->delay_timer.function = cbq_undelay;
 
-       if (!opt) {
-               NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
-               return -EINVAL;
-       }
-
-       err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy,
-                                         extack);
+       err = cbq_opt_parse(tb, opt, extack);
        if (err < 0)
                return err;
 
@@ -1464,13 +1485,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
        struct cbq_class *parent;
        struct qdisc_rate_table *rtab = NULL;
 
-       if (!opt) {
-               NL_SET_ERR_MSG(extack, "Mandatory qdisc options missing");
-               return -EINVAL;
-       }
-
-       err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy,
-                                         extack);
+       err = cbq_opt_parse(tb, opt, extack);
        if (err < 0)
                return err;
 
index 93b58fd..b2905b0 100644 (file)
@@ -306,7 +306,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
        if (err < 0)
                goto skip;
 
-       if (ecmd.base.speed != SPEED_UNKNOWN)
+       if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
                speed = ecmd.base.speed;
 
 skip:
@@ -392,7 +392,6 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
 {
        struct cbs_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
-       int err;
 
        if (!opt) {
                NL_SET_ERR_MSG(extack, "Missing CBS qdisc options  which are mandatory");
@@ -404,6 +403,10 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
        if (!q->qdisc)
                return -ENOMEM;
 
+       spin_lock(&cbs_list_lock);
+       list_add(&q->cbs_list, &cbs_list);
+       spin_unlock(&cbs_list_lock);
+
        qdisc_hash_add(q->qdisc, false);
 
        q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
@@ -413,17 +416,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
 
        qdisc_watchdog_init(&q->watchdog, sch);
 
-       err = cbs_change(sch, opt, extack);
-       if (err)
-               return err;
-
-       if (!q->offload) {
-               spin_lock(&cbs_list_lock);
-               list_add(&q->cbs_list, &cbs_list);
-               spin_unlock(&cbs_list_lock);
-       }
-
-       return 0;
+       return cbs_change(sch, opt, extack);
 }
 
 static void cbs_destroy(struct Qdisc *sch)
@@ -431,15 +424,18 @@ static void cbs_destroy(struct Qdisc *sch)
        struct cbs_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
 
-       spin_lock(&cbs_list_lock);
-       list_del(&q->cbs_list);
-       spin_unlock(&cbs_list_lock);
+       /* Nothing to do if we couldn't create the underlying qdisc */
+       if (!q->qdisc)
+               return;
 
        qdisc_watchdog_cancel(&q->watchdog);
        cbs_disable_offload(dev, q);
 
-       if (q->qdisc)
-               qdisc_put(q->qdisc);
+       spin_lock(&cbs_list_lock);
+       list_del(&q->cbs_list);
+       spin_unlock(&cbs_list_lock);
+
+       qdisc_put(q->qdisc);
 }
 
 static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
index bad1cbe..05605b3 100644 (file)
@@ -361,6 +361,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt,
                goto errout;
 
        err = -EINVAL;
+       if (!tb[TCA_DSMARK_INDICES])
+               goto errout;
        indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
 
        if (hweight32(indices) != 1)
index cebfb65..b1da558 100644 (file)
@@ -177,7 +177,7 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
 
                parent = *p;
                skb = rb_to_skb(parent);
-               if (ktime_after(txtime, skb->tstamp)) {
+               if (ktime_compare(txtime, skb->tstamp) >= 0) {
                        p = &parent->rb_right;
                        leftmost = false;
                } else {
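
ktime_after() is a strict greater-than, so a packet whose txtime equals an already-queued packet's went to the left of it in the rbtree and would be dequeued first, reordering packets that share a txtime. Comparing with >= sends equal keys to the right, preserving FIFO order among them:

    /* equal txtime must keep arrival order: go right, not left */
    if (ktime_compare(txtime, skb->tstamp) >= 0)
        p = &parent->rb_right;
    else
        p = &parent->rb_left;
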
index 17bd8f5..8769b4b 100644 (file)
@@ -799,9 +799,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 };
 EXPORT_SYMBOL(pfifo_fast_ops);
 
-static struct lock_class_key qdisc_tx_busylock;
-static struct lock_class_key qdisc_running_key;
-
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                          const struct Qdisc_ops *ops,
                          struct netlink_ext_ack *extack)
@@ -854,17 +851,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
        }
 
        spin_lock_init(&sch->busylock);
-       lockdep_set_class(&sch->busylock,
-                         dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
        /* seqlock has the same scope of busylock, for NOLOCK qdisc */
        spin_lock_init(&sch->seqlock);
-       lockdep_set_class(&sch->busylock,
-                         dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
        seqcount_init(&sch->running);
-       lockdep_set_class(&sch->running,
-                         dev->qdisc_running_key ?: &qdisc_running_key);
 
        sch->ops = ops;
        sch->flags = ops->static_flags;
@@ -875,6 +864,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
        dev_hold(dev);
        refcount_set(&sch->refcnt, 1);
 
+       if (sch != &noop_qdisc) {
+               lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
+               lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
+               lockdep_set_class(&sch->running, &dev->qdisc_running_key);
+       }
+
        return sch;
 errout1:
        kfree(p);
@@ -1043,6 +1038,8 @@ static void attach_one_default_qdisc(struct net_device *dev,
 
        if (dev->priv_flags & IFF_NO_QUEUE)
                ops = &noqueue_qdisc_ops;
+       else if (dev->type == ARPHRD_CAN)
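+               /* CAN interfaces always default to pfifo_fast, see note below */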
+               ops = &pfifo_fast_ops;
 
        qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
        if (!qdisc) {
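
Two changes to sch_generic here. The file-scope lockdep keys made every qdisc's busylock and running seqcount share one lock class across all devices, which produces false deadlock reports when devices stack on each other; the keys now live in struct net_device (qdisc_tx_busylock_key, qdisc_running_key, added elsewhere in this patch) and are applied once the qdisc is set up, skipping the globally shared noop_qdisc. Separately, attach_one_default_qdisc() special-cases ARPHRD_CAN so CAN interfaces always get pfifo_fast as their default qdisc, regardless of the configured default:

    if (sch != &noop_qdisc) {  /* noop_qdisc is shared, keep it out */
        lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
        lockdep_set_class(&sch->seqlock,  &dev->qdisc_tx_busylock_key);
        lockdep_set_class(&sch->running,  &dev->qdisc_running_key);
    }
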
index 23cd1c8..be35f03 100644 (file)
@@ -5,11 +5,11 @@
  * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
  */
 
-#include <linux/jhash.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
+#include <linux/siphash.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
 
@@ -126,7 +126,7 @@ struct wdrr_bucket {
 
 struct hhf_sched_data {
        struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
-       u32                perturbation;   /* hash perturbation */
+       siphash_key_t      perturbation;   /* hash perturbation */
        u32                quantum;        /* psched_mtu(qdisc_dev(sch)); */
        u32                drop_overlimit; /* number of times max qdisc packet
                                            * limit was hit
@@ -264,7 +264,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
        }
 
        /* Get hashed flow-id of the skb. */
-       hash = skb_get_hash_perturb(skb, q->perturbation);
+       hash = skb_get_hash_perturb(skb, &q->perturbation);
 
        /* Check if this packet belongs to an already established HH flow. */
        flow_pos = hash & HHF_BIT_MASK;
@@ -582,7 +582,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
 
        sch->limit = 1000;
        q->quantum = psched_mtu(qdisc_dev(sch));
-       q->perturbation = prandom_u32();
+       get_random_bytes(&q->perturbation, sizeof(q->perturbation));
        INIT_LIST_HEAD(&q->new_buckets);
        INIT_LIST_HEAD(&q->old_buckets);
 
index 7bcf20e..8184c87 100644 (file)
@@ -1302,6 +1302,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
        struct htb_class *cl = (struct htb_class *)*arg, *parent;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_HTB_MAX + 1];
+       struct Qdisc *parent_qdisc = NULL;
        struct tc_htb_opt *hopt;
        u64 rate64, ceil64;
        int warn = 0;
@@ -1401,7 +1402,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                if (parent && !parent->level) {
                        /* turn parent into inner node */
                        qdisc_purge_queue(parent->leaf.q);
-                       qdisc_put(parent->leaf.q);
+                       parent_qdisc = parent->leaf.q;
                        if (parent->prio_activity)
                                htb_deactivate(q, parent);
 
@@ -1480,6 +1481,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
        cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
 
        sch_tree_unlock(sch);
+       qdisc_put(parent_qdisc);
 
        if (warn)
                pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
index e108774..b2b7fdb 100644 (file)
@@ -174,7 +174,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
 {
        struct multiq_sched_data *q = qdisc_priv(sch);
        struct tc_multiq_qopt *qopt;
-       int i;
+       struct Qdisc **removed;
+       int i, n_removed = 0;
 
        if (!netif_is_multiqueue(qdisc_dev(sch)))
                return -EOPNOTSUPP;
@@ -185,6 +186,11 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
 
        qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
 
+       removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands),
+                         GFP_KERNEL);
+       if (!removed)
+               return -ENOMEM;
+
        sch_tree_lock(sch);
        q->bands = qopt->bands;
        for (i = q->bands; i < q->max_bands; i++) {
@@ -192,13 +198,17 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
                        struct Qdisc *child = q->queues[i];
 
                        q->queues[i] = &noop_qdisc;
-                       qdisc_tree_flush_backlog(child);
-                       qdisc_put(child);
+                       qdisc_purge_queue(child);
+                       removed[n_removed++] = child;
                }
        }
 
        sch_tree_unlock(sch);
 
+       for (i = 0; i < n_removed; i++)
+               qdisc_put(removed[i]);
+       kfree(removed);
+
        for (i = 0; i < q->bands; i++) {
                if (q->queues[i] == &noop_qdisc) {
                        struct Qdisc *child, *old;
@@ -213,11 +223,10 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
                                if (child != &noop_qdisc)
                                        qdisc_hash_add(child, true);
 
-                               if (old != &noop_qdisc) {
-                                       qdisc_tree_flush_backlog(old);
-                                       qdisc_put(old);
-                               }
+                               if (old != &noop_qdisc)
+                                       qdisc_purge_queue(old);
                                sch_tree_unlock(sch);
+                               qdisc_put(old);
                        }
                }
        }
index b17f2ed..42e557d 100644 (file)
@@ -476,7 +476,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
         * skb will be queued.
         */
        if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-               struct Qdisc *rootq = qdisc_root(sch);
+               struct Qdisc *rootq = qdisc_root_bh(sch);
                u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 
                q->duplicate = 0;
@@ -509,6 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                if (skb->ip_summed == CHECKSUM_PARTIAL &&
                    skb_checksum_help(skb)) {
                        qdisc_drop(skb, sch, to_free);
+                       skb = NULL;
                        goto finish_segs;
                }
 
@@ -593,9 +594,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 finish_segs:
        if (segs) {
                unsigned int len, last_len;
-               int nb = 0;
+               int nb;
 
-               len = skb->len;
+               len = skb ? skb->len : 0;
+               nb = skb ? 1 : 0;
 
                while (segs) {
                        skb2 = segs->next;
@@ -612,7 +614,10 @@ finish_segs:
                        }
                        segs = skb2;
                }
-               qdisc_tree_reduce_backlog(sch, -nb, prev_len - len);
+               /* Parent qdiscs accounted for 1 skb of size @prev_len */
+               qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
+       } else if (!skb) {
+               return NET_XMIT_DROP;
        }
        return NET_XMIT_SUCCESS;
 }
@@ -777,7 +782,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
        struct disttable *d;
        int i;
 
-       if (n > NETEM_DIST_MAX)
+       if (!n || n > NETEM_DIST_MAX)
                return -EINVAL;
 
        d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
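
Several netem fixes. The duplicate path looks up the root qdisc with qdisc_root_bh(), the accessor that is valid under the BH/qdisc lock held at enqueue time rather than under rtnl. If checksum fixup fails on the head skb of a GSO burst, that skb is dropped and must not be touched again (skb = NULL), and NET_XMIT_DROP is returned when nothing at all was queued. The backlog fix-up now encodes its invariant directly: the parent accounted one skb of prev_len bytes, but nb skbs totalling len bytes were actually enqueued, so the tree is adjusted by the difference:

    /* parent saw: 1 packet, prev_len bytes
     * enqueued:   nb packets, len bytes
     * fix-up:     add (nb - 1) packets and (len - prev_len) bytes
     */
    qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));

Finally, get_dist_table() rejects an empty distribution table (n == 0) up front.
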
index 1dff850..4074c50 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <linux/random.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
 #include <net/ip.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
@@ -45,7 +45,7 @@ struct sfb_bucket {
  * (Section 4.4 of SFB reference : moving hash functions)
  */
 struct sfb_bins {
-       u32               perturbation; /* jhash perturbation */
+       siphash_key_t     perturbation; /* siphash key */
        struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
 };
 
@@ -217,7 +217,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da
 
 static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
 {
-       q->bins[slot].perturbation = prandom_u32();
+       get_random_bytes(&q->bins[slot].perturbation,
+                        sizeof(q->bins[slot].perturbation));
 }
 
 static void sfb_swap_slot(struct sfb_sched_data *q)
@@ -314,9 +315,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                /* If using external classifiers, get result and record it. */
                if (!sfb_classify(skb, fl, &ret, &salt))
                        goto other_drop;
-               sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+               sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation);
        } else {
-               sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
+               sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation);
        }
 
 
@@ -352,7 +353,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                /* Inelastic flow */
                if (q->double_buffering) {
                        sfbhash = skb_get_hash_perturb(skb,
-                           q->bins[slot].perturbation);
+                           &q->bins[slot].perturbation);
                        if (!sfbhash)
                                sfbhash = 1;
                        sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -488,7 +489,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
 {
        struct sfb_sched_data *q = qdisc_priv(sch);
-       struct Qdisc *child;
+       struct Qdisc *child, *old;
        struct nlattr *tb[TCA_SFB_MAX + 1];
        const struct tc_sfb_qopt *ctl = &sfb_default_ops;
        u32 limit;
@@ -518,8 +519,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
                qdisc_hash_add(child, true);
        sch_tree_lock(sch);
 
-       qdisc_tree_flush_backlog(q->qdisc);
-       qdisc_put(q->qdisc);
+       qdisc_purge_queue(q->qdisc);
+       old = q->qdisc;
        q->qdisc = child;
 
        q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
@@ -542,6 +543,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
        sfb_init_perturbation(1, q);
 
        sch_tree_unlock(sch);
+       qdisc_put(old);
 
        return 0;
 }
index 68404a9..c787d4d 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
@@ -117,7 +117,7 @@ struct sfq_sched_data {
        u8              headdrop;
        u8              maxdepth;       /* limit of packets per flow */
 
-       u32             perturbation;
+       siphash_key_t   perturbation;
        u8              cur_depth;      /* depth of longest slot */
        u8              flags;
        unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
@@ -157,7 +157,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
 static unsigned int sfq_hash(const struct sfq_sched_data *q,
                             const struct sk_buff *skb)
 {
-       return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
+       return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
 }
 
 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -607,9 +607,11 @@ static void sfq_perturbation(struct timer_list *t)
        struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+       siphash_key_t nkey;
 
+       get_random_bytes(&nkey, sizeof(nkey));
        spin_lock(root_lock);
-       q->perturbation = prandom_u32();
+       q->perturbation = nkey;
        if (!q->filter_list && q->tail)
                sfq_rehash(sch);
        spin_unlock(root_lock);
@@ -688,7 +690,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
        del_timer(&q->perturb_timer);
        if (q->perturb_period) {
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
-               q->perturbation = prandom_u32();
+               get_random_bytes(&q->perturbation, sizeof(q->perturbation));
        }
        sch_tree_unlock(sch);
        kfree(p);
@@ -745,7 +747,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
        q->quantum = psched_mtu(qdisc_dev(sch));
        q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
        q->perturb_period = 0;
-       q->perturbation = prandom_u32();
+       get_random_bytes(&q->perturbation, sizeof(q->perturbation));
 
        if (opt) {
                int err = sfq_change(sch, opt);
index 2f7b342..2121187 100644 (file)
@@ -1044,12 +1044,11 @@ static void taprio_set_picos_per_byte(struct net_device *dev,
        if (err < 0)
                goto skip;
 
-       if (ecmd.base.speed != SPEED_UNKNOWN)
+       if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
                speed = ecmd.base.speed;
 
 skip:
-       picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
-                                  speed * 1000 * 1000);
+       picos_per_byte = (USEC_PER_SEC * 8) / speed;
 
        atomic64_set(&q->picos_per_byte, picos_per_byte);
        netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
@@ -1153,7 +1152,7 @@ EXPORT_SYMBOL_GPL(taprio_offload_free);
  * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
  * This is left as TODO.
  */
-void taprio_offload_config_changed(struct taprio_sched *q)
+static void taprio_offload_config_changed(struct taprio_sched *q)
 {
        struct sched_gate_list *oper, *admin;
 
@@ -1342,6 +1341,10 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
                NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
                goto out;
        }
+
+       /* Everything went ok, return success. */
+       err = 0;
+
 out:
        return err;
 }
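
The picos-per-byte computation simplifies algebraically: with the link speed in Mb/s, one bit takes 10^6/speed picoseconds, so one byte takes 8 * 10^6 / speed = (USEC_PER_SEC * 8) / speed picoseconds, and the 64-bit division was unnecessary. Treating ecmd.base.speed == 0 like SPEED_UNKNOWN also avoids a divide by zero when a driver reports no speed:

    /* speed in Mb/s, USEC_PER_SEC == 1000000 */
    picos_per_byte = (USEC_PER_SEC * 8) / speed;
    /* e.g. speed = 1000 (1 Gb/s) -> 8000 ps/byte = 8 ns/byte */

The remaining hunks make taprio_offload_config_changed() static and have taprio_parse_clockid() return 0 explicitly on success instead of whatever err last held.
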
index fc9a4c6..0851166 100644 (file)
@@ -175,7 +175,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
                mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
                mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
                mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-               mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+               mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
                mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
 
                if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
index 1008cdc..2277981 100644 (file)
@@ -201,7 +201,7 @@ int sctp_rcv(struct sk_buff *skb)
 
        if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
                goto discard_release;
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        if (sk_filter(sk, skb))
                goto discard_release;
@@ -243,7 +243,7 @@ int sctp_rcv(struct sk_buff *skb)
                bh_lock_sock(sk);
        }
 
-       if (sock_owned_by_user(sk)) {
+       if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
                if (sctp_add_backlog(sk, skb)) {
                        bh_unlock_sock(sk);
                        sctp_chunk_free(chunk);
@@ -321,8 +321,8 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
                local_bh_disable();
                bh_lock_sock(sk);
 
-               if (sock_owned_by_user(sk)) {
-                       if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+               if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
+                       if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
                                sctp_chunk_free(chunk);
                        else
                                backloged = 1;
@@ -336,7 +336,13 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
                if (backloged)
                        return 0;
        } else {
-               sctp_inq_push(inqueue, chunk);
+               if (!sctp_newsk_ready(sk)) {
+                       if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
+                               return 0;
+                       sctp_chunk_free(chunk);
+               } else {
+                       sctp_inq_push(inqueue, chunk);
+               }
        }
 
 done:
@@ -358,7 +364,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
        struct sctp_ep_common *rcvr = chunk->rcvr;
        int ret;
 
-       ret = sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+       ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
        if (!ret) {
                /* Hold the assoc/ep while hanging on the backlog queue.
                 * This way, we know structures we need will not disappear
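
A newly created SCTP socket (for example from accept or peeloff) may not have its sk_socket wired up yet, and processing chunks on it in that window races with socket initialization. The hunks gate inline processing on sctp_newsk_ready(), a helper introduced by the same patch, and otherwise park the skb on the socket backlog, which is replayed once the owner releases the lock; the sk_rcvbuf reads gain READ_ONCE() because they race with setsockopt. The resulting shape:

    if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
        /* owner busy, or socket not fully set up: defer via backlog */
        if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
            sctp_chunk_free(chunk);     /* backlog full: drop */
    } else {
        sctp_inq_push(inqueue, chunk);  /* process inline */
    }
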
index e5f2fc7..dd860fe 100644 (file)
@@ -215,7 +215,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
        rcu_read_lock();
        res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
-                      tclass);
+                      tclass, sk->sk_priority);
        rcu_read_unlock();
        return res;
 }
index e41ed2e..48d6395 100644 (file)
@@ -2155,7 +2155,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
        case SCTP_PARAM_SET_PRIMARY:
                if (ep->asconf_enable)
                        break;
-               goto fallthrough;
+               goto unhandled;
 
        case SCTP_PARAM_HOST_NAME_ADDRESS:
                /* Tell the peer, we won't support this param.  */
@@ -2166,11 +2166,11 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
        case SCTP_PARAM_FWD_TSN_SUPPORT:
                if (ep->prsctp_enable)
                        break;
-               goto fallthrough;
+               goto unhandled;
 
        case SCTP_PARAM_RANDOM:
                if (!ep->auth_enable)
-                       goto fallthrough;
+                       goto unhandled;
 
                /* SCTP-AUTH: Section 6.1
                 * If the random number is not 32 bytes long, the association
@@ -2187,7 +2187,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
 
        case SCTP_PARAM_CHUNKS:
                if (!ep->auth_enable)
-                       goto fallthrough;
+                       goto unhandled;
 
                /* SCTP-AUTH: Section 3.2
                 * The CHUNKS parameter MUST be included once in the INIT or
@@ -2203,7 +2203,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
 
        case SCTP_PARAM_HMAC_ALGO:
                if (!ep->auth_enable)
-                       goto fallthrough;
+                       goto unhandled;
 
                hmacs = (struct sctp_hmac_algo_param *)param.p;
                n_elt = (ntohs(param.p->length) -
@@ -2226,7 +2226,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
                        retval = SCTP_IERROR_ABORT;
                }
                break;
-fallthrough:
+unhandled:
        default:
                pr_debug("%s: unrecognized param:%d for chunk:%d\n",
                         __func__, ntohs(param.p->type), cid);
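
The label rename above sidesteps the then-new fallthrough pseudo-keyword: once the kernel defines fallthrough as a macro for the compiler attribute, a goto label with that spelling no longer parses. A contrived illustration (function hypothetical):

#define fallthrough __attribute__((__fallthrough__))

static int classify(int type)
{
	switch (type) {
	case 1:
		if (type & 1)
			goto unhandled;	/* "goto fallthrough" would not compile */
		return 1;
	unhandled:
	default:
		return 0;
	}
}
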
index 939b8d2..ffd3262 100644 (file)
@@ -8476,7 +8476,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
        mask = 0;
 
        /* Are there any exceptional events?  */
-       if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+       if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
                mask |= EPOLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
        if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -8485,7 +8485,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
                mask |= EPOLLHUP;
 
        /* Is it readable?  Reconsider this code with TCP-style support.  */
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        /* The association is either gone or not ready.  */
@@ -8871,7 +8871,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
                if (sk_can_busy_loop(sk)) {
                        sk_busy_loop(sk, noblock);
 
-                       if (!skb_queue_empty(&sk->sk_receive_queue))
+                       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                                continue;
                }
 
@@ -9306,7 +9306,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
        newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
        newinet->inet_dport = htons(asoc->peer.port);
        newinet->pmtudisc = inet->pmtudisc;
-       newinet->inet_id = asoc->next_tsn ^ jiffies;
+       newinet->inet_id = prandom_u32();
 
        newinet->uc_ttl = inet->uc_ttl;
        newinet->mc_loop = 1;
@@ -9500,7 +9500,7 @@ struct proto sctp_prot = {
        .backlog_rcv =  sctp_backlog_rcv,
        .hash        =  sctp_hash,
        .unhash      =  sctp_unhash,
-       .get_port    =  sctp_get_port,
+       .no_autobind =  true,
        .obj_size    =  sizeof(struct sctp_sock),
        .useroffset  =  offsetof(struct sctp_sock, subscribe),
        .usersize    =  offsetof(struct sctp_sock, initmsg) -
@@ -9542,7 +9542,7 @@ struct proto sctpv6_prot = {
        .backlog_rcv    = sctp_backlog_rcv,
        .hash           = sctp_hash,
        .unhash         = sctp_unhash,
-       .get_port       = sctp_get_port,
+       .no_autobind    = true,
        .obj_size       = sizeof(struct sctp6_sock),
        .useroffset     = offsetof(struct sctp6_sock, sctp.subscribe),
        .usersize       = offsetof(struct sctp6_sock, sctp.initmsg) -
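
The sctp_poll() and sctp_skb_recv_datagram() hunks above switch to skb_queue_empty_lockless(), which differs from skb_queue_empty() only in loading the head pointer with READ_ONCE(); that is what makes the check safe from poll() paths that hold no queue lock. A sketch modeled on the kernel helper:

static inline bool queue_empty_lockless(const struct sk_buff_head *list)
{
	/* READ_ONCE() pairs with WRITE_ONCE() on the enqueue side */
	return READ_ONCE(list->next) == (const struct sk_buff *)list;
}
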
index 5b93258..47946f4 100644 (file)
@@ -123,6 +123,12 @@ struct proto smc_proto6 = {
 };
 EXPORT_SYMBOL_GPL(smc_proto6);
 
+static void smc_restore_fallback_changes(struct smc_sock *smc)
+{
+       smc->clcsock->file->private_data = smc->sk.sk_socket;
+       smc->clcsock->file = NULL;
+}
+
 static int __smc_release(struct smc_sock *smc)
 {
        struct sock *sk = &smc->sk;
@@ -141,6 +147,7 @@ static int __smc_release(struct smc_sock *smc)
                }
                sk->sk_state = SMC_CLOSED;
                sk->sk_state_change(sk);
+               smc_restore_fallback_changes(smc);
        }
 
        sk->sk_prot->unhash(sk);
@@ -700,8 +707,6 @@ static int __smc_connect(struct smc_sock *smc)
        int smc_type;
        int rc = 0;
 
-       sock_hold(&smc->sk); /* sock put in passive closing */
-
        if (smc->use_fallback)
                return smc_connect_fallback(smc, smc->fallback_rsn);
 
@@ -846,6 +851,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
        rc = kernel_connect(smc->clcsock, addr, alen, flags);
        if (rc && rc != -EINPROGRESS)
                goto out;
+
+       sock_hold(&smc->sk); /* sock put in passive closing */
        if (flags & O_NONBLOCK) {
                if (schedule_work(&smc->connect_work))
                        smc->connect_nonblock = 1;
@@ -1291,8 +1298,8 @@ static void smc_listen_work(struct work_struct *work)
        /* check if RDMA is available */
        if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
                /* prepare RDMA check */
-               memset(&ini, 0, sizeof(ini));
                ini.is_smcd = false;
+               ini.ism_dev = NULL;
                ini.ib_lcl = &pclc->lcl;
                rc = smc_find_rdma_device(new_smc, &ini);
                if (rc) {
index 4ca50dd..2ba97ff 100644 (file)
@@ -213,7 +213,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = SMC_CLC_DECL_MEM;
-               goto out;
+               goto ism_put_vlan;
        }
        lgr->is_smcd = ini->is_smcd;
        lgr->sync_err = 0;
@@ -289,6 +289,9 @@ clear_llc_lnk:
        smc_llc_link_clear(lnk);
 free_lgr:
        kfree(lgr);
+ism_put_vlan:
+       if (ini->is_smcd && ini->vlan_id)
+               smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
 out:
        if (rc < 0) {
                if (rc == -ENOMEM)
@@ -558,7 +561,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
        }
 
        rtnl_lock();
-       nest_lvl = dev_get_nest_level(ndev);
+       nest_lvl = ndev->lower_level;
        for (i = 0; i < nest_lvl; i++) {
                struct list_head *lower = &ndev->adj_list.lower;
 
index bab2da8..2920b00 100644 (file)
@@ -718,7 +718,7 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
        int i, nest_lvl;
 
        rtnl_lock();
-       nest_lvl = dev_get_nest_level(ndev);
+       nest_lvl = ndev->lower_level;
        for (i = 0; i < nest_lvl; i++) {
                struct list_head *lower = &ndev->adj_list.lower;
 
index 413a6ab..97e8369 100644 (file)
@@ -211,8 +211,7 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
        rc = sk_wait_event(sk, timeo,
                           sk->sk_err ||
                           sk->sk_shutdown & RCV_SHUTDOWN ||
-                          fcrit(conn) ||
-                          smc_cdc_rxed_any_close_or_senddone(conn),
+                          fcrit(conn),
                           &wait);
        remove_wait_queue(sk_sleep(sk), &wait);
        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
@@ -262,6 +261,18 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
        return -EAGAIN;
 }
 
+static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
+{
+       struct smc_connection *conn = &smc->conn;
+
+       if (smc_rx_data_available(conn))
+               return true;
+       else if (conn->urg_state == SMC_URG_VALID)
+                       /* we received a single urgent byte - skip */
+               smc_rx_update_cons(smc, 0);
+       return false;
+}
+
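
The helper above, together with the loop change that follows, implements a check/re-check pattern: data observed before a close notification must be re-sampled after it, or a CDC message that raced in with the shutdown would be dropped. A self-contained sketch of the control flow (callbacks stand in for the real state tests):

static int rx_check_sketch(bool (*data_available)(void), bool (*peer_closed)(void))
{
	if (data_available())
		return 1;		/* go copy data */
	if (peer_closed()) {
		/* a message may have landed between the two checks above;
		 * re-sample before declaring EOF
		 */
		return data_available() ? 1 : 0;
	}
	return -1;			/* nothing yet: caller sleeps and retries */
}
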
 /* smc_rx_recvmsg - receive data from RMBE
  * @msg:       copy data to receive buffer
  * @pipe:      copy data to pipe if set - indicates splice() call
@@ -303,16 +314,18 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
                if (read_done >= target || (pipe && read_done))
                        break;
 
-               if (atomic_read(&conn->bytes_to_rcv))
+               if (smc_rx_recvmsg_data_available(smc))
                        goto copy;
-               else if (conn->urg_state == SMC_URG_VALID)
-                       /* we received a single urgent Byte - skip */
-                       smc_rx_update_cons(smc, 0);
 
                if (sk->sk_shutdown & RCV_SHUTDOWN ||
-                   smc_cdc_rxed_any_close_or_senddone(conn) ||
-                   conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+                   conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) {
+                       /* smc_cdc_msg_recv_action() could have run after
+                        * the smc_rx_recvmsg_data_available() call above
+                        */
+                       if (smc_rx_recvmsg_data_available(smc))
+                               goto copy;
                        break;
+               }
 
                if (read_done) {
                        if (sk->sk_err ||
index 4ce42c6..d75fddc 100644 (file)
@@ -1960,7 +1960,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
 
        if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
                goto unwrap_failed;
-       if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
+       if (xdr_buf_read_mic(rcv_buf, &mic, mic_offset))
                goto unwrap_failed;
        maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
index 339e8c0..195b40c 100644 (file)
@@ -220,7 +220,7 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
                goto out;
 
        spin_lock_bh(&xprt->bc_pa_lock);
-       xprt->bc_alloc_max -= max_reqs;
+       xprt->bc_alloc_max -= min(max_reqs, xprt->bc_alloc_max);
        list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
                dprintk("RPC:        req=%p\n", req);
                list_del(&req->rq_bc_pa_list);
@@ -307,8 +307,8 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
                 */
                dprintk("RPC:       Last session removed req=%p\n", req);
                xprt_free_allocation(req);
-               return;
        }
+       xprt_put(xprt);
 }
 
 /*
@@ -339,7 +339,7 @@ found:
                spin_unlock(&xprt->bc_pa_lock);
                if (new) {
                        if (req != new)
-                               xprt_free_bc_rqst(new);
+                               xprt_free_allocation(new);
                        break;
                } else if (req)
                        break;
@@ -368,6 +368,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
        set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
 
        dprintk("RPC:       add callback request to list\n");
+       xprt_get(xprt);
        spin_lock(&bc_serv->sv_cb_lock);
        list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
        wake_up(&bc_serv->sv_cb_waitq);
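
The xprt_get()/xprt_put() additions pin the transport for as long as a callback request is outstanding, so the transport cannot be destroyed underneath a queued callback. A condensed sketch of the pairing (helper names hypothetical; list handling elided):

static void bc_request_enqueue(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	xprt_get(xprt);		/* pin: dropped in bc_request_done() */
	/* ... add req to sv_cb_list and wake the callback service ... */
}

static void bc_request_done(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	/* ... recycle req onto bc_pa_list or free it ... */
	xprt_put(xprt);		/* balance the enqueue-side xprt_get() */
}
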
index 6f1528f..a349094 100644 (file)
@@ -373,7 +373,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
        spin_lock(&cache_list_lock);
        cd->nextcheck = 0;
        cd->entries = 0;
-       atomic_set(&cd->readers, 0);
+       atomic_set(&cd->writers, 0);
        cd->last_close = 0;
        cd->last_warn = -1;
        list_add(&cd->others, &cache_list);
@@ -1029,11 +1029,13 @@ static int cache_open(struct inode *inode, struct file *filp,
                }
                rp->offset = 0;
                rp->q.reader = 1;
-               atomic_inc(&cd->readers);
+
                spin_lock(&queue_lock);
                list_add(&rp->q.list, &cd->queue);
                spin_unlock(&queue_lock);
        }
+       if (filp->f_mode & FMODE_WRITE)
+               atomic_inc(&cd->writers);
        filp->private_data = rp;
        return 0;
 }
@@ -1062,8 +1064,10 @@ static int cache_release(struct inode *inode, struct file *filp,
                filp->private_data = NULL;
                kfree(rp);
 
+       }
+       if (filp->f_mode & FMODE_WRITE) {
+               atomic_dec(&cd->writers);
                cd->last_close = seconds_since_boot();
-               atomic_dec(&cd->readers);
        }
        module_put(cd->owner);
        return 0;
@@ -1171,7 +1175,7 @@ static void warn_no_listener(struct cache_detail *detail)
 
 static bool cache_listeners_exist(struct cache_detail *detail)
 {
-       if (atomic_read(&detail->readers))
+       if (atomic_read(&detail->writers))
                return true;
        if (detail->last_close == 0)
                /* This cache was never opened */
@@ -1520,6 +1524,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
        cd->nextcheck = now;
        cache_flush();
 
+       if (cd->flush)
+               cd->flush();
+
        *ppos += count;
        return count;
 }
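
Taken together, the cache.c hunks change how "is anyone listening?" is decided: a read-only opener of the channel cannot answer upcalls, so the check now counts open write sides instead of readers. A standalone sketch of the accounting (types illustrative):

#include <linux/atomic.h>

struct demo_cache {
	atomic_t writers;	/* open write-side channel files */
};

static void demo_open(struct demo_cache *cd, bool writable)
{
	if (writable)
		atomic_inc(&cd->writers);
}

static bool demo_listeners_exist(struct demo_cache *cd)
{
	/* only a writer can send replies back to the kernel */
	return atomic_read(&cd->writers) != 0;
}
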
index a07b516..f7f7856 100644 (file)
@@ -1837,7 +1837,7 @@ call_allocate(struct rpc_task *task)
                return;
        }
 
-       rpc_exit(task, -ERESTARTSYS);
+       rpc_call_rpcerror(task, -ERESTARTSYS);
 }
 
 static int
@@ -1862,6 +1862,7 @@ rpc_xdr_encode(struct rpc_task *task)
                     req->rq_rbuffer,
                     req->rq_rcvsize);
 
+       req->rq_reply_bytes_recvd = 0;
        req->rq_snd_buf.head[0].iov_len = 0;
        xdr_init_encode(&xdr, &req->rq_snd_buf,
                        req->rq_snd_buf.head[0].iov_base, req);
@@ -1881,6 +1882,8 @@ call_encode(struct rpc_task *task)
        if (!rpc_task_need_encode(task))
                goto out;
        dprint_status(task);
+       /* Dequeue task from the receive queue while we're encoding */
+       xprt_request_dequeue_xprt(task);
        /* Encode here so that rpcsec_gss can use correct sequence number. */
        rpc_xdr_encode(task);
        /* Did the encode result in an error condition? */
@@ -2479,6 +2482,7 @@ call_decode(struct rpc_task *task)
        struct rpc_clnt *clnt = task->tk_client;
        struct rpc_rqst *req = task->tk_rqstp;
        struct xdr_stream xdr;
+       int err;
 
        dprint_status(task);
 
@@ -2501,6 +2505,15 @@ call_decode(struct rpc_task *task)
         * before it changed req->rq_reply_bytes_recvd.
         */
        smp_rmb();
+
+       /*
+        * Did we ever call xprt_complete_rqst()? If not, we should assume
+        * the message is incomplete.
+        */
+       err = -EAGAIN;
+       if (!req->rq_reply_bytes_recvd)
+               goto out;
+
        req->rq_rcv_buf.len = req->rq_private_buf.len;
 
        /* Check that the softirq receive buffer is valid */
@@ -2509,7 +2522,9 @@ call_decode(struct rpc_task *task)
 
        xdr_init_decode(&xdr, &req->rq_rcv_buf,
                        req->rq_rcv_buf.head[0].iov_base, req);
-       switch (rpc_decode_header(task, &xdr)) {
+       err = rpc_decode_header(task, &xdr);
+out:
+       switch (err) {
        case 0:
                task->tk_action = rpc_exit_task;
                task->tk_status = rpcauth_unwrap_resp(task, &xdr);
@@ -2518,9 +2533,6 @@ call_decode(struct rpc_task *task)
                return;
        case -EAGAIN:
                task->tk_status = 0;
-               xdr_free_bvec(&req->rq_rcv_buf);
-               req->rq_reply_bytes_recvd = 0;
-               req->rq_rcv_buf.len = 0;
                if (task->tk_client->cl_discrtry)
                        xprt_conditional_disconnect(req->rq_xprt,
                                                    req->rq_connect_cookie);
@@ -2561,7 +2573,7 @@ rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
        return 0;
 out_fail:
        trace_rpc_bad_callhdr(task);
-       rpc_exit(task, error);
+       rpc_call_rpcerror(task, error);
        return error;
 }
 
@@ -2628,7 +2640,7 @@ out_garbage:
                return -EAGAIN;
        }
 out_err:
-       rpc_exit(task, error);
+       rpc_call_rpcerror(task, error);
        return error;
 
 out_unparsable:
index 1f275ab..360afe1 100644 (file)
@@ -541,33 +541,14 @@ rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
        return NULL;
 }
 
-static void
-rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
-               struct rpc_wait_queue *queue, struct rpc_task *task)
-{
-       rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL);
-}
-
 /*
  * Wake up a queued task while the queue lock is being held
  */
-static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
-{
-       rpc_wake_up_task_on_wq_queue_locked(rpciod_workqueue, queue, task);
-}
-
-/*
- * Wake up a task on a specific queue
- */
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-               struct rpc_wait_queue *queue,
-               struct rpc_task *task)
+static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue,
+                                         struct rpc_task *task)
 {
-       if (!RPC_IS_QUEUED(task))
-               return;
-       spin_lock(&queue->lock);
-       rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
-       spin_unlock(&queue->lock);
+       rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+                                                  task, NULL, NULL);
 }
 
 /*
@@ -930,8 +911,10 @@ static void __rpc_execute(struct rpc_task *task)
                /*
                 * Signalled tasks should exit rather than sleep.
                 */
-               if (RPC_SIGNALLED(task))
+               if (RPC_SIGNALLED(task)) {
+                       task->tk_rpc_status = -ERESTARTSYS;
                        rpc_exit(task, -ERESTARTSYS);
+               }
 
                /*
                 * The queue->lock protects against races with
@@ -967,6 +950,7 @@ static void __rpc_execute(struct rpc_task *task)
                         */
                        dprintk("RPC: %5u got signal\n", task->tk_pid);
                        set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
+                       task->tk_rpc_status = -ERESTARTSYS;
                        rpc_exit(task, -ERESTARTSYS);
                }
                dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
index 220b799..d11b705 100644 (file)
@@ -1233,8 +1233,8 @@ svc_generic_init_request(struct svc_rqst *rqstp,
 
        if (rqstp->rq_vers >= progp->pg_nvers )
                goto err_bad_vers;
-         versp = progp->pg_vers[rqstp->rq_vers];
-         if (!versp)
+       versp = progp->pg_vers[rqstp->rq_vers];
+       if (!versp)
                goto err_bad_vers;
 
        /*
index 48c93b9..14ba9e7 100644 (file)
@@ -560,7 +560,7 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
  * required at the end of encoding, or any other time when the xdr_buf
  * data might be read.
  */
-void xdr_commit_encode(struct xdr_stream *xdr)
+inline void xdr_commit_encode(struct xdr_stream *xdr)
 {
        int shift = xdr->scratch.iov_len;
        void *page;
@@ -1236,43 +1236,60 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
 }
 EXPORT_SYMBOL_GPL(xdr_encode_word);
 
-/* If the netobj starting offset bytes from the start of xdr_buf is contained
- * entirely in the head or the tail, set object to point to it; otherwise
- * try to find space for it at the end of the tail, copy it there, and
- * set obj to point to it. */
-int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
+/**
+ * xdr_buf_read_mic() - obtain the address of the GSS mic from xdr buf
+ * @buf: pointer to buffer containing a mic
+ * @mic: on success, returns the address of the mic
+ * @offset: the offset in buf where mic may be found
+ *
+ * This function may modify the xdr buf if the mic is found to be straddling
+ * a boundary between head, pages, and tail.  On success the mic can be read
+ * from the address returned.  There is no need to free the mic.
+ *
+ * Return: 0 on success, otherwise an integer error.
+ */
+int xdr_buf_read_mic(struct xdr_buf *buf, struct xdr_netobj *mic, unsigned int offset)
 {
        struct xdr_buf subbuf;
+       unsigned int boundary;
 
-       if (xdr_decode_word(buf, offset, &obj->len))
+       if (xdr_decode_word(buf, offset, &mic->len))
                return -EFAULT;
-       if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
+       offset += 4;
+
+       /* Is the mic partially in the head? */
+       boundary = buf->head[0].iov_len;
+       if (offset < boundary && (offset + mic->len) > boundary)
+               xdr_shift_buf(buf, boundary - offset);
+
+       /* Is the mic partially in the pages? */
+       boundary += buf->page_len;
+       if (offset < boundary && (offset + mic->len) > boundary)
+               xdr_shrink_pagelen(buf, boundary - offset);
+
+       if (xdr_buf_subsegment(buf, &subbuf, offset, mic->len))
                return -EFAULT;
 
-       /* Is the obj contained entirely in the head? */
-       obj->data = subbuf.head[0].iov_base;
-       if (subbuf.head[0].iov_len == obj->len)
+       /* Is the mic contained entirely in the head? */
+       mic->data = subbuf.head[0].iov_base;
+       if (subbuf.head[0].iov_len == mic->len)
                return 0;
-       /* ..or is the obj contained entirely in the tail? */
-       obj->data = subbuf.tail[0].iov_base;
-       if (subbuf.tail[0].iov_len == obj->len)
+       /* ..or is the mic contained entirely in the tail? */
+       mic->data = subbuf.tail[0].iov_base;
+       if (subbuf.tail[0].iov_len == mic->len)
                return 0;
 
-       /* use end of tail as storage for obj:
-        * (We don't copy to the beginning because then we'd have
-        * to worry about doing a potentially overlapping copy.
-        * This assumes the object is at most half the length of the
-        * tail.) */
-       if (obj->len > buf->buflen - buf->len)
+       /* Find a contiguous area in @buf to hold all of @mic */
+       if (mic->len > buf->buflen - buf->len)
                return -ENOMEM;
        if (buf->tail[0].iov_len != 0)
-               obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+               mic->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
        else
-               obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
-       __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
+               mic->data = buf->head[0].iov_base + buf->head[0].iov_len;
+       __read_bytes_from_xdr_buf(&subbuf, mic->data, mic->len);
        return 0;
 }
-EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);
+EXPORT_SYMBOL_GPL(xdr_buf_read_mic);
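
A hedged usage sketch for the reworked helper (caller name hypothetical): on success the MIC is contiguous and lives inside @buf, so the caller frees nothing:

static int check_mic_at(struct xdr_buf *buf, unsigned int offset)
{
	struct xdr_netobj mic;
	int err;

	err = xdr_buf_read_mic(buf, &mic, offset);
	if (err)
		return err;

	/* mic.data/mic.len now describe a contiguous MIC inside @buf */
	return 0;
}
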
 
 /* Returns 0 on success, or else a negative error code. */
 static int
index 2e71f54..41df4c5 100644 (file)
@@ -456,6 +456,12 @@ void xprt_release_rqst_cong(struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
 
+static void xprt_clear_congestion_window_wait_locked(struct rpc_xprt *xprt)
+{
+       if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state))
+               __xprt_lock_write_next_cong(xprt);
+}
+
 /*
  * Clear the congestion window wait flag and wake up the next
  * entry on xprt->sending
@@ -671,6 +677,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
        spin_lock(&xprt->transport_lock);
        xprt_clear_connected(xprt);
        xprt_clear_write_space_locked(xprt);
+       xprt_clear_congestion_window_wait_locked(xprt);
        xprt_wake_pending_tasks(xprt, -ENOTCONN);
        spin_unlock(&xprt->transport_lock);
 }
@@ -1323,6 +1330,36 @@ xprt_request_dequeue_transmit(struct rpc_task *task)
        spin_unlock(&xprt->queue_lock);
 }
 
+/**
+ * xprt_request_dequeue_xprt - remove a task from the transmit+receive queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmit and receive queues, and ensure that
+ * it is not pinned by the receive work item.
+ */
+void
+xprt_request_dequeue_xprt(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+           test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+           xprt_is_pinned_rqst(req)) {
+               spin_lock(&xprt->queue_lock);
+               xprt_request_dequeue_transmit_locked(task);
+               xprt_request_dequeue_receive_locked(task);
+               while (xprt_is_pinned_rqst(req)) {
+                       set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+                       spin_unlock(&xprt->queue_lock);
+                       xprt_wait_on_pinned_rqst(req);
+                       spin_lock(&xprt->queue_lock);
+                       clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+               }
+               spin_unlock(&xprt->queue_lock);
+       }
+}
+
 /**
  * xprt_request_prepare - prepare an encoded request for transport
  * @req: pointer to rpc_rqst
@@ -1747,28 +1784,6 @@ void xprt_retry_reserve(struct rpc_task *task)
        xprt_do_reserve(xprt, task);
 }
 
-static void
-xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
-{
-       struct rpc_xprt *xprt = req->rq_xprt;
-
-       if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
-           test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
-           xprt_is_pinned_rqst(req)) {
-               spin_lock(&xprt->queue_lock);
-               xprt_request_dequeue_transmit_locked(task);
-               xprt_request_dequeue_receive_locked(task);
-               while (xprt_is_pinned_rqst(req)) {
-                       set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
-                       spin_unlock(&xprt->queue_lock);
-                       xprt_wait_on_pinned_rqst(req);
-                       spin_lock(&xprt->queue_lock);
-                       clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
-               }
-               spin_unlock(&xprt->queue_lock);
-       }
-}
-
 /**
  * xprt_release - release an RPC request slot
  * @task: task which is finished with the slot
@@ -1788,7 +1803,7 @@ void xprt_release(struct rpc_task *task)
        }
 
        xprt = req->rq_xprt;
-       xprt_request_dequeue_all(task, req);
+       xprt_request_dequeue_xprt(task);
        spin_lock(&xprt->transport_lock);
        xprt->ops->release_xprt(xprt, task);
        if (xprt->ops->release_request)
@@ -1927,6 +1942,11 @@ static void xprt_destroy_cb(struct work_struct *work)
        rpc_destroy_wait_queue(&xprt->sending);
        rpc_destroy_wait_queue(&xprt->backlog);
        kfree(xprt->servername);
+       /*
+        * Destroy any existing back channel
+        */
+       xprt_destroy_backchannel(xprt, UINT_MAX);
+
        /*
         * Tear down transport state and free the rpc_xprt
         */
index 59e624b..b458bf5 100644 (file)
@@ -54,9 +54,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
 
 unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt)
 {
-       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-
-       return r_xprt->rx_buf.rb_bc_srv_max_requests;
+       return RPCRDMA_BACKWARD_WRS >> 1;
 }
 
 static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
@@ -165,6 +163,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
        spin_lock(&xprt->bc_pa_lock);
        list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
        spin_unlock(&xprt->bc_pa_lock);
+       xprt_put(xprt);
 }
 
 static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
@@ -261,6 +260,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 
        /* Queue rqst for ULP's callback service */
        bc_serv = xprt->bc_serv;
+       xprt_get(xprt);
        spin_lock(&bc_serv->sv_cb_lock);
        list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
        spin_unlock(&bc_serv->sv_cb_lock);
index 0b6dad7..30065a2 100644 (file)
@@ -7,67 +7,37 @@
 /* Lightweight memory registration using Fast Registration Work
  * Requests (FRWR).
  *
- * FRWR features ordered asynchronous registration and deregistration
- * of arbitrarily sized memory regions. This is the fastest and safest
+ * FRWR features ordered asynchronous registration and invalidation
+ * of arbitrarily sized memory regions. This is the fastest and safest
  * but most complex memory registration mode.
  */
 
 /* Normal operation
  *
- * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
+ * A Memory Region is prepared for RDMA Read or Write using a FAST_REG
  * Work Request (frwr_map). When the RDMA operation is finished, this
  * Memory Region is invalidated using a LOCAL_INV Work Request
- * (frwr_unmap_sync).
+ * (frwr_unmap_async and frwr_unmap_sync).
  *
- * Typically these Work Requests are not signaled, and neither are RDMA
- * SEND Work Requests (with the exception of signaling occasionally to
- * prevent provider work queue overflows). This greatly reduces HCA
+ * Typically FAST_REG Work Requests are not signaled, and neither are
+ * RDMA Send Work Requests (with the exception of signaling occasionally
+ * to prevent provider work queue overflows). This greatly reduces HCA
  * interrupt workload.
- *
- * As an optimization, frwr_unmap marks MRs INVALID before the
- * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mrs immediately so that no work (like managing a linked list
- * under a spinlock) is needed in the completion upcall.
- *
- * But this means that frwr_map() can occasionally encounter an MR
- * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
- * ordering prevents a subsequent FAST_REG WR from executing against
- * that MR while it is still being invalidated.
  */
 
 /* Transport recovery
  *
- * ->op_map and the transport connect worker cannot run at the same
- * time, but ->op_unmap can fire while the transport connect worker
- * is running. Thus MR recovery is handled in ->op_map, to guarantee
- * that recovered MRs are owned by a sending RPC, and not one where
- * ->op_unmap could fire at the same time transport reconnect is
- * being done.
- *
- * When the underlying transport disconnects, MRs are left in one of
- * four states:
- *
- * INVALID:    The MR was not in use before the QP entered ERROR state.
- *
- * VALID:      The MR was registered before the QP entered ERROR state.
- *
- * FLUSHED_FR: The MR was being registered when the QP entered ERROR
- *             state, and the pending WR was flushed.
- *
- * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
- *             state, and the pending WR was flushed.
- *
- * When frwr_map encounters FLUSHED and VALID MRs, they are recovered
- * with ib_dereg_mr and then are re-initialized. Because MR recovery
- * allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mrs list when recovery is
- * complete. frwr_map allocates another MR for the current RPC while
- * the broken MR is reset.
- *
- * To ensure that frwr_map doesn't encounter an MR that is marked
- * INVALID but that is about to be flushed due to a previous transport
- * disconnect, the transport connect worker attempts to drain all
- * pending send queue WRs before the transport is reconnected.
+ * frwr_map and frwr_unmap_* cannot run at the same time the transport
+ * connect worker is running. The connect worker holds the transport
+ * send lock, just as ->send_request does. This prevents frwr_map and
+ * the connect worker from running concurrently. When a connection is
+ * closed, the Receive completion queue is drained before allowing
+ * the connect worker to get control. This prevents frwr_unmap and the
+ * connect worker from running concurrently.
+ *
+ * When the underlying transport disconnects, MRs that are in flight
+ * are flushed and are likely unusable. Thus all flushed MRs are
+ * destroyed. New MRs are created on demand.
  */
 
 #include <linux/sunrpc/rpc_rdma.h>
@@ -118,15 +88,8 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
        kfree(mr);
 }
 
-/* MRs are dynamically allocated, so simply clean up and release the MR.
- * A replacement MR will subsequently be allocated on demand.
- */
-static void
-frwr_mr_recycle_worker(struct work_struct *work)
+static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
 {
-       struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
-       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
        trace_xprtrdma_mr_recycle(mr);
 
        if (mr->mr_dir != DMA_NONE) {
@@ -136,14 +99,40 @@ frwr_mr_recycle_worker(struct work_struct *work)
                mr->mr_dir = DMA_NONE;
        }
 
-       spin_lock(&r_xprt->rx_buf.rb_mrlock);
+       spin_lock(&r_xprt->rx_buf.rb_lock);
        list_del(&mr->mr_all);
        r_xprt->rx_stats.mrs_recycled++;
-       spin_unlock(&r_xprt->rx_buf.rb_mrlock);
+       spin_unlock(&r_xprt->rx_buf.rb_lock);
 
        frwr_release_mr(mr);
 }
 
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
+ */
+static void
+frwr_mr_recycle_worker(struct work_struct *work)
+{
+       struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr,
+                                            mr_recycle);
+
+       frwr_mr_recycle(mr->mr_xprt, mr);
+}
+
+/* frwr_recycle - Discard MRs
+ * @req: request to reset
+ *
+ * Used after a reconnect. These MRs could be in flight; we can't
+ * tell. The safe thing to do is to release them.
+ */
+void frwr_recycle(struct rpcrdma_req *req)
+{
+       struct rpcrdma_mr *mr;
+
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+               frwr_mr_recycle(mr->mr_xprt, mr);
+}
+
 /* frwr_reset - Place MRs back on the free list
  * @req: request to reset
  *
@@ -156,12 +145,10 @@ frwr_mr_recycle_worker(struct work_struct *work)
  */
 void frwr_reset(struct rpcrdma_req *req)
 {
-       while (!list_empty(&req->rl_registered)) {
-               struct rpcrdma_mr *mr;
+       struct rpcrdma_mr *mr;
 
-               mr = rpcrdma_mr_pop(&req->rl_registered);
-               rpcrdma_mr_unmap_and_put(mr);
-       }
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+               rpcrdma_mr_put(mr);
 }
 
 /**
@@ -179,11 +166,14 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
        struct ib_mr *frmr;
        int rc;
 
+       /* NB: ib_alloc_mr and device drivers typically allocate
+        *     memory with GFP_KERNEL.
+        */
        frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
        if (IS_ERR(frmr))
                goto out_mr_err;
 
-       sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
+       sg = kcalloc(depth, sizeof(*sg), GFP_NOFS);
        if (!sg)
                goto out_list_err;
 
@@ -203,8 +193,6 @@ out_mr_err:
        return rc;
 
 out_list_err:
-       dprintk("RPC:       %s: sg allocation failure\n",
-               __func__);
        ib_dereg_mr(frmr);
        return -ENOMEM;
 }
@@ -290,8 +278,8 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
        ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
 
-       ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
-                               ia->ri_max_frwr_depth);
+       ia->ri_max_segs =
+               DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth);
        /* Reply chunks require segments for head and tail buffers */
        ia->ri_max_segs += 2;
        if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
@@ -323,31 +311,25 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
  * @nsegs: number of segments remaining
  * @writing: true when RDMA Write will be used
  * @xid: XID of RPC using the registered memory
- * @out: initialized MR
+ * @mr: MR to fill in
  *
  * Prepare a REG_MR Work Request to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  *
  * Returns the next segment or a negative errno pointer.
- * On success, the prepared MR is planted in @out.
+ * On success, @mr is filled in.
  */
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                struct rpcrdma_mr_seg *seg,
                                int nsegs, bool writing, __be32 xid,
-                               struct rpcrdma_mr **out)
+                               struct rpcrdma_mr *mr)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
-       struct rpcrdma_mr *mr;
-       struct ib_mr *ibmr;
        struct ib_reg_wr *reg_wr;
+       struct ib_mr *ibmr;
        int i, n;
        u8 key;
 
-       mr = rpcrdma_mr_get(r_xprt);
-       if (!mr)
-               goto out_getmr_err;
-
        if (nsegs > ia->ri_max_frwr_depth)
                nsegs = ia->ri_max_frwr_depth;
        for (i = 0; i < nsegs;) {
@@ -362,7 +344,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 
                ++seg;
                ++i;
-               if (holes_ok)
+               if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS)
                        continue;
                if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
@@ -397,22 +379,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
        mr->mr_offset = ibmr->iova;
        trace_xprtrdma_mr_map(mr);
 
-       *out = mr;
        return seg;
 
-out_getmr_err:
-       xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
-       return ERR_PTR(-EAGAIN);
-
 out_dmamap_err:
        mr->mr_dir = DMA_NONE;
        trace_xprtrdma_frwr_sgerr(mr, i);
-       rpcrdma_mr_put(mr);
        return ERR_PTR(-EIO);
 
 out_mapmr_err:
        trace_xprtrdma_frwr_maperr(mr, n);
-       rpcrdma_mr_recycle(mr);
        return ERR_PTR(-EIO);
 }
 
@@ -485,7 +460,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
                if (mr->mr_handle == rep->rr_inv_rkey) {
                        list_del_init(&mr->mr_list);
                        trace_xprtrdma_mr_remoteinv(mr);
-                       rpcrdma_mr_unmap_and_put(mr);
+                       rpcrdma_mr_put(mr);
                        break;  /* only one invalidated MR per RPC */
                }
 }
@@ -495,7 +470,7 @@ static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
        if (wc->status != IB_WC_SUCCESS)
                rpcrdma_mr_recycle(mr);
        else
-               rpcrdma_mr_unmap_and_put(mr);
+               rpcrdma_mr_put(mr);
 }
 
 /**
@@ -532,8 +507,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
        trace_xprtrdma_wc_li_wake(wc, frwr);
-       complete(&frwr->fr_linv_done);
        __frwr_release_mr(wc, mr);
+       complete(&frwr->fr_linv_done);
 }
 
 /**
@@ -562,8 +537,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
         */
        frwr = NULL;
        prev = &first;
-       while (!list_empty(&req->rl_registered)) {
-               mr = rpcrdma_mr_pop(&req->rl_registered);
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
 
                trace_xprtrdma_mr_localinv(mr);
                r_xprt->rx_stats.local_inv_needed++;
@@ -632,11 +606,15 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
        struct rpcrdma_frwr *frwr =
                container_of(cqe, struct rpcrdma_frwr, fr_cqe);
        struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+       struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
        trace_xprtrdma_wc_li_done(wc, frwr);
-       rpcrdma_complete_rqst(frwr->fr_req->rl_reply);
        __frwr_release_mr(wc, mr);
+
+       /* Ensure @rep is generated before __frwr_release_mr */
+       smp_rmb();
+       rpcrdma_complete_rqst(rep);
 }
 
 /**
@@ -662,15 +640,13 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
         */
        frwr = NULL;
        prev = &first;
-       while (!list_empty(&req->rl_registered)) {
-               mr = rpcrdma_mr_pop(&req->rl_registered);
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
 
                trace_xprtrdma_mr_localinv(mr);
                r_xprt->rx_stats.local_inv_needed++;
 
                frwr = &mr->frwr;
                frwr->fr_cqe.done = frwr_wc_localinv;
-               frwr->fr_req = req;
                last = &frwr->fr_invwr;
                last->next = NULL;
                last->wr_cqe = &frwr->fr_cqe;
index 4345e69..b86b5fd 100644 (file)
@@ -342,6 +342,32 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
        return 0;
 }
 
+static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
+                                                struct rpcrdma_req *req,
+                                                struct rpcrdma_mr_seg *seg,
+                                                int nsegs, bool writing,
+                                                struct rpcrdma_mr **mr)
+{
+       *mr = rpcrdma_mr_pop(&req->rl_free_mrs);
+       if (!*mr) {
+               *mr = rpcrdma_mr_get(r_xprt);
+               if (!*mr)
+                       goto out_getmr_err;
+               trace_xprtrdma_mr_get(req);
+               (*mr)->mr_req = req;
+       }
+
+       rpcrdma_mr_push(*mr, &req->rl_registered);
+       return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
+
+out_getmr_err:
+       trace_xprtrdma_nomrs(req);
+       xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
+       if (r_xprt->rx_ep.rep_connected != -ENODEV)
+               schedule_work(&r_xprt->rx_buf.rb_refresh_worker);
+       return ERR_PTR(-EAGAIN);
+}
+
 /* Register and XDR encode the Read list. Supports encoding a list of read
  * segments that belong to a single read chunk.
  *
@@ -356,9 +382,10 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
  *
  * Only a single @pos value is currently supported.
  */
-static noinline int
-rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                        struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
+static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
+                                   struct rpcrdma_req *req,
+                                   struct rpc_rqst *rqst,
+                                   enum rpcrdma_chunktype rtype)
 {
        struct xdr_stream *xdr = &req->rl_stream;
        struct rpcrdma_mr_seg *seg;
@@ -379,10 +406,9 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
                return nsegs;
 
        do {
-               seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr);
+               seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
                if (IS_ERR(seg))
                        return PTR_ERR(seg);
-               rpcrdma_mr_push(mr, &req->rl_registered);
 
                if (encode_read_segment(xdr, mr, pos) < 0)
                        return -EMSGSIZE;
@@ -411,9 +437,10 @@ done:
  *
  * Only a single Write chunk is currently supported.
  */
-static noinline int
-rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                         struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
+static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
+                                    struct rpcrdma_req *req,
+                                    struct rpc_rqst *rqst,
+                                    enum rpcrdma_chunktype wtype)
 {
        struct xdr_stream *xdr = &req->rl_stream;
        struct rpcrdma_mr_seg *seg;
@@ -440,10 +467,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
        nchunks = 0;
        do {
-               seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
+               seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
                if (IS_ERR(seg))
                        return PTR_ERR(seg);
-               rpcrdma_mr_push(mr, &req->rl_registered);
 
                if (encode_rdma_segment(xdr, mr) < 0)
                        return -EMSGSIZE;
@@ -474,9 +500,10 @@ done:
  * Returns zero on success, or a negative errno if a failure occurred.
  * @xdr is advanced to the next position in the stream.
  */
-static noinline int
-rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                          struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
+static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
+                                     struct rpcrdma_req *req,
+                                     struct rpc_rqst *rqst,
+                                     enum rpcrdma_chunktype wtype)
 {
        struct xdr_stream *xdr = &req->rl_stream;
        struct rpcrdma_mr_seg *seg;
@@ -501,10 +528,9 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
        nchunks = 0;
        do {
-               seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
+               seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
                if (IS_ERR(seg))
                        return PTR_ERR(seg);
-               rpcrdma_mr_push(mr, &req->rl_registered);
 
                if (encode_rdma_segment(xdr, mr) < 0)
                        return -EMSGSIZE;
@@ -841,12 +867,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
         * chunks. Very likely the connection has been replaced,
         * so these registrations are invalid and unusable.
         */
-       while (unlikely(!list_empty(&req->rl_registered))) {
-               struct rpcrdma_mr *mr;
-
-               mr = rpcrdma_mr_pop(&req->rl_registered);
-               rpcrdma_mr_recycle(mr);
-       }
+       frwr_recycle(req);
 
        /* This implementation supports the following combinations
         * of chunk lists in one RPC-over-RDMA Call message:
@@ -1240,8 +1261,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
        struct rpc_rqst *rqst = rep->rr_rqst;
        int status;
 
-       xprt->reestablish_timeout = 0;
-
        switch (rep->rr_proc) {
        case rdma_msg:
                status = rpcrdma_decode_msg(r_xprt, rep, rqst);
@@ -1300,6 +1319,12 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
        u32 credits;
        __be32 *p;
 
+       /* Any data means we had a useful conversation, so
+        * we don't need to delay the next reconnect.
+        */
+       if (xprt->reestablish_timeout)
+               xprt->reestablish_timeout = 0;
+
        /* Fixed transport header fields */
        xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
                        rep->rr_hdrbuf.head[0].iov_base, NULL);
index abdb300..97bca50 100644 (file)
@@ -73,8 +73,6 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-struct workqueue_struct *svc_rdma_wq;
-
 /*
  * This function implements reading and resetting an atomic_t stat
  * variable through read/write to a proc file. Any write to the file
@@ -230,7 +228,6 @@ static struct ctl_table svcrdma_root_table[] = {
 void svc_rdma_cleanup(void)
 {
        dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
-       destroy_workqueue(svc_rdma_wq);
        if (svcrdma_table_header) {
                unregister_sysctl_table(svcrdma_table_header);
                svcrdma_table_header = NULL;
@@ -246,10 +243,6 @@ int svc_rdma_init(void)
        dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
        dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
-       svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
-       if (!svc_rdma_wq)
-               return -ENOMEM;
-
        if (!svcrdma_table_header)
                svcrdma_table_header =
                        register_sysctl_table(svcrdma_root_table);
index 65e2fb9..96bccd3 100644 (file)
@@ -172,9 +172,10 @@ static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
 void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_recv_ctxt *ctxt;
+       struct llist_node *node;
 
-       while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
-               list_del(&ctxt->rc_list);
+       while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
+               ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
                svc_rdma_recv_ctxt_destroy(rdma, ctxt);
        }
 }
@@ -183,21 +184,18 @@ static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_recv_ctxt *ctxt;
+       struct llist_node *node;
 
-       spin_lock(&rdma->sc_recv_lock);
-       ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
-       if (!ctxt)
+       node = llist_del_first(&rdma->sc_recv_ctxts);
+       if (!node)
                goto out_empty;
-       list_del(&ctxt->rc_list);
-       spin_unlock(&rdma->sc_recv_lock);
+       ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 
 out:
        ctxt->rc_page_count = 0;
        return ctxt;
 
 out_empty:
-       spin_unlock(&rdma->sc_recv_lock);
-
        ctxt = svc_rdma_recv_ctxt_alloc(rdma);
        if (!ctxt)
                return NULL;
@@ -218,11 +216,9 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
        for (i = 0; i < ctxt->rc_page_count; i++)
                put_page(ctxt->rc_pages[i]);
 
-       if (!ctxt->rc_temp) {
-               spin_lock(&rdma->sc_recv_lock);
-               list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-               spin_unlock(&rdma->sc_recv_lock);
-       } else
+       if (!ctxt->rc_temp)
+               llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+       else
                svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
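
The conversion above replaces a spinlock-protected list_head with an llist. Worth noting: llist_add() is safe from any context with no lock, but llist_del_first() still requires consumers to be serialized against each other (the receive path provides that here). An illustrative miniature:

#include <linux/llist.h>

struct demo_ctxt {
	struct llist_node node;
};

static LLIST_HEAD(demo_free_list);

/* producer side: lock-free, callable concurrently */
static void demo_put(struct demo_ctxt *ctxt)
{
	llist_add(&ctxt->node, &demo_free_list);
}

/* consumer side: callers must not race with one another */
static struct demo_ctxt *demo_get(void)
{
	struct llist_node *node = llist_del_first(&demo_free_list);

	return node ? llist_entry(node, struct demo_ctxt, node) : NULL;
}
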
 
index 4d3db6e..145a361 100644 (file)
@@ -140,14 +140,13 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
-       INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
+       init_llist_head(&cma_xprt->sc_recv_ctxts);
        INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
        init_waitqueue_head(&cma_xprt->sc_send_wait);
 
        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        spin_lock_init(&cma_xprt->sc_send_lock);
-       spin_lock_init(&cma_xprt->sc_recv_lock);
        spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
        /*
@@ -630,8 +629,9 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 {
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
        INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-       queue_work(svc_rdma_wq, &rdma->sc_work);
+       schedule_work(&rdma->sc_work);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)
index 2ec349e..160558b 100644 (file)
@@ -423,8 +423,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
 
        if (ep->rep_connected == -ENODEV)
                return;
-       if (ep->rep_connected > 0)
-               xprt->reestablish_timeout = 0;
        rpcrdma_ep_disconnect(ep, ia);
 
        /* Prepare @xprt for the next connection by reinitializing
@@ -434,6 +432,7 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
        xprt->cwnd = RPC_CWNDSHIFT;
 
 out:
+       xprt->reestablish_timeout = 0;
        ++xprt->connect_cookie;
        xprt_disconnect_done(xprt);
 }
@@ -494,9 +493,9 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
  * @reconnect_timeout: reconnect timeout after server disconnects
  *
  */
-static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt,
-                                             unsigned long connect_timeout,
-                                             unsigned long reconnect_timeout)
+static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt,
+                                         unsigned long connect_timeout,
+                                         unsigned long reconnect_timeout)
 {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
@@ -571,6 +570,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
        return;
 
 out_sleep:
+       set_bit(XPRT_CONGESTED, &xprt->state);
        rpc_sleep_on(&xprt->backlog, task, NULL);
        task->tk_status = -EAGAIN;
 }
@@ -589,7 +589,8 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
 
        memset(rqst, 0, sizeof(*rqst));
        rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
-       rpc_wake_up_next(&xprt->backlog);
+       if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
+               clear_bit(XPRT_CONGESTED, &xprt->state);
 }
 
 static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
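
The alloc_slot/free_slot hunks keep XPRT_CONGESTED in step with the backlog queue: the bit is set when an allocation has to sleep and cleared only when a wake-up finds no waiter. Condensed into one place (a sketch, not the full functions):

static void alloc_slot_backlogged(struct rpc_xprt *xprt, struct rpc_task *task)
{
	set_bit(XPRT_CONGESTED, &xprt->state);
	rpc_sleep_on(&xprt->backlog, task, NULL);
	task->tk_status = -EAGAIN;
}

static void free_slot_wake_next(struct rpc_xprt *xprt)
{
	/* rpc_wake_up_next() returns NULL when nothing was waiting */
	if (!rpc_wake_up_next(&xprt->backlog))
		clear_bit(XPRT_CONGESTED, &xprt->state);
}
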
@@ -803,7 +804,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
        .send_request           = xprt_rdma_send_request,
        .close                  = xprt_rdma_close,
        .destroy                = xprt_rdma_destroy,
-       .set_connect_timeout    = xprt_rdma_tcp_set_connect_timeout,
+       .set_connect_timeout    = xprt_rdma_set_connect_timeout,
        .print_stats            = xprt_rdma_print_stats,
        .enable_swap            = xprt_rdma_enable_swap,
        .disable_swap           = xprt_rdma_disable_swap,
index b10aa16..3a90753 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/slab.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc_rdma.h>
+#include <linux/log2.h>
 
 #include <asm-generic/barrier.h>
 #include <asm/bitops.h>
 /*
  * internal functions
  */
 static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
+static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf);
 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr);
 static struct rpcrdma_regbuf *
 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
                     gfp_t flags);
@@ -405,9 +408,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
        struct rpcrdma_ep *ep = &r_xprt->rx_ep;
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_req *req;
-       struct rpcrdma_rep *rep;
 
-       cancel_delayed_work_sync(&buf->rb_refresh_worker);
+       cancel_work_sync(&buf->rb_refresh_worker);
 
        /* This is similar to rpcrdma_ep_destroy, but:
         * - Don't cancel the connect worker.
@@ -429,8 +431,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
        /* The ULP is responsible for ensuring all DMA
         * mappings and MRs are gone.
         */
-       list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
-               rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
+       rpcrdma_reps_destroy(buf);
        list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
                rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
                rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
@@ -604,10 +605,10 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
  * Unlike a normal reconnection, a fresh PD and a new set
  * of MRs and buffers is needed.
  */
-static int
-rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
-                        struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+                                   struct ib_qp_init_attr *qp_init_attr)
 {
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        int rc, err;
 
        trace_xprtrdma_reinsert(r_xprt);
@@ -624,7 +625,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
        }
 
        rc = -ENETUNREACH;
-       err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+       err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr);
        if (err) {
                pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
                goto out3;
@@ -641,16 +642,16 @@ out1:
        return rc;
 }
 
-static int
-rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
-                    struct rpcrdma_ia *ia)
+static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt,
+                               struct ib_qp_init_attr *qp_init_attr)
 {
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rdma_cm_id *id, *old;
        int err, rc;
 
        trace_xprtrdma_reconnect(r_xprt);
 
-       rpcrdma_ep_disconnect(ep, ia);
+       rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia);
 
        rc = -EHOSTUNREACH;
        id = rpcrdma_create_id(r_xprt, ia);
@@ -672,7 +673,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
                goto out_destroy;
        }
 
-       err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+       err = rdma_create_qp(id, ia->ri_pd, qp_init_attr);
        if (err)
                goto out_destroy;
 
@@ -697,25 +698,27 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
        struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
                                                   rx_ia);
        struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+       struct ib_qp_init_attr qp_init_attr;
        int rc;
 
 retry:
+       memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr));
        switch (ep->rep_connected) {
        case 0:
                dprintk("RPC:       %s: connecting...\n", __func__);
-               rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+               rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr);
                if (rc) {
                        rc = -ENETUNREACH;
                        goto out_noupdate;
                }
                break;
        case -ENODEV:
-               rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+               rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr);
                if (rc)
                        goto out_noupdate;
                break;
        default:
-               rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+               rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr);
                if (rc)
                        goto out;
        }
@@ -729,6 +732,8 @@ retry:
        if (rc)
                goto out;
 
+       if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+               xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
        wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
        if (ep->rep_connected <= 0) {
                if (ep->rep_connected == -EAGAIN)
@@ -942,14 +947,12 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        unsigned int count;
-       LIST_HEAD(free);
-       LIST_HEAD(all);
 
        for (count = 0; count < ia->ri_max_segs; count++) {
                struct rpcrdma_mr *mr;
                int rc;
 
-               mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+               mr = kzalloc(sizeof(*mr), GFP_NOFS);
                if (!mr)
                        break;
 
@@ -961,15 +964,13 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
 
                mr->mr_xprt = r_xprt;
 
-               list_add(&mr->mr_list, &free);
-               list_add(&mr->mr_all, &all);
+               spin_lock(&buf->rb_lock);
+               list_add(&mr->mr_list, &buf->rb_mrs);
+               list_add(&mr->mr_all, &buf->rb_all_mrs);
+               spin_unlock(&buf->rb_lock);
        }
 
-       spin_lock(&buf->rb_mrlock);
-       list_splice(&free, &buf->rb_mrs);
-       list_splice(&all, &buf->rb_all);
        r_xprt->rx_stats.mrs_allocated += count;
-       spin_unlock(&buf->rb_mrlock);
        trace_xprtrdma_createmrs(r_xprt, count);
 }
 
@@ -977,7 +978,7 @@ static void
 rpcrdma_mr_refresh_worker(struct work_struct *work)
 {
        struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
-                                                 rb_refresh_worker.work);
+                                                 rb_refresh_worker);
        struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
                                                   rx_buf);
 
@@ -999,12 +1000,18 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
        struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
        struct rpcrdma_regbuf *rb;
        struct rpcrdma_req *req;
+       size_t maxhdrsize;
 
        req = kzalloc(sizeof(*req), flags);
        if (req == NULL)
                goto out1;
 
-       rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags);
+       /* Compute maximum header buffer size in bytes */
+       maxhdrsize = rpcrdma_fixed_maxsz + 3 +
+                    r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz;
+       maxhdrsize *= sizeof(__be32);
+       rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
+                                 DMA_TO_DEVICE, flags);
        if (!rb)
                goto out2;
        req->rl_rdmabuf = rb;
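
The sizing arithmetic above replaces the old fixed RPCRDMA_HDRBUF_SIZE with a
computed worst case. A worked example as standalone C, with assumed constants
(the real values come from the rpcrdma protocol enums; treat these numbers as
illustrative only):

#include <stdio.h>

#define DEMO_FIXED_MAXSZ      4   /* assumed: xid, vers, credits, proc */
#define DEMO_READCHUNK_MAXSZ  6   /* assumed: one read segment + list overhead */
#define DEMO_MAX_SEGS        16   /* assumed ri_max_segs */

static unsigned int demo_roundup_pow2(unsigned int v)
{
	unsigned int p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned int words = DEMO_FIXED_MAXSZ + 3 +
			     DEMO_MAX_SEGS * DEMO_READCHUNK_MAXSZ; /* 103 */
	unsigned int bytes = words * 4;		/* sizeof(__be32): 412 */

	printf("maxhdrsize %u -> regbuf of %u bytes\n",
	       bytes, demo_roundup_pow2(bytes));	/* 412 -> 512 */
	return 0;
}
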
@@ -1018,6 +1025,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
        if (!req->rl_recvbuf)
                goto out4;
 
+       INIT_LIST_HEAD(&req->rl_free_mrs);
        INIT_LIST_HEAD(&req->rl_registered);
        spin_lock(&buffer->rb_lock);
        list_add(&req->rl_all, &buffer->rb_allreqs);
@@ -1065,6 +1073,40 @@ out:
        return NULL;
 }
 
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+       rpcrdma_regbuf_free(rep->rr_rdmabuf);
+       kfree(rep);
+}
+
+static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
+{
+       struct llist_node *node;
+
+       /* Calls to llist_del_first are required to be serialized */
+       node = llist_del_first(&buf->rb_free_reps);
+       if (!node)
+               return NULL;
+       return llist_entry(node, struct rpcrdma_rep, rr_node);
+}
+
+static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
+                           struct rpcrdma_rep *rep)
+{
+       if (!rep->rr_temp)
+               llist_add(&rep->rr_node, &buf->rb_free_reps);
+       else
+               rpcrdma_rep_destroy(rep);
+}
+
+static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
+{
+       struct rpcrdma_rep *rep;
+
+       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
+               rpcrdma_rep_destroy(rep);
+}
+
 /**
  * rpcrdma_buffer_create - Create initial set of req/rep objects
  * @r_xprt: transport instance to (re)initialize
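
The functions added above convert the reply-buffer free list from a
spinlock-protected list_head to a lock-free llist. The contract that makes
this safe: any context may llist_add() concurrently, but llist_del_first()
callers must serialize among themselves. A minimal sketch of the pattern,
with hypothetical demo_* names:

#include <linux/llist.h>

struct demo_rep {
	struct llist_node node;
};

static LLIST_HEAD(demo_free_reps);

/* Release side: lock-free, safe from any number of concurrent callers. */
static void demo_rep_put(struct demo_rep *rep)
{
	llist_add(&rep->node, &demo_free_reps);
}

/* Acquire side: callers must serialize with each other; in the patch the
 * send path is the only consumer, which satisfies that rule for free.
 */
static struct demo_rep *demo_rep_get(void)
{
	struct llist_node *node = llist_del_first(&demo_free_reps);

	return node ? llist_entry(node, struct demo_rep, node) : NULL;
}
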
@@ -1078,12 +1120,10 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 
        buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
        buf->rb_bc_srv_max_requests = 0;
-       spin_lock_init(&buf->rb_mrlock);
        spin_lock_init(&buf->rb_lock);
        INIT_LIST_HEAD(&buf->rb_mrs);
-       INIT_LIST_HEAD(&buf->rb_all);
-       INIT_DELAYED_WORK(&buf->rb_refresh_worker,
-                         rpcrdma_mr_refresh_worker);
+       INIT_LIST_HEAD(&buf->rb_all_mrs);
+       INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);
 
        rpcrdma_mrs_create(r_xprt);
 
@@ -1102,7 +1142,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
        }
 
        buf->rb_credits = 1;
-       INIT_LIST_HEAD(&buf->rb_recv_bufs);
+       init_llist_head(&buf->rb_free_reps);
 
        rc = rpcrdma_sendctxs_create(r_xprt);
        if (rc)
@@ -1114,12 +1154,6 @@ out:
        return rc;
 }
 
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
-{
-       rpcrdma_regbuf_free(rep->rr_rdmabuf);
-       kfree(rep);
-}
-
 /**
  * rpcrdma_req_destroy - Destroy an rpcrdma_req object
  * @req: unused object to be destroyed
@@ -1127,11 +1161,13 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
  * This function assumes that the caller prevents concurrent device
  * unload and transport tear-down.
  */
-void
-rpcrdma_req_destroy(struct rpcrdma_req *req)
+void rpcrdma_req_destroy(struct rpcrdma_req *req)
 {
        list_del(&req->rl_all);
 
+       while (!list_empty(&req->rl_free_mrs))
+               rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs));
+
        rpcrdma_regbuf_free(req->rl_recvbuf);
        rpcrdma_regbuf_free(req->rl_sendbuf);
        rpcrdma_regbuf_free(req->rl_rdmabuf);
@@ -1147,25 +1183,19 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
        unsigned int count;
 
        count = 0;
-       spin_lock(&buf->rb_mrlock);
-       while (!list_empty(&buf->rb_all)) {
-               mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
+       spin_lock(&buf->rb_lock);
+       while ((mr = list_first_entry_or_null(&buf->rb_all_mrs,
+                                             struct rpcrdma_mr,
+                                             mr_all)) != NULL) {
                list_del(&mr->mr_all);
-
-               spin_unlock(&buf->rb_mrlock);
-
-               /* Ensure MW is not on any rl_registered list */
-               if (!list_empty(&mr->mr_list))
-                       list_del(&mr->mr_list);
+               spin_unlock(&buf->rb_lock);
 
                frwr_release_mr(mr);
                count++;
-               spin_lock(&buf->rb_mrlock);
+               spin_lock(&buf->rb_lock);
        }
-       spin_unlock(&buf->rb_mrlock);
+       spin_unlock(&buf->rb_lock);
        r_xprt->rx_stats.mrs_allocated = 0;
-
-       dprintk("RPC:       %s: released %u MRs\n", __func__, count);
 }
 
 /**
@@ -1179,18 +1209,10 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
 void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
-       cancel_delayed_work_sync(&buf->rb_refresh_worker);
+       cancel_work_sync(&buf->rb_refresh_worker);
 
        rpcrdma_sendctxs_destroy(buf);
-
-       while (!list_empty(&buf->rb_recv_bufs)) {
-               struct rpcrdma_rep *rep;
-
-               rep = list_first_entry(&buf->rb_recv_bufs,
-                                      struct rpcrdma_rep, rr_list);
-               list_del(&rep->rr_list);
-               rpcrdma_rep_destroy(rep);
-       }
+       rpcrdma_reps_destroy(buf);
 
        while (!list_empty(&buf->rb_send_bufs)) {
                struct rpcrdma_req *req;
@@ -1215,54 +1237,20 @@ struct rpcrdma_mr *
 rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
 {
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct rpcrdma_mr *mr = NULL;
-
-       spin_lock(&buf->rb_mrlock);
-       if (!list_empty(&buf->rb_mrs))
-               mr = rpcrdma_mr_pop(&buf->rb_mrs);
-       spin_unlock(&buf->rb_mrlock);
+       struct rpcrdma_mr *mr;
 
-       if (!mr)
-               goto out_nomrs;
+       spin_lock(&buf->rb_lock);
+       mr = rpcrdma_mr_pop(&buf->rb_mrs);
+       spin_unlock(&buf->rb_lock);
        return mr;
-
-out_nomrs:
-       trace_xprtrdma_nomrs(r_xprt);
-       if (r_xprt->rx_ep.rep_connected != -ENODEV)
-               schedule_delayed_work(&buf->rb_refresh_worker, 0);
-
-       /* Allow the reply handler and refresh worker to run */
-       cond_resched();
-
-       return NULL;
-}
-
-static void
-__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
-{
-       spin_lock(&buf->rb_mrlock);
-       rpcrdma_mr_push(mr, &buf->rb_mrs);
-       spin_unlock(&buf->rb_mrlock);
-}
-
-/**
- * rpcrdma_mr_put - Release an rpcrdma_mr object
- * @mr: object to release
- *
- */
-void
-rpcrdma_mr_put(struct rpcrdma_mr *mr)
-{
-       __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
 }
 
 /**
- * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
- * @mr: object to release
+ * rpcrdma_mr_put - DMA unmap an MR and release it
+ * @mr: MR to release
  *
  */
-void
-rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
+void rpcrdma_mr_put(struct rpcrdma_mr *mr)
 {
        struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
 
@@ -1272,7 +1260,19 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
                                mr->mr_sg, mr->mr_nents, mr->mr_dir);
                mr->mr_dir = DMA_NONE;
        }
-       __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
+
+       rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
+}
+
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr)
+{
+       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+
+       mr->mr_req = NULL;
+       spin_lock(&buf->rb_lock);
+       rpcrdma_mr_push(mr, &buf->rb_mrs);
+       spin_unlock(&buf->rb_lock);
 }
 
 /**
@@ -1303,39 +1303,24 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
  */
 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 {
-       struct rpcrdma_rep *rep = req->rl_reply;
-
+       if (req->rl_reply)
+               rpcrdma_rep_put(buffers, req->rl_reply);
        req->rl_reply = NULL;
 
        spin_lock(&buffers->rb_lock);
        list_add(&req->rl_list, &buffers->rb_send_bufs);
-       if (rep) {
-               if (!rep->rr_temp) {
-                       list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-                       rep = NULL;
-               }
-       }
        spin_unlock(&buffers->rb_lock);
-       if (rep)
-               rpcrdma_rep_destroy(rep);
 }
 
-/*
- * Put reply buffers back into pool when not attached to
- * request. This happens in error conditions.
+/**
+ * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
+ * @rep: rep to release
+ *
+ * Used after error conditions.
  */
-void
-rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
+void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
 {
-       struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
-
-       if (!rep->rr_temp) {
-               spin_lock(&buffers->rb_lock);
-               list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-               spin_unlock(&buffers->rb_lock);
-       } else {
-               rpcrdma_rep_destroy(rep);
-       }
+       rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
 }
 
 /* Returns a pointer to a rpcrdma_regbuf object, or NULL.
@@ -1483,7 +1468,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
        count = 0;
 
        needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
-       if (ep->rep_receive_count > needed)
+       if (likely(ep->rep_receive_count > needed))
                goto out;
        needed -= ep->rep_receive_count;
        if (!temp)
@@ -1491,22 +1476,10 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
 
        /* fast path: all needed reps can be found on the free list */
        wr = NULL;
-       spin_lock(&buf->rb_lock);
        while (needed) {
-               rep = list_first_entry_or_null(&buf->rb_recv_bufs,
-                                              struct rpcrdma_rep, rr_list);
+               rep = rpcrdma_rep_get_locked(buf);
                if (!rep)
-                       break;
-
-               list_del(&rep->rr_list);
-               rep->rr_recv_wr.next = wr;
-               wr = &rep->rr_recv_wr;
-               --needed;
-       }
-       spin_unlock(&buf->rb_lock);
-
-       while (needed) {
-               rep = rpcrdma_rep_create(r_xprt, temp);
+                       rep = rpcrdma_rep_create(r_xprt, temp);
                if (!rep)
                        break;
 
@@ -1523,7 +1496,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
                if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
                        goto release_wrs;
 
-               trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
+               trace_xprtrdma_post_recv(rep);
                ++count;
        }
 
index 92ce09f..65e6b0e 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/atomic.h>              /* atomic_t, etc */
 #include <linux/kref.h>                        /* struct kref */
 #include <linux/workqueue.h>           /* struct work_struct */
+#include <linux/llist.h>
 
 #include <rdma/rdma_cm.h>              /* RDMA connection api */
 #include <rdma/ib_verbs.h>             /* RDMA verbs api */
@@ -117,9 +118,6 @@ struct rpcrdma_ep {
 #endif
 
 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
- *
- * The below structure appears at the front of a large region of kmalloc'd
- * memory, which always starts on a good alignment boundary.
  */
 
 struct rpcrdma_regbuf {
@@ -158,25 +156,22 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
 
 /* To ensure a transport can always make forward progress,
  * the number of RDMA segments allowed in header chunk lists
- * is capped at 8. This prevents less-capable devices and
- * memory registrations from overrunning the Send buffer
- * while building chunk lists.
+ * is capped at 16. This prevents less-capable devices from
+ * overrunning the Send buffer while building chunk lists.
  *
  * Elements of the Read list take up more room than the
- * Write list or Reply chunk. 8 read segments means the Read
- * list (or Write list or Reply chunk) cannot consume more
- * than
- *
- * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes.
+ * Write list or Reply chunk. 16 read segments means the
+ * chunk lists cannot consume more than
  *
- * And the fixed part of the header is another 24 bytes.
+ * ((16 + 2) * read segment size) + 1 XDR words,
  *
- * The smallest inline threshold is 1024 bytes, ensuring that
- * at least 750 bytes are available for RPC messages.
+ * or about 400 bytes. The fixed part of the header is
+ * another 24 bytes. Thus when the inline threshold is
+ * 1024 bytes, at least 600 bytes are available for RPC
+ * message bodies.
  */
 enum {
-       RPCRDMA_MAX_HDR_SEGS = 8,
-       RPCRDMA_HDRBUF_SIZE = 256,
+       RPCRDMA_MAX_HDR_SEGS = 16,
 };
 
 /*
@@ -206,7 +201,7 @@ struct rpcrdma_rep {
        struct rpc_rqst         *rr_rqst;
        struct xdr_buf          rr_hdrbuf;
        struct xdr_stream       rr_stream;
-       struct list_head        rr_list;
+       struct llist_node       rr_node;
        struct ib_recv_wr       rr_recv_wr;
 };
 
@@ -240,20 +235,20 @@ struct rpcrdma_sendctx {
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
  */
-struct rpcrdma_req;
 struct rpcrdma_frwr {
        struct ib_mr                    *fr_mr;
        struct ib_cqe                   fr_cqe;
        struct completion               fr_linv_done;
-       struct rpcrdma_req              *fr_req;
        union {
                struct ib_reg_wr        fr_regwr;
                struct ib_send_wr       fr_invwr;
        };
 };
 
+struct rpcrdma_req;
 struct rpcrdma_mr {
        struct list_head        mr_list;
+       struct rpcrdma_req      *mr_req;
        struct scatterlist      *mr_sg;
        int                     mr_nents;
        enum dma_data_direction mr_dir;
@@ -331,7 +326,8 @@ struct rpcrdma_req {
        struct list_head        rl_all;
        struct kref             rl_kref;
 
-       struct list_head        rl_registered;  /* registered segments */
+       struct list_head        rl_free_mrs;
+       struct list_head        rl_registered;
        struct rpcrdma_mr_seg   rl_segments[RPCRDMA_MAX_SEGS];
 };
 
@@ -344,7 +340,7 @@ rpcr_to_rdmar(const struct rpc_rqst *rqst)
 static inline void
 rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
 {
-       list_add_tail(&mr->mr_list, list);
+       list_add(&mr->mr_list, list);
 }
 
 static inline struct rpcrdma_mr *
@@ -352,8 +348,9 @@ rpcrdma_mr_pop(struct list_head *list)
 {
        struct rpcrdma_mr *mr;
 
-       mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
-       list_del_init(&mr->mr_list);
+       mr = list_first_entry_or_null(list, struct rpcrdma_mr, mr_list);
+       if (mr)
+               list_del_init(&mr->mr_list);
        return mr;
 }
 
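
Two related changes meet here: rpcrdma_mr_pop() now returns NULL on an empty
list instead of assuming a non-empty one, and released MRs go to the owning
request's rl_free_mrs list rather than straight back to the shared pool.
A sketch of the resulting two-level get path, using hypothetical demo_*
types (the fallback ordering is inferred from these hunks, not quoted):

#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_mr { struct list_head mr_list; };

struct demo_pool {			/* stands in for rpcrdma_buffer */
	spinlock_t		lock;	/* rb_lock */
	struct list_head	mrs;	/* rb_mrs */
};

struct demo_req {			/* stands in for rpcrdma_req */
	struct list_head	free_mrs;	/* rl_free_mrs */
};

static struct demo_mr *demo_mr_get(struct demo_req *req,
				   struct demo_pool *pool)
{
	struct demo_mr *mr;

	/* Fast path: only the request's owner touches free_mrs, no lock. */
	mr = list_first_entry_or_null(&req->free_mrs, struct demo_mr, mr_list);
	if (mr) {
		list_del_init(&mr->mr_list);
		return mr;
	}

	/* Slow path: refill from the shared pool under its spinlock. */
	spin_lock(&pool->lock);
	mr = list_first_entry_or_null(&pool->mrs, struct demo_mr, mr_list);
	if (mr)
		list_del_init(&mr->mr_list);
	spin_unlock(&pool->lock);
	return mr;
}
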
@@ -364,19 +361,19 @@ rpcrdma_mr_pop(struct list_head *list)
  * One of these is associated with a transport instance
  */
 struct rpcrdma_buffer {
-       spinlock_t              rb_mrlock;      /* protect rb_mrs list */
+       spinlock_t              rb_lock;
+       struct list_head        rb_send_bufs;
        struct list_head        rb_mrs;
-       struct list_head        rb_all;
 
        unsigned long           rb_sc_head;
        unsigned long           rb_sc_tail;
        unsigned long           rb_sc_last;
        struct rpcrdma_sendctx  **rb_sc_ctxs;
 
-       spinlock_t              rb_lock;        /* protect buf lists */
-       struct list_head        rb_send_bufs;
-       struct list_head        rb_recv_bufs;
        struct list_head        rb_allreqs;
+       struct list_head        rb_all_mrs;
+
+       struct llist_head       rb_free_reps;
 
        u32                     rb_max_requests;
        u32                     rb_credits;     /* most recent credit grant */
@@ -384,7 +381,7 @@ struct rpcrdma_buffer {
        u32                     rb_bc_srv_max_requests;
        u32                     rb_bc_max_requests;
 
-       struct delayed_work     rb_refresh_worker;
+       struct work_struct      rb_refresh_worker;
 };
 
 /*
@@ -490,7 +487,6 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
 
 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_mr_put(struct rpcrdma_mr *mr);
-void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
 
 static inline void
 rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
@@ -546,6 +542,7 @@ rpcrdma_data_dir(bool writing)
 /* Memory registration calls xprtrdma/frwr_ops.c
  */
 bool frwr_is_supported(struct ib_device *device);
+void frwr_recycle(struct rpcrdma_req *req);
 void frwr_reset(struct rpcrdma_req *req);
 int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
@@ -554,7 +551,7 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                struct rpcrdma_mr_seg *seg,
                                int nsegs, bool writing, __be32 xid,
-                               struct rpcrdma_mr **mr);
+                               struct rpcrdma_mr *mr);
 int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
 void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
 void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
index e2176c1..70e52f5 100644 (file)
@@ -562,10 +562,14 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
                printk(KERN_WARNING "Callback slot table overflowed\n");
                return -ESHUTDOWN;
        }
+       if (transport->recv.copied && !req->rq_private_buf.len)
+               return -ESHUTDOWN;
 
        ret = xs_read_stream_request(transport, msg, flags, req);
        if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
                xprt_complete_bc_request(req, transport->recv.copied);
+       else
+               req->rq_private_buf.len = transport->recv.copied;
 
        return ret;
 }
@@ -587,7 +591,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
        /* Look up and lock the request corresponding to the given XID */
        spin_lock(&xprt->queue_lock);
        req = xprt_lookup_rqst(xprt, transport->recv.xid);
-       if (!req) {
+       if (!req || (transport->recv.copied && !req->rq_private_buf.len)) {
                msg->msg_flags |= MSG_TRUNC;
                goto out;
        }
@@ -599,6 +603,8 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
        spin_lock(&xprt->queue_lock);
        if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
                xprt_complete_rqst(req->rq_task, transport->recv.copied);
+       else
+               req->rq_private_buf.len = transport->recv.copied;
        xprt_unpin_rqst(req);
 out:
        spin_unlock(&xprt->queue_lock);
@@ -1243,19 +1249,21 @@ static void xs_error_report(struct sock *sk)
 {
        struct sock_xprt *transport;
        struct rpc_xprt *xprt;
-       int err;
 
        read_lock_bh(&sk->sk_callback_lock);
        if (!(xprt = xprt_from_sock(sk)))
                goto out;
 
        transport = container_of(xprt, struct sock_xprt, xprt);
-       err = -sk->sk_err;
-       if (err == 0)
+       transport->xprt_err = -sk->sk_err;
+       if (transport->xprt_err == 0)
                goto out;
        dprintk("RPC:       xs_error_report client %p, error=%d...\n",
-                       xprt, -err);
-       trace_rpc_socket_error(xprt, sk->sk_socket, err);
+                       xprt, -transport->xprt_err);
+       trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err);
+
+       /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */
+       smp_mb__before_atomic();
        xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
  out:
        read_unlock_bh(&sk->sk_callback_lock);
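
The new smp_mb__before_atomic() pairs the error store with the wake flag.
A condensed sketch of the two sides, with hypothetical demo_* names (the
real flag is XPRT_SOCK_WAKE_ERROR and the consumer is xs_wake_error() below):

#include <linux/atomic.h>
#include <linux/bitops.h>

struct demo_transport {
	unsigned long	sock_state;
	int		xprt_err;
};

#define DEMO_WAKE_ERROR	0	/* assumed bit number */

/* Writer: publish the error value, then raise the flag. */
static void demo_error_report(struct demo_transport *t, int err)
{
	t->xprt_err = err;
	smp_mb__before_atomic();	/* order the store before set_bit() */
	set_bit(DEMO_WAKE_ERROR, &t->sock_state);
}

/* Reader: the atomic RMW of test_and_clear_bit() orders the xchg() after
 * it, so a consumer that sees the flag also sees the error value.
 */
static int demo_wake_error(struct demo_transport *t)
{
	if (!test_and_clear_bit(DEMO_WAKE_ERROR, &t->sock_state))
		return 0;
	return xchg(&t->xprt_err, 0);
}
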
@@ -2470,7 +2478,6 @@ static void xs_wake_write(struct sock_xprt *transport)
 static void xs_wake_error(struct sock_xprt *transport)
 {
        int sockerr;
-       int sockerr_len = sizeof(sockerr);
 
        if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
                return;
@@ -2479,9 +2486,7 @@ static void xs_wake_error(struct sock_xprt *transport)
                goto out;
        if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
                goto out;
-       if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR,
-                               (char *)&sockerr, &sockerr_len) != 0)
-               goto out;
+       sockerr = xchg(&transport->xprt_err, 0);
        if (sockerr < 0)
                xprt_wake_pending_tasks(&transport->xprt, sockerr);
 out:
index 6cc75ff..999eab5 100644 (file)
@@ -160,6 +160,7 @@ struct tipc_link {
        struct {
                u16 len;
                u16 limit;
+               struct sk_buff *target_bskb;
        } backlog[5];
        u16 snd_nxt;
        u16 window;
@@ -880,6 +881,7 @@ static void link_prepare_wakeup(struct tipc_link *l)
 void tipc_link_reset(struct tipc_link *l)
 {
        struct sk_buff_head list;
+       u32 imp;
 
        __skb_queue_head_init(&list);
 
@@ -901,11 +903,10 @@ void tipc_link_reset(struct tipc_link *l)
        __skb_queue_purge(&l->deferdq);
        __skb_queue_purge(&l->backlogq);
        __skb_queue_purge(&l->failover_deferdq);
-       l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
-       l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0;
-       l->backlog[TIPC_HIGH_IMPORTANCE].len = 0;
-       l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
-       l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
+       for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) {
+               l->backlog[imp].len = 0;
+               l->backlog[imp].target_bskb = NULL;
+       }
        kfree_skb(l->reasm_buf);
        kfree_skb(l->reasm_tnlmsg);
        kfree_skb(l->failover_reasm_skb);
@@ -947,7 +948,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
        u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
        struct sk_buff_head *transmq = &l->transmq;
        struct sk_buff_head *backlogq = &l->backlogq;
-       struct sk_buff *skb, *_skb, *bskb;
+       struct sk_buff *skb, *_skb, **tskb;
        int pkt_cnt = skb_queue_len(list);
        int rc = 0;
 
@@ -999,19 +1000,21 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
                        seqno++;
                        continue;
                }
-               if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) {
+               tskb = &l->backlog[imp].target_bskb;
+               if (tipc_msg_bundle(*tskb, hdr, mtu)) {
                        kfree_skb(__skb_dequeue(list));
                        l->stats.sent_bundled++;
                        continue;
                }
-               if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) {
+               if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) {
                        kfree_skb(__skb_dequeue(list));
-                       __skb_queue_tail(backlogq, bskb);
-                       l->backlog[msg_importance(buf_msg(bskb))].len++;
+                       __skb_queue_tail(backlogq, *tskb);
+                       l->backlog[imp].len++;
                        l->stats.sent_bundled++;
                        l->stats.sent_bundles++;
                        continue;
                }
+               l->backlog[imp].target_bskb = NULL;
                l->backlog[imp].len += skb_queue_len(list);
                skb_queue_splice_tail_init(list, backlogq);
        }
@@ -1027,6 +1030,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
        u16 seqno = l->snd_nxt;
        u16 ack = l->rcv_nxt - 1;
        u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+       u32 imp;
 
        while (skb_queue_len(&l->transmq) < l->window) {
                skb = skb_peek(&l->backlogq);
@@ -1037,7 +1041,10 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
                        break;
                __skb_dequeue(&l->backlogq);
                hdr = buf_msg(skb);
-               l->backlog[msg_importance(hdr)].len--;
+               imp = msg_importance(hdr);
+               l->backlog[imp].len--;
+               if (unlikely(skb == l->backlog[imp].target_bskb))
+                       l->backlog[imp].target_bskb = NULL;
                __skb_queue_tail(&l->transmq, skb);
                /* next retransmit attempt */
                if (link_is_bc_sndlink(l))
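
The subtle half of this TIPC fix is the dequeue side just above: once a
buffer leaves the backlog it must stop being a bundling target, otherwise
the sender could keep appending messages to an skb that is already on the
transmit queue. The invariant, restated in isolation (a distillation of
the hunk, not new driver code):

static void demo_backlog_untarget(struct tipc_link *l, struct sk_buff *skb)
{
	u32 imp = msg_importance(buf_msg(skb));

	l->backlog[imp].len--;
	/* a dequeued buffer can no longer absorb newly bundled messages */
	if (unlikely(skb == l->backlog[imp].target_bskb))
		l->backlog[imp].target_bskb = NULL;
}
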
index e6d49cd..922d262 100644 (file)
@@ -543,10 +543,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb,  struct tipc_msg *msg,
        bmsg = buf_msg(_skb);
        tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
                      INT_H_SIZE, dnode);
-       if (msg_isdata(msg))
-               msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE);
-       else
-               msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE);
+       msg_set_importance(bmsg, msg_importance(msg));
        msg_set_seqno(bmsg, msg_seqno(msg));
        msg_set_ack(bmsg, msg_ack(msg));
        msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
index 3b9f8cc..4b92b19 100644 (file)
@@ -740,7 +740,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
                /* fall through */
        case TIPC_LISTEN:
        case TIPC_CONNECTING:
-               if (!skb_queue_empty(&sk->sk_receive_queue))
+               if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                        revents |= EPOLLIN | EPOLLRDNORM;
                break;
        case TIPC_OPEN:
@@ -748,7 +748,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
                        revents |= EPOLLOUT;
                if (!tipc_sk_type_connectionless(sk))
                        break;
-               if (skb_queue_empty(&sk->sk_receive_queue))
+               if (skb_queue_empty_lockless(&sk->sk_receive_queue))
                        break;
                revents |= EPOLLIN | EPOLLRDNORM;
                break;
@@ -2119,13 +2119,13 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
        struct tipc_msg *hdr = buf_msg(skb);
 
        if (unlikely(msg_in_group(hdr)))
-               return sk->sk_rcvbuf;
+               return READ_ONCE(sk->sk_rcvbuf);
 
        if (unlikely(!msg_connected(hdr)))
-               return sk->sk_rcvbuf << msg_importance(hdr);
+               return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
 
        if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
-               return sk->sk_rcvbuf;
+               return READ_ONCE(sk->sk_rcvbuf);
 
        return FLOWCTL_MSG_LIM;
 }
@@ -3790,7 +3790,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
        i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf);
        i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk));
        i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf);
-       i += scnprintf(buf + i, sz - i, " | %d\n", sk->sk_backlog.len);
+       i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len));
 
        if (dqueues & TIPC_DUMP_SK_SNDQ) {
                i += scnprintf(buf + i, sz - i, "sk_write_queue: ");
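
The READ_ONCE() conversions above annotate a real race: sk->sk_rcvbuf and
sk->sk_backlog.len can be written concurrently (e.g. by setsockopt or the
backlog processing path) while these readers hold no lock. A marked load
prevents tearing and documents the lockless access; a minimal sketch:

static unsigned int demo_rcvbuf_limit(const struct sock *sk, int importance)
{
	/* one marked load; reuse it for any derived arithmetic */
	unsigned int rcvbuf = READ_ONCE(sk->sk_rcvbuf);

	return rcvbuf << importance;	/* illustrative scaling only */
}
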
index 67e87db..0d8da80 100644 (file)
@@ -2599,7 +2599,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 
        /* readable? */
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        /* Connection-based need to check for termination and startup */
@@ -2628,7 +2628,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
        mask = 0;
 
        /* exceptional events? */
-       if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+       if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
                mask |= EPOLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
@@ -2638,7 +2638,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
                mask |= EPOLLHUP;
 
        /* readable? */
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        /* Connection-based need to check for termination and startup */
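
skb_queue_empty_lockless() is the annotated twin of skb_queue_empty(): it
reads the queue head with READ_ONCE() so poll paths, which hold neither the
socket lock nor the queue lock, do not race with concurrent enqueue and
dequeue. Its shape is roughly (see include/linux/skbuff.h):

static inline bool demo_queue_empty_lockless(const struct sk_buff_head *list)
{
	return READ_ONCE(list->next) == (const struct sk_buff *)list;
}
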
index ab47bf3..582a3e4 100644 (file)
@@ -638,7 +638,7 @@ struct sock *__vsock_create(struct net *net,
 }
 EXPORT_SYMBOL_GPL(__vsock_create);
 
-static void __vsock_release(struct sock *sk)
+static void __vsock_release(struct sock *sk, int level)
 {
        if (sk) {
                struct sk_buff *skb;
@@ -648,9 +648,17 @@ static void __vsock_release(struct sock *sk)
                vsk = vsock_sk(sk);
                pending = NULL; /* Compiler warning. */
 
+               /* The release call is supposed to use lock_sock_nested()
+                * rather than lock_sock(), if a sock lock should be acquired.
+                */
                transport->release(vsk);
 
-               lock_sock(sk);
+               /* When "level" is SINGLE_DEPTH_NESTING, use the nested
+                * version to avoid the warning "possible recursive locking
+                * detected". When "level" is 0, lock_sock_nested(sk, level)
+                * is the same as lock_sock(sk).
+                */
+               lock_sock_nested(sk, level);
                sock_orphan(sk);
                sk->sk_shutdown = SHUTDOWN_MASK;
 
@@ -659,7 +667,7 @@ static void __vsock_release(struct sock *sk)
 
                /* Clean up any sockets that never were accepted. */
                while ((pending = vsock_dequeue_accept(sk)) != NULL) {
-                       __vsock_release(pending);
+                       __vsock_release(pending, SINGLE_DEPTH_NESTING);
                        sock_put(pending);
                }
 
@@ -708,7 +716,7 @@ EXPORT_SYMBOL_GPL(vsock_stream_has_space);
 
 static int vsock_release(struct socket *sock)
 {
-       __vsock_release(sock->sk);
+       __vsock_release(sock->sk, 0);
        sock->sk = NULL;
        sock->state = SS_FREE;
 
@@ -862,7 +870,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
                 * the queue and write as long as the socket isn't shutdown for
                 * sending.
                 */
-               if (!skb_queue_empty(&sk->sk_receive_queue) ||
+               if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
                    (sk->sk_shutdown & RCV_SHUTDOWN)) {
                        mask |= EPOLLIN | EPOLLRDNORM;
                }
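
SINGLE_DEPTH_NESTING exists for exactly this shape: the parent socket's
release walks its never-accepted children and takes each child's sk lock
while still holding the parent's. lock_sock_nested() tells lockdep that the
second acquisition is a deliberate parent-to-child order, not a recursive
deadlock. In miniature (sketch only):

static void demo_release(struct sock *sk, int level)
{
	lock_sock_nested(sk, level);	/* level 0 behaves like lock_sock() */
	/* ... tear the socket down ... */
	release_sock(sk);
}

/* usage: demo_release(parent, 0); then, for each pending child,
 * demo_release(child, SINGLE_DEPTH_NESTING);
 */
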
index 261521d..c443db7 100644 (file)
@@ -559,7 +559,7 @@ static void hvs_release(struct vsock_sock *vsk)
        struct sock *sk = sk_vsock(vsk);
        bool remove_sock;
 
-       lock_sock(sk);
+       lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
        remove_sock = hvs_close_lock_held(vsk);
        release_sock(sk);
        if (remove_sock)
index 5bb70c6..481f7f8 100644 (file)
@@ -204,10 +204,14 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
        return virtio_transport_get_ops()->send_pkt(pkt);
 }
 
-static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
+static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
                                        struct virtio_vsock_pkt *pkt)
 {
+       if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
+               return false;
+
        vvs->rx_bytes += pkt->len;
+       return true;
 }
 
 static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
@@ -458,6 +462,9 @@ void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
                vvs->buf_size_max = val;
        vvs->buf_size = val;
        vvs->buf_alloc = val;
+
+       virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
+                                           NULL);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size);
 
@@ -820,7 +827,7 @@ void virtio_transport_release(struct vsock_sock *vsk)
        struct sock *sk = &vsk->sk;
        bool remove_sock = true;
 
-       lock_sock(sk);
+       lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
        if (sk->sk_type == SOCK_STREAM)
                remove_sock = virtio_transport_close(vsk);
 
@@ -876,14 +883,18 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
                              struct virtio_vsock_pkt *pkt)
 {
        struct virtio_vsock_sock *vvs = vsk->trans;
-       bool free_pkt = false;
+       bool can_enqueue, free_pkt = false;
 
        pkt->len = le32_to_cpu(pkt->hdr.len);
        pkt->off = 0;
 
        spin_lock_bh(&vvs->rx_lock);
 
-       virtio_transport_inc_rx_pkt(vvs, pkt);
+       can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
+       if (!can_enqueue) {
+               free_pkt = true;
+               goto out;
+       }
 
        /* Try to copy small packets into the buffer of last packet queued,
         * to avoid wasting memory queueing the entire buffer with a small
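
The new return value turns credit accounting into admission control: a
packet is only counted - and therefore only queued - if it fits in the
buffer space this socket advertised. A remote peer that ignores its credit
can no longer force unbounded allocation. The check, reduced to arithmetic
(the u64 cast is an extra overflow guard added in this sketch):

static bool demo_rx_fits(u32 rx_bytes, u32 pkt_len, u32 buf_alloc)
{
	/* e.g. rx_bytes = 60 KiB, pkt_len = 8 KiB, buf_alloc = 64 KiB
	 * -> 68 KiB > 64 KiB, so the packet is dropped
	 */
	return (u64)rx_bytes + pkt_len <= buf_alloc;
}
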
index e851caf..fcac5c6 100644 (file)
@@ -204,6 +204,11 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
                return false;
        }
 
+       /* channel 14 is only for IEEE 802.11b */
+       if (chandef->center_freq1 == 2484 &&
+           chandef->width != NL80211_CHAN_WIDTH_20_NOHT)
+               return false;
+
        if (cfg80211_chandef_is_edmg(chandef) &&
            !cfg80211_edmg_chandef_valid(chandef))
                return false;
index d21b158..7b72286 100644 (file)
@@ -201,6 +201,38 @@ cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info)
        return __cfg80211_rdev_from_attrs(netns, info->attrs);
 }
 
+static int validate_beacon_head(const struct nlattr *attr,
+                               struct netlink_ext_ack *extack)
+{
+       const u8 *data = nla_data(attr);
+       unsigned int len = nla_len(attr);
+       const struct element *elem;
+       const struct ieee80211_mgmt *mgmt = (void *)data;
+       unsigned int fixedlen = offsetof(struct ieee80211_mgmt,
+                                        u.beacon.variable);
+
+       if (len < fixedlen)
+               goto err;
+
+       if (ieee80211_hdrlen(mgmt->frame_control) !=
+           offsetof(struct ieee80211_mgmt, u.beacon))
+               goto err;
+
+       data += fixedlen;
+       len -= fixedlen;
+
+       for_each_element(elem, data, len) {
+               /* nothing */
+       }
+
+       if (for_each_element_completed(elem, data, len))
+               return 0;
+
+err:
+       NL_SET_ERR_MSG_ATTR(extack, attr, "malformed beacon head");
+       return -EINVAL;
+}
+
 static int validate_ie_attr(const struct nlattr *attr,
                            struct netlink_ext_ack *extack)
 {
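
validate_beacon_head() shows the for_each_element() validation idiom: the
iterator refuses to step past a TLV whose claimed length overruns the
buffer, and for_each_element_completed() is true only when the cursor
stopped exactly at the end - so an empty loop body is a complete bounds
check. A consumer built on the same guarantee might look like (sketch):

static bool demo_ies_valid(const u8 *data, size_t len)
{
	const struct element *elem;

	for_each_element(elem, data, len) {
		/* elem->id, elem->datalen, elem->data are in bounds here */
	}
	return for_each_element_completed(elem, data, len);
}
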
@@ -338,8 +370,9 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 
        [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
        [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
-       [NL80211_ATTR_BEACON_HEAD] = { .type = NLA_BINARY,
-                                      .len = IEEE80211_MAX_DATA_LEN },
+       [NL80211_ATTR_BEACON_HEAD] =
+               NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_beacon_head,
+                                      IEEE80211_MAX_DATA_LEN),
        [NL80211_ATTR_BEACON_TAIL] =
                NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
                                       IEEE80211_MAX_DATA_LEN),
@@ -360,7 +393,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ },
        [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY,
                                   .len = IEEE80211_MAX_MESH_ID_LEN },
-       [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 },
+       [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
 
        [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
        [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
@@ -2636,6 +2669,8 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 
        control_freq = nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]);
 
+       memset(chandef, 0, sizeof(*chandef));
+
        chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq);
        chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
        chandef->center_freq1 = control_freq;
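
The memset() here, and the "= {}" initializers added elsewhere in this
merge, close the same hole: struct cfg80211_chan_def grew fields (such as
the EDMG members) that not every parse or get-channel path writes, so stack
garbage could reach later field-by-field comparisons or be copied out to
userspace. The rule the patches converge on, sketched:

static void demo_parse_chandef(struct cfg80211_chan_def *chandef,
			       u32 control_freq)
{
	memset(chandef, 0, sizeof(*chandef));	/* every path starts clean */
	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
	chandef->center_freq1 = control_freq;
	/* fields never touched here compare as zero, not as garbage */
}
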
@@ -3176,7 +3211,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 
        if (rdev->ops->get_channel) {
                int ret;
-               struct cfg80211_chan_def chandef;
+               struct cfg80211_chan_def chandef = {};
 
                ret = rdev_get_channel(rdev, wdev, &chandef);
                if (ret == 0) {
@@ -6270,6 +6305,9 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info)
        if (!rdev->ops->del_mpath)
                return -EOPNOTSUPP;
 
+       if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
+               return -EOPNOTSUPP;
+
        return rdev_del_mpath(rdev, dev, dst);
 }
 
@@ -13644,7 +13682,7 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb,
        hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
                             NL80211_CMD_GET_FTM_RESPONDER_STATS);
        if (!hdr)
-               return -ENOBUFS;
+               goto nla_put_failure;
 
        if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
                goto nla_put_failure;
index 5311d0a..446c76d 100644 (file)
@@ -2108,7 +2108,7 @@ static void reg_call_notifier(struct wiphy *wiphy,
 
 static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
 {
-       struct cfg80211_chan_def chandef;
+       struct cfg80211_chan_def chandef = {};
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
        enum nl80211_iftype iftype;
 
@@ -3883,6 +3883,7 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy)
 
        return pre_cac_allowed;
 }
+EXPORT_SYMBOL(regulatory_pre_cac_allowed);
 
 void regulatory_propagate_dfs_state(struct wiphy *wiphy,
                                    struct cfg80211_chan_def *chandef,
index 504133d..dc8f689 100644 (file)
@@ -155,14 +155,6 @@ bool regulatory_indoor_allowed(void);
  */
 #define REG_PRE_CAC_EXPIRY_GRACE_MS 2000
 
-/**
- * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain
- * @wiphy: wiphy for which pre-CAC capability is checked.
-
- * Pre-CAC is allowed only in ETSI domain.
- */
-bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
-
 /**
  * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys
  * @wiphy - wiphy on which radar is detected and the event will be propagated
index d313c9b..aef240f 100644 (file)
@@ -1703,8 +1703,7 @@ cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy,
 static void
 cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
                                   struct cfg80211_bss *nontrans_bss,
-                                  struct ieee80211_mgmt *mgmt, size_t len,
-                                  gfp_t gfp)
+                                  struct ieee80211_mgmt *mgmt, size_t len)
 {
        u8 *ie, *new_ie, *pos;
        const u8 *nontrans_ssid, *trans_ssid, *mbssid;
@@ -1715,6 +1714,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
        const struct cfg80211_bss_ies *old;
        u8 cpy_len;
 
+       lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
+
        ie = mgmt->u.probe_resp.variable;
 
        new_ie_len = ielen;
@@ -1723,26 +1724,30 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
                return;
        new_ie_len -= trans_ssid[1];
        mbssid = cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen);
-       if (!mbssid)
+       /*
+        * It's not valid to have the MBSSID element before the SSID;
+        * ignore it if that happens - the code below assumes it is
+        * after (while copying things in between).
+        */
+       if (!mbssid || mbssid < trans_ssid)
                return;
        new_ie_len -= mbssid[1];
-       rcu_read_lock();
+
        nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID);
-       if (!nontrans_ssid) {
-               rcu_read_unlock();
+       if (!nontrans_ssid)
                return;
-       }
+
        new_ie_len += nontrans_ssid[1];
-       rcu_read_unlock();
 
        /* generate new ie for nontrans BSS
         * 1. replace SSID with nontrans BSS' SSID
         * 2. skip MBSSID IE
         */
-       new_ie = kzalloc(new_ie_len, gfp);
+       new_ie = kzalloc(new_ie_len, GFP_ATOMIC);
        if (!new_ie)
                return;
-       new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, gfp);
+
+       new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, GFP_ATOMIC);
        if (!new_ies)
                goto out_free;
 
@@ -1896,6 +1901,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
        cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len,
                                         &non_tx_data, gfp);
 
+       spin_lock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
+
        /* check if the res has other nontransmitting bss which is not
         * in MBSSID IE
         */
@@ -1910,8 +1917,9 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
                ies2 = rcu_access_pointer(tmp_bss->ies);
                if (ies2->tsf < ies1->tsf)
                        cfg80211_update_notlisted_nontrans(wiphy, tmp_bss,
-                                                          mgmt, len, gfp);
+                                                          mgmt, len);
        }
+       spin_unlock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
 
        return res;
 }
index 419eb12..5b4ed5b 100644 (file)
@@ -1559,7 +1559,8 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
        }
 
        if (freq == 2484) {
-               if (chandef->width > NL80211_CHAN_WIDTH_40)
+               /* channel 14 is only for IEEE 802.11b */
+               if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT)
                        return false;
 
                *op_class = 82; /* channel 14 */
index 7b6529d..cac9e28 100644 (file)
@@ -798,7 +798,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev,
 {
        struct wireless_dev *wdev = dev->ieee80211_ptr;
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
-       struct cfg80211_chan_def chandef;
+       struct cfg80211_chan_def chandef = {};
        int ret;
 
        switch (wdev->iftype) {
index c67d7a8..73fd0ea 100644 (file)
@@ -202,6 +202,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
                               struct iw_point *data, char *ssid)
 {
        struct wireless_dev *wdev = dev->ieee80211_ptr;
+       int ret = 0;
 
        /* call only for station! */
        if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
@@ -219,7 +220,10 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
                if (ie) {
                        data->flags = 1;
                        data->length = ie[1];
-                       memcpy(ssid, ie + 2, data->length);
+                       if (data->length > IW_ESSID_MAX_SIZE)
+                               ret = -EINVAL;
+                       else
+                               memcpy(ssid, ie + 2, data->length);
                }
                rcu_read_unlock();
        } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) {
@@ -229,7 +233,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
        }
        wdev_unlock(wdev);
 
-       return 0;
+       return ret;
 }
 
 int cfg80211_mgd_wext_siwap(struct net_device *dev,
index 5c111bc..00e7823 100644 (file)
@@ -55,7 +55,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
                if (!sock_owned_by_user(sk)) {
                        queued = x25_process_rx_frame(sk, skb);
                } else {
-                       queued = !sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+                       queued = !sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
                }
                bh_unlock_sock(sk);
                sock_put(sk);
index 947b8ff..3049af2 100644 (file)
@@ -27,6 +27,9 @@ void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 {
        unsigned long flags;
 
+       if (!xs->tx)
+               return;
+
        spin_lock_irqsave(&umem->xsk_list_lock, flags);
        list_add_rcu(&xs->list, &umem->xsk_list);
        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
@@ -36,6 +39,9 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 {
        unsigned long flags;
 
+       if (!xs->tx)
+               return;
+
        spin_lock_irqsave(&umem->xsk_list_lock, flags);
        list_del_rcu(&xs->list);
        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
@@ -206,14 +212,7 @@ static int xdp_umem_map_pages(struct xdp_umem *umem)
 
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
-       unsigned int i;
-
-       for (i = 0; i < umem->npgs; i++) {
-               struct page *page = umem->pgs[i];
-
-               set_page_dirty_lock(page);
-               put_page(page);
-       }
+       put_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
        kfree(umem->pgs);
        umem->pgs = NULL;
@@ -383,8 +382,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
                        return -EINVAL;
        }
 
-       headroom = ALIGN(headroom, 64);
-
        size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
        if (size_chk < 0)
                return -EINVAL;
index c2f1af3..9044073 100644 (file)
@@ -305,9 +305,8 @@ out:
 }
 EXPORT_SYMBOL(xsk_umem_consume_tx);
 
-static int xsk_zc_xmit(struct sock *sk)
+static int xsk_zc_xmit(struct xdp_sock *xs)
 {
-       struct xdp_sock *xs = xdp_sk(sk);
        struct net_device *dev = xs->dev;
 
        return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
@@ -327,11 +326,10 @@ static void xsk_destruct_skb(struct sk_buff *skb)
        sock_wfree(skb);
 }
 
-static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
-                           size_t total_len)
+static int xsk_generic_xmit(struct sock *sk)
 {
-       u32 max_batch = TX_BATCH_SIZE;
        struct xdp_sock *xs = xdp_sk(sk);
+       u32 max_batch = TX_BATCH_SIZE;
        bool sent_frame = false;
        struct xdp_desc desc;
        struct sk_buff *skb;
@@ -394,6 +392,18 @@ out:
        return err;
 }
 
+static int __xsk_sendmsg(struct sock *sk)
+{
+       struct xdp_sock *xs = xdp_sk(sk);
+
+       if (unlikely(!(xs->dev->flags & IFF_UP)))
+               return -ENETDOWN;
+       if (unlikely(!xs->tx))
+               return -ENOBUFS;
+
+       return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
+}
+
 static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
        bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
@@ -402,21 +412,18 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 
        if (unlikely(!xsk_is_bound(xs)))
                return -ENXIO;
-       if (unlikely(!(xs->dev->flags & IFF_UP)))
-               return -ENETDOWN;
-       if (unlikely(!xs->tx))
-               return -ENOBUFS;
-       if (need_wait)
+       if (unlikely(need_wait))
                return -EOPNOTSUPP;
 
-       return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
+       return __xsk_sendmsg(sk);
 }
 
 static unsigned int xsk_poll(struct file *file, struct socket *sock,
                             struct poll_table_struct *wait)
 {
        unsigned int mask = datagram_poll(file, sock, wait);
-       struct xdp_sock *xs = xdp_sk(sock->sk);
+       struct sock *sk = sock->sk;
+       struct xdp_sock *xs = xdp_sk(sk);
        struct net_device *dev;
        struct xdp_umem *umem;
 
@@ -426,9 +433,14 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
        dev = xs->dev;
        umem = xs->umem;
 
-       if (umem->need_wakeup)
-               dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
-                                               umem->need_wakeup);
+       if (umem->need_wakeup) {
+               if (dev->netdev_ops->ndo_xsk_wakeup)
+                       dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+                                                       umem->need_wakeup);
+               else
+                       /* Poll needs to drive Tx also in copy mode */
+                       __xsk_sendmsg(sk);
+       }
 
        if (xs->rx && !xskq_empty_desc(xs->rx))
                mask |= POLLIN | POLLRDNORM;
@@ -977,7 +989,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
        /* Matches the smp_wmb() in xsk_init_queue */
        smp_rmb();
        qpg = virt_to_head_page(q->ring);
-       if (size > (PAGE_SIZE << compound_order(qpg)))
+       if (size > page_size(qpg))
                return -EINVAL;
 
        pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
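
The xsk refactor funnels sendmsg() and poll() through one transmit kick, so
copy-mode sockets - whose drivers have no ndo_xsk_wakeup hook - still get
their TX ring serviced when userspace polls. The funnel, restated (same
logic as __xsk_sendmsg() above, shown here for emphasis):

static int demo_xsk_kick(struct sock *sk, struct xdp_sock *xs)
{
	if (unlikely(!(xs->dev->flags & IFF_UP)))
		return -ENETDOWN;
	if (unlikely(!xs->tx))
		return -ENOBUFS;

	/* zero-copy: ask the driver; copy mode: transmit inline */
	return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
}
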
index 6088bc2..9b599ed 100644 (file)
@@ -706,7 +706,7 @@ resume:
        if (err)
                goto drop;
 
-       nf_reset(skb);
+       nf_reset_ct(skb);
 
        if (decaps) {
                sp = skb_sec_path(skb);
index 2ab4859..0f5131b 100644 (file)
@@ -185,7 +185,7 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
        skb->skb_iif = 0;
        skb->ignore_df = 0;
        skb_dst_drop(skb);
-       nf_reset(skb);
+       nf_reset_ct(skb);
        nf_reset_trace(skb);
 
        if (!xnet)
index 9499b35..b1db55b 100644 (file)
@@ -502,7 +502,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err)
        struct net *net = xs_net(skb_dst(skb)->xfrm);
 
        while (likely((err = xfrm_output_one(skb, err)) == 0)) {
-               nf_reset(skb);
+               nf_reset_ct(skb);
 
                err = skb_dst(skb)->ops->local_out(net, skb->sk, skb);
                if (unlikely(err != 1))
index 21e9392..f2d1e57 100644 (file)
@@ -2808,7 +2808,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
                        continue;
                }
 
-               nf_reset(skb);
+               nf_reset_ct(skb);
                skb_dst_drop(skb);
                skb_dst_set(skb, dst);
 
index 7409722..7048bb3 100644 (file)
@@ -3,7 +3,8 @@
 #ifndef __ASM_GOTO_WORKAROUND_H
 #define __ASM_GOTO_WORKAROUND_H
 
-/* this will bring in asm_volatile_goto macro definition
+/*
+ * This will bring in asm_volatile_goto and asm_inline macro definitions
  * if enabled by compiler and config options.
  */
 #include <linux/types.h>
@@ -13,5 +14,15 @@
 #define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
 #endif
 
+/*
+ * asm_inline is defined as asm __inline in "include/linux/compiler_types.h"
+ * if supported by the kernel's CC (i.e. CONFIG_CC_HAS_ASM_INLINE), which is not
+ * supported by CLANG.
+ */
+#ifdef asm_inline
+#undef asm_inline
+#define asm_inline asm
+#endif
+
 #define volatile(x...) volatile("")
 #endif
index e399380..4c31b30 100644 (file)
@@ -13,6 +13,7 @@
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <linux/perf_event.h>
 
 #include "libbpf.h"
 #include "bpf_load.h"
index 4b0432e..10ba926 100644 (file)
@@ -143,11 +143,6 @@ cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || e
 # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
 ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
 
-# ar-option
-# Usage: KBUILD_ARFLAGS := $(call ar-option,D)
-# Important: no spaces around options
-ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2))
-
 # ld-version
 # Note this is mainly for HJ Lu's 3 number binutil versions
 ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
index c42891e..3e86b30 100644 (file)
@@ -17,7 +17,7 @@ hostprogs-$(CONFIG_VT)           += conmakehash
 hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
 hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
 hostprogs-$(CONFIG_ASN1)        += asn1_compiler
-hostprogs-$(CONFIG_MODULE_SIG)  += sign-file
+hostprogs-$(CONFIG_MODULE_SIG_FORMAT) += sign-file
 hostprogs-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert
 hostprogs-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
 
index f72aba6..a9e4795 100644 (file)
@@ -389,7 +389,7 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ;
 ifdef builtin-target
 
 quiet_cmd_ar_builtin = AR      $@
-      cmd_ar_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(real-prereqs)
+      cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs)
 
 $(builtin-target): $(real-obj-y) FORCE
        $(call if_changed,ar_builtin)
index 4a0cdd6..179d55a 100644 (file)
@@ -232,7 +232,7 @@ quiet_cmd_ld = LD      $@
 # ---------------------------------------------------------------------------
 
 quiet_cmd_ar = AR      $@
-      cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(real-prereqs)
+      cmd_ar = rm -f $@; $(AR) cDPrsT $@ $(real-prereqs)
 
 # Objcopy
 # ---------------------------------------------------------------------------
index 93a7edf..6fcc66a 100755 (executable)
@@ -62,6 +62,8 @@ my $conststructsfile = "$D/const_structs.checkpatch";
 my $typedefsfile = "";
 my $color = "auto";
 my $allow_c99_comments = 1; # Can be overridden by --ignore C99_COMMENT_TOLERANCE
+# git output parsing needs US English output, so first set backtick child process LANGUAGE
+my $git_command ='export LANGUAGE=en_US.UTF-8; git';
 
 sub help {
        my ($exitcode) = @_;
@@ -904,7 +906,7 @@ sub seed_camelcase_includes {
        $camelcase_seeded = 1;
 
        if (-e ".git") {
-               my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`;
+               my $git_last_include_commit = `${git_command} log --no-merges --pretty=format:"%h%n" -1 -- include`;
                chomp $git_last_include_commit;
                $camelcase_cache = ".checkpatch-camelcase.git.$git_last_include_commit";
        } else {
@@ -932,7 +934,7 @@ sub seed_camelcase_includes {
        }
 
        if (-e ".git") {
-               $files = `git ls-files "include/*.h"`;
+               $files = `${git_command} ls-files "include/*.h"`;
                @include_files = split('\n', $files);
        }
 
@@ -956,13 +958,13 @@ sub git_commit_info {
 
        return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
 
-       my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
+       my $output = `${git_command} log --no-color --format='%H %s' -1 $commit 2>&1`;
        $output =~ s/^\s*//gm;
        my @lines = split("\n", $output);
 
        return ($id, $desc) if ($#lines < 0);
 
-       if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
+       if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous/) {
 # Maybe one day convert this block of bash into something that returns
 # all matching commit ids, but it's very slow...
 #
@@ -1006,7 +1008,7 @@ if ($git) {
                } else {
                        $git_range = "-1 $commit_expr";
                }
-               my $lines = `git log --no-color --no-merges --pretty=format:'%H %s' $git_range`;
+               my $lines = `${git_command} log --no-color --no-merges --pretty=format:'%H %s' $git_range`;
                foreach my $line (split(/\n/, $lines)) {
                        $line =~ /^([0-9a-fA-F]{40,40}) (.*)$/;
                        next if (!defined($1) || !defined($2));
@@ -2725,8 +2727,10 @@ sub process {
                    ($line =~ /^\s*(?:WARNING:|BUG:)/ ||
                     $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ ||
                                        # timestamp
-                    $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) {
-                                       # stack dump address
+                    $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/) ||
+                    $line =~ /^(?:\s+\w+:\s+[0-9a-fA-F]+){3,3}/ ||
+                    $line =~ /^\s*\#\d+\s*\[[0-9a-fA-F]+\]\s*\w+ at [0-9a-fA-F]+/) {
+                                       # stack dump address styles
                        $commit_log_possible_stack_dump = 1;
                }
 
@@ -2898,6 +2902,17 @@ sub process {
                        }
                }
 
+# check for invalid commit id
+               if ($in_commit_log && $line =~ /(^fixes:|\bcommit)\s+([0-9a-f]{6,40})\b/i) {
+                       my $id;
+                       my $description;
+                       ($id, $description) = git_commit_info($2, undef, undef);
+                       if (!defined($id)) {
+                               WARN("UNKNOWN_COMMIT_ID",
+                                    "Unknown commit id '$2', maybe rebased or not pulled?\n" . $herecurr);
+                       }
+               }
+
 # ignore non-hunk lines and lines being removed
                next if (!$hunk_line || $line =~ /^-/);
 
@@ -3069,21 +3084,21 @@ sub process {
 # check SPDX comment style for .[chsS] files
                                if ($realfile =~ /\.[chsS]$/ &&
                                    $rawline =~ /SPDX-License-Identifier:/ &&
-                                   $rawline !~ /^\+\s*\Q$comment\E\s*/) {
+                                   $rawline !~ m@^\+\s*\Q$comment\E\s*@) {
                                        WARN("SPDX_LICENSE_TAG",
                                             "Improper SPDX comment style for '$realfile', please use '$comment' instead\n" . $herecurr);
                                }
 
                                if ($comment !~ /^$/ &&
-                                   $rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) {
-                                        WARN("SPDX_LICENSE_TAG",
-                                             "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+                                   $rawline !~ m@^\+\Q$comment\E SPDX-License-Identifier: @) {
+                                       WARN("SPDX_LICENSE_TAG",
+                                            "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
                                } elsif ($rawline =~ /(SPDX-License-Identifier: .*)/) {
-                                        my $spdx_license = $1;
-                                        if (!is_SPDX_License_valid($spdx_license)) {
-                                                 WARN("SPDX_LICENSE_TAG",
-                                                      "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr);
-                                        }
+                                       my $spdx_license = $1;
+                                       if (!is_SPDX_License_valid($spdx_license)) {
+                                               WARN("SPDX_LICENSE_TAG",
+                                                    "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr);
+                                       }
                                }
                        }
                }
@@ -4660,7 +4675,7 @@ sub process {
 
 # closing brace should have a space following it when it has anything
 # on the line
-               if ($line =~ /}(?!(?:,|;|\)))\S/) {
+               if ($line =~ /}(?!(?:,|;|\)|\}))\S/) {
                        if (ERROR("SPACING",
                                  "space required after that close brace '}'\n" . $herecurr) &&
                            $fix) {
@@ -5191,7 +5206,7 @@ sub process {
                                next if ($arg =~ /\.\.\./);
                                next if ($arg =~ /^type$/i);
                                my $tmp_stmt = $define_stmt;
-                               $tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
+                               $tmp_stmt =~ s/\b(sizeof|typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
                                $tmp_stmt =~ s/\#+\s*$arg\b//g;
                                $tmp_stmt =~ s/\b$arg\s*\#\#//g;
                                my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g;
@@ -5873,6 +5888,18 @@ sub process {
                             "__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr);
                }
 
+# Check for __attribute__ section, prefer __section
+               if ($realfile !~ m@\binclude/uapi/@ &&
+                   $line =~ /\b__attribute__\s*\(\s*\(.*_*section_*\s*\(\s*("[^"]*")/) {
+                       my $old = substr($rawline, $-[1], $+[1] - $-[1]);
+                       my $new = substr($old, 1, -1);
+                       if (WARN("PREFER_SECTION",
+                                "__section($new) is preferred over __attribute__((section($old)))\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*_*section_*\s*\(\s*\Q$old\E\s*\)\s*\)\s*\)/__section($new)/;
+                       }
+               }
+
 # Check for __attribute__ format(printf, prefer __printf
                if ($realfile !~ m@\binclude/uapi/@ &&
                    $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) {
@@ -6480,6 +6507,12 @@ sub process {
                             "Using $1 should generally have parentheses around the comparison\n" . $herecurr);
                }
 
+# nested likely/unlikely calls
+               if ($line =~ /\b(?:(?:un)?likely)\s*\(\s*!?\s*(IS_ERR(?:_OR_NULL|_VALUE)?|WARN)/) {
+                       WARN("LIKELY_MISUSE",
+                            "nested (un)?likely() calls, $1 already uses unlikely() internally\n" . $herecurr);
+               }
+
 # whine mightly about in_atomic
                if ($line =~ /\bin_atomic\s*\(/) {
                        if ($realfile =~ m@^drivers/@) {
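Editor's note: two of the new checkpatch checks above are easiest to see against concrete C. A hypothetical snippet that would trigger them (at this point __section() still took an unquoted argument, which is why the fixer strips the quotes from $old):

	/* PREFER_SECTION: checkpatch now suggests the kernel wrapper ...   */
	static int early_flag __attribute__((section(".init.data")));
	/* ... in its then-current unquoted form:                           */
	static int early_flag2 __section(.init.data);

	static long check(void *ptr)
	{
		/* LIKELY_MISUSE: IS_ERR() already uses unlikely() internally */
		if (unlikely(IS_ERR(ptr)))
			return PTR_ERR(ptr);
		return 0;
	}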
diff --git a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci
deleted file mode 100644 (file)
index 56a2e26..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/// Use devm_platform_ioremap_resource helper which wraps
-/// platform_get_resource() and devm_ioremap_resource() together.
-///
-// Confidence: High
-// Copyright: (C) 2019 Himanshu Jha GPLv2.
-// Copyright: (C) 2019 Julia Lawall, Inria/LIP6. GPLv2.
-// Keywords: platform_get_resource, devm_ioremap_resource,
-// Keywords: devm_platform_ioremap_resource
-
-virtual patch
-virtual report
-
-@r depends on patch && !report@
-expression e1, e2, arg1, arg2, arg3;
-identifier id;
-@@
-
-(
-- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-|
-- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-)
-  ... when != id
-- e1 = devm_ioremap_resource(arg3, id);
-+ e1 = devm_platform_ioremap_resource(arg1, arg2);
-  ... when != id
-? id = e2
-
-@r1 depends on patch && !report@
-identifier r.id;
-type T;
-@@
-
-- T *id;
-  ...when != id
-
-@r2 depends on report && !patch@
-identifier id;
-expression e1, e2, arg1, arg2, arg3;
-position j0;
-@@
-
-(
-  id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-|
-  struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-)
-  ... when != id
-  e1@j0 = devm_ioremap_resource(arg3, id);
-  ... when != id
-? id = e2
-
-@script:python depends on report && !patch@
-e1 << r2.e1;
-j0 << r2.j0;
-@@
-
-msg = "WARNING: Use devm_platform_ioremap_resource for %s" % (e1)
-coccilib.report.print_report(j0[0], msg)
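Editor's note: the Coccinelle script deleted above automated a simple two-calls-to-one conversion; removing the script does not undo conversions already applied. For reference, the transformation it performed, sketched in a hypothetical probe function (both forms shown together only for comparison):

	static int foo_probe(struct platform_device *pdev)
	{
		struct resource *res;
		void __iomem *base;

		/* before: two calls and an intermediate struct resource */
		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		base = devm_ioremap_resource(&pdev->dev, res);

		/* after: one wrapper call, no local resource needed */
		base = devm_platform_ioremap_resource(pdev, 0);

		return PTR_ERR_OR_ZERO(base);
	}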
index c832bb6..99e93a6 100644 (file)
@@ -6,6 +6,8 @@
 /// add a missing namespace tag to a module source file.
 ///
 
+virtual report
+
 @has_ns_import@
 declarer name MODULE_IMPORT_NS;
 identifier virtual.ns;
index 6d2e09a..2fa7bb8 100644 (file)
@@ -16,6 +16,8 @@ import sys
 
 from linux import utils
 
+printk_log_type = utils.CachedType("struct printk_log")
+
 
 class LxDmesg(gdb.Command):
     """Print Linux kernel log buffer."""
@@ -42,9 +44,14 @@ class LxDmesg(gdb.Command):
             b = utils.read_memoryview(inf, log_buf_addr, log_next_idx)
             log_buf = a.tobytes() + b.tobytes()
 
+        length_offset = printk_log_type.get_type()['len'].bitpos // 8
+        text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8
+        time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8
+        text_offset = printk_log_type.get_type().sizeof
+
         pos = 0
         while pos < log_buf.__len__():
-            length = utils.read_u16(log_buf[pos + 8:pos + 10])
+            length = utils.read_u16(log_buf, pos + length_offset)
             if length == 0:
                 if log_buf_2nd_half == -1:
                     gdb.write("Corrupted log buffer!\n")
@@ -52,10 +59,11 @@ class LxDmesg(gdb.Command):
                 pos = log_buf_2nd_half
                 continue
 
-            text_len = utils.read_u16(log_buf[pos + 10:pos + 12])
-            text = log_buf[pos + 16:pos + 16 + text_len].decode(
+            text_len = utils.read_u16(log_buf, pos + text_len_offset)
+            text_start = pos + text_offset
+            text = log_buf[text_start:text_start + text_len].decode(
                 encoding='utf8', errors='replace')
-            time_stamp = utils.read_u64(log_buf[pos:pos + 8])
+            time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset)
 
             for line in text.splitlines():
                 msg = u"[{time:12.6f}] {line}\n".format(
index 2f5b95f..be984aa 100644 (file)
@@ -15,7 +15,7 @@ import gdb
 import os
 import re
 
-from linux import modules
+from linux import modules, utils
 
 
 if hasattr(gdb, 'Breakpoint'):
@@ -77,12 +77,12 @@ lx-symbols command."""
             gdb.write("scanning for modules in {0}\n".format(path))
             for root, dirs, files in os.walk(path):
                 for name in files:
-                    if name.endswith(".ko"):
+                    if name.endswith(".ko") or name.endswith(".ko.debug"):
                         self.module_files.append(root + "/" + name)
         self.module_files_updated = True
 
     def _get_module_file(self, module_name):
-        module_pattern = ".*/{0}\.ko$".format(
+        module_pattern = ".*/{0}\.ko(?:\.debug)?$".format(
             module_name.replace("_", r"[_\-]"))
         for name in self.module_files:
             if re.match(module_pattern, name) and os.path.exists(name):
@@ -99,7 +99,8 @@ lx-symbols command."""
             attrs[n]['name'].string(): attrs[n]['address']
             for n in range(int(sect_attrs['nsections']))}
         args = []
-        for section_name in [".data", ".data..read_mostly", ".rodata", ".bss"]:
+        for section_name in [".data", ".data..read_mostly", ".rodata", ".bss",
+                             ".text", ".text.hot", ".text.unlikely"]:
             address = section_name_to_address.get(section_name)
             if address:
                 args.append(" -s {name} {addr}".format(
@@ -116,6 +117,12 @@ lx-symbols command."""
             module_file = self._get_module_file(module_name)
 
         if module_file:
+            if utils.is_target_arch('s390'):
+                # Module text is preceded by PLT stubs on s390.
+                module_arch = module['arch']
+                plt_offset = int(module_arch['plt_offset'])
+                plt_size = int(module_arch['plt_size'])
+                module_addr = hex(int(module_addr, 0) + plt_offset + plt_size)
             gdb.write("loading @{addr}: {filename}\n".format(
                 addr=module_addr, filename=module_file))
             cmdline = "add-symbol-file {filename} {addr}{sections}".format(
index bc67126..ea94221 100644 (file)
@@ -92,15 +92,16 @@ def read_memoryview(inf, start, length):
     return memoryview(inf.read_memory(start, length))
 
 
-def read_u16(buffer):
+def read_u16(buffer, offset):
+    buffer_val = buffer[offset:offset + 2]
     value = [0, 0]
 
-    if type(buffer[0]) is str:
-        value[0] = ord(buffer[0])
-        value[1] = ord(buffer[1])
+    if type(buffer_val[0]) is str:
+        value[0] = ord(buffer_val[0])
+        value[1] = ord(buffer_val[1])
     else:
-        value[0] = buffer[0]
-        value[1] = buffer[1]
+        value[0] = buffer_val[0]
+        value[1] = buffer_val[1]
 
     if get_target_endianness() == LITTLE_ENDIAN:
         return value[0] + (value[1] << 8)
@@ -108,18 +109,18 @@ def read_u16(buffer):
         return value[1] + (value[0] << 8)
 
 
-def read_u32(buffer):
+def read_u32(buffer, offset):
     if get_target_endianness() == LITTLE_ENDIAN:
-        return read_u16(buffer[0:2]) + (read_u16(buffer[2:4]) << 16)
+        return read_u16(buffer, offset) + (read_u16(buffer, offset + 2) << 16)
     else:
-        return read_u16(buffer[2:4]) + (read_u16(buffer[0:2]) << 16)
+        return read_u16(buffer, offset + 2) + (read_u16(buffer, offset) << 16)
 
 
-def read_u64(buffer):
+def read_u64(buffer, offset):
     if get_target_endianness() == LITTLE_ENDIAN:
-        return read_u32(buffer[0:4]) + (read_u32(buffer[4:8]) << 32)
+        return read_u32(buffer, offset) + (read_u32(buffer, offset + 4) << 32)
     else:
-        return read_u32(buffer[4:8]) + (read_u32(buffer[0:4]) << 32)
+        return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32)
 
 
 target_arch = None
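Editor's note: the utils.py rework above changes the read_u16/u32/u64 helpers to take a (buffer, offset) pair instead of slicing, avoiding the intermediate slice objects created on every read. The same arithmetic in standalone C, for a little-endian target:

	#include <stddef.h>
	#include <stdint.h>

	/* equivalent of the reworked read_u16()/read_u32(): index into the
	 * buffer at an offset instead of slicing it */
	static uint16_t read_u16_le(const uint8_t *buf, size_t off)
	{
		return (uint16_t)(buf[off] | ((uint16_t)buf[off + 1] << 8));
	}

	static uint32_t read_u32_le(const uint8_t *buf, size_t off)
	{
		return read_u16_le(buf, off) |
		       ((uint32_t)read_u16_le(buf, off + 2) << 16);
	}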
index 3961941..d2a30a7 100644 (file)
@@ -166,7 +166,7 @@ struct symbol {
        struct module *module;
        unsigned int crc;
        int crc_valid;
-       const char *namespace;
+       char *namespace;
        unsigned int weak:1;
        unsigned int vmlinux:1;    /* 1 if symbol is defined in vmlinux */
        unsigned int kernel:1;     /* 1 if symbol is from kernel
@@ -348,34 +348,43 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec)
                return export_unknown;
 }
 
-static const char *sym_extract_namespace(const char **symname)
+static const char *namespace_from_kstrtabns(struct elf_info *info,
+                                           Elf_Sym *kstrtabns)
+{
+       char *value = info->ksymtab_strings + kstrtabns->st_value;
+       return value[0] ? value : NULL;
+}
+
+static void sym_update_namespace(const char *symname, const char *namespace)
 {
-       size_t n;
-       char *dupsymname;
+       struct symbol *s = find_symbol(symname);
 
-       n = strcspn(*symname, ".");
-       if (n < strlen(*symname) - 1) {
-               dupsymname = NOFAIL(strdup(*symname));
-               dupsymname[n] = '\0';
-               *symname = dupsymname;
-               return dupsymname + n + 1;
+       /*
+        * That symbol should have been created earlier and thus this is
+        * actually an assertion.
+        */
+       if (!s) {
+               merror("Could not update namespace(%s) for symbol %s\n",
+                      namespace, symname);
+               return;
        }
 
-       return NULL;
+       free(s->namespace);
+       s->namespace =
+               namespace && namespace[0] ? NOFAIL(strdup(namespace)) : NULL;
 }
 
 /**
  * Add an exported symbol - it may have already been added without a
  * CRC, in this case just update the CRC
  **/
-static struct symbol *sym_add_exported(const char *name, const char *namespace,
-                                      struct module *mod, enum export export)
+static struct symbol *sym_add_exported(const char *name, struct module *mod,
+                                      enum export export)
 {
        struct symbol *s = find_symbol(name);
 
        if (!s) {
                s = new_symbol(name, mod, export);
-               s->namespace = namespace;
        } else {
                if (!s->preloaded) {
                        warn("%s: '%s' exported twice. Previous export was in %s%s\n",
@@ -584,6 +593,10 @@ static int parse_elf(struct elf_info *info, const char *filename)
                        info->export_unused_gpl_sec = i;
                else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
                        info->export_gpl_future_sec = i;
+               else if (strcmp(secname, "__ksymtab_strings") == 0)
+                       info->ksymtab_strings = (void *)hdr +
+                                               sechdrs[i].sh_offset -
+                                               sechdrs[i].sh_addr;
 
                if (sechdrs[i].sh_type == SHT_SYMTAB) {
                        unsigned int sh_link_idx;
@@ -672,7 +685,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
        unsigned int crc;
        enum export export;
        bool is_crc = false;
-       const char *name, *namespace;
+       const char *name;
 
        if ((!is_vmlinux(mod->name) || mod->is_dot_o) &&
            strstarts(symname, "__ksymtab"))
@@ -745,8 +758,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
                /* All exported symbols */
                if (strstarts(symname, "__ksymtab_")) {
                        name = symname + strlen("__ksymtab_");
-                       namespace = sym_extract_namespace(&name);
-                       sym_add_exported(name, namespace, mod, export);
+                       sym_add_exported(name, mod, export);
                }
                if (strcmp(symname, "init_module") == 0)
                        mod->has_init = 1;
@@ -2042,6 +2054,16 @@ static void read_symbols(const char *modname)
                handle_moddevtable(mod, &info, sym, symname);
        }
 
+       /* Apply symbol namespaces from __kstrtabns_<symbol> entries. */
+       for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
+               symname = remove_dot(info.strtab + sym->st_name);
+
+               if (strstarts(symname, "__kstrtabns_"))
+                       sym_update_namespace(symname + strlen("__kstrtabns_"),
+                                            namespace_from_kstrtabns(&info,
+                                                                     sym));
+       }
+
        // check for static EXPORT_SYMBOL_* functions && global vars
        for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
                unsigned char bind = ELF_ST_BIND(sym->st_info);
@@ -2453,12 +2475,12 @@ static void read_dump(const char *fname, unsigned int kernel)
                        mod = new_module(modname);
                        mod->skip = 1;
                }
-               s = sym_add_exported(symname, namespace, mod,
-                                    export_no(export));
+               s = sym_add_exported(symname, mod, export_no(export));
                s->kernel    = kernel;
                s->preloaded = 1;
                s->is_static = 0;
                sym_update_crc(symname, mod, crc, export_no(export));
+               sym_update_namespace(symname, namespace);
        }
        release_file(file, size);
        return;
@@ -2652,15 +2674,20 @@ int main(int argc, char **argv)
                fatal("modpost: Section mismatches detected.\n"
                      "Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.\n");
        for (n = 0; n < SYMBOL_HASH_SIZE; n++) {
-               struct symbol *s = symbolhash[n];
+               struct symbol *s;
+
+               for (s = symbolhash[n]; s; s = s->next) {
+                       /*
+                        * Do not check "vmlinux". This avoids the same warnings
+                        * shown twice, and false-positives for ARCH=um.
+                        */
+                       if (is_vmlinux(s->module->name) && !s->module->is_dot_o)
+                               continue;
 
-               while (s) {
                        if (s->is_static)
                                warn("\"%s\" [%s] is a static %s\n",
                                     s->name, s->module->name,
                                     export_str(s->export));
-
-                       s = s->next;
                }
        }
 
index 92a926d..ad271bc 100644 (file)
@@ -143,6 +143,7 @@ struct elf_info {
        Elf_Section  export_gpl_sec;
        Elf_Section  export_unused_gpl_sec;
        Elf_Section  export_gpl_future_sec;
+       char         *ksymtab_strings;
        char         *strtab;
        char         *modinfo;
        unsigned int modinfo_len;
index 6135574..1da7bca 100755 (executable)
 use warnings;
 use strict;
 use File::Find;
+use File::Spec;
 
 my $nm = ($ENV{'NM'} || "nm") . " -p";
 my $objdump = ($ENV{'OBJDUMP'} || "objdump") . " -s -j .comment";
-my $srctree = "";
-my $objtree = "";
-$srctree = "$ENV{'srctree'}/" if (exists($ENV{'srctree'}));
-$objtree = "$ENV{'objtree'}/" if (exists($ENV{'objtree'}));
+my $srctree = File::Spec->curdir();
+my $objtree = File::Spec->curdir();
+$srctree = File::Spec->rel2abs($ENV{'srctree'}) if (exists($ENV{'srctree'}));
+$objtree = File::Spec->rel2abs($ENV{'objtree'}) if (exists($ENV{'objtree'}));
 
 if ($#ARGV != -1) {
        print STDERR "usage: $0 takes no parameters\n";
@@ -231,9 +232,9 @@ sub do_nm
        }
        ($source = $basename) =~ s/\.o$//;
        if (-e "$source.c" || -e "$source.S") {
-               $source = "$objtree$File::Find::dir/$source";
+               $source = File::Spec->catfile($objtree, $File::Find::dir, $source)
        } else {
-               $source = "$srctree$File::Find::dir/$source";
+               $source = File::Spec->catfile($srctree, $File::Find::dir, $source)
        }
        if (! -e "$source.c" && ! -e "$source.S") {
                # No obvious source, exclude the object if it is conglomerate
index ac2b603..dda6fba 100644 (file)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 # Linux kernel symbol namespace import generator
 #
@@ -33,7 +33,7 @@ generate_deps() {
        if [ ! -f "$ns_deps_file" ]; then return; fi
        local mod_source_files=`cat $mod_file | sed -n 1p                      \
                                              | sed -e 's/\.o/\.c/g'           \
-                                             | sed "s/[^ ]* */${srctree}\/&/g"`
+                                             | sed "s|[^ ]* *|${srctree}/&|g"`
        for ns in `cat $ns_deps_file`; do
                echo "Adding namespace $ns to module $mod_name (if needed)."
                generate_deps_for_ns $ns $mod_source_files
@@ -41,7 +41,7 @@ generate_deps() {
                for source_file in $mod_source_files; do
                        sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp
                        offset=$(wc -l ${source_file}.tmp | awk '{print $1;}')
-                       cat $source_file | grep MODULE_IMPORT_NS | sort -u >> ${source_file}.tmp
+                       cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp
                        tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp
                        if ! diff -q ${source_file} ${source_file}.tmp; then
                                mv ${source_file}.tmp ${source_file}
index 8f0a278..74eab03 100644 (file)
@@ -389,11 +389,8 @@ static int nop_mcount(Elf_Shdr const *const relhdr,
                        mcountsym = get_mcountsym(sym0, relp, str0);
 
                if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) {
-                       if (make_nop) {
+                       if (make_nop)
                                ret = make_nop((void *)ehdr, _w(shdr->sh_offset) + _w(relp->r_offset));
-                               if (ret < 0)
-                                       return -1;
-                       }
                        if (warn_on_notrace_sect && !once) {
                                printf("Section %s has mcount callers being ignored\n",
                                       txtname);
index 365b3c2..a2998b1 100755 (executable)
@@ -93,7 +93,7 @@ scm_version()
        # Check for mercurial and a mercurial repo.
        if test -d .hg && hgid=`hg id 2>/dev/null`; then
                # Do we have an tagged version?  If so, latesttagdistance == 1
-               if [ "`hg log -r . --template '{latesttagdistance}'`" == "1" ]; then
+               if [ "`hg log -r . --template '{latesttagdistance}'`" = "1" ]; then
                        id=`hg log -r . --template '{latesttag}'`
                        printf '%s%s' -hg "$id"
                else
@@ -126,7 +126,7 @@ scm_version()
 
 collect_files()
 {
-       local file res
+       local file res=
 
        for file; do
                case "$file" in
index 0d65594..2a1a2d3 100644 (file)
@@ -237,6 +237,7 @@ source "security/apparmor/Kconfig"
 source "security/loadpin/Kconfig"
 source "security/yama/Kconfig"
 source "security/safesetid/Kconfig"
+source "security/lockdown/Kconfig"
 
 source "security/integrity/Kconfig"
 
@@ -276,11 +277,11 @@ endchoice
 
 config LSM
        string "Ordered list of enabled LSMs"
-       default "yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK
-       default "yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR
-       default "yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO
-       default "yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC
-       default "yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+       default "lockdown,yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK
+       default "lockdown,yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR
+       default "lockdown,yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO
+       default "lockdown,yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC
+       default "lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
        help
          A comma-separated list of LSMs, in initialization order.
          Any LSMs left off this list will be ignored. This can be
index c598b90..be1dd9d 100644 (file)
@@ -11,6 +11,7 @@ subdir-$(CONFIG_SECURITY_APPARMOR)    += apparmor
 subdir-$(CONFIG_SECURITY_YAMA)         += yama
 subdir-$(CONFIG_SECURITY_LOADPIN)      += loadpin
 subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
+subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown
 
 # always enable default capabilities
 obj-y                                  += commoncap.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_SECURITY_APPARMOR)               += apparmor/
 obj-$(CONFIG_SECURITY_YAMA)            += yama/
 obj-$(CONFIG_SECURITY_LOADPIN)         += loadpin/
 obj-$(CONFIG_SECURITY_SAFESETID)       += safesetid/
+obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)    += lockdown/
 obj-$(CONFIG_CGROUP_DEVICE)            += device_cgroup.o
 
 # Object integrity file lists
index c352532..0bae6ad 100644 (file)
@@ -18,8 +18,8 @@ if INTEGRITY
 
 config INTEGRITY_SIGNATURE
        bool "Digital signature verification using multiple keyrings"
-       depends on KEYS
        default n
+       select KEYS
        select SIGNATURE
        help
          This option enables digital signature verification support
index 19faace..35e6ca7 100644 (file)
@@ -13,9 +13,6 @@ integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyrin
 integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \
                                        platform_certs/load_uefi.o
 integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o
-$(obj)/load_uefi.o: KBUILD_CFLAGS += -fshort-wchar
 
-subdir-$(CONFIG_IMA)                   += ima
 obj-$(CONFIG_IMA)                      += ima/
-subdir-$(CONFIG_EVM)                   += evm
 obj-$(CONFIG_EVM)                      += evm/
index 868ade3..ea1aae3 100644 (file)
@@ -39,11 +39,10 @@ static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = {
 #define restrict_link_to_ima restrict_link_by_builtin_trusted
 #endif
 
-int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
-                           const char *digest, int digestlen)
+static struct key *integrity_keyring_from_id(const unsigned int id)
 {
-       if (id >= INTEGRITY_KEYRING_MAX || siglen < 2)
-               return -EINVAL;
+       if (id >= INTEGRITY_KEYRING_MAX)
+               return ERR_PTR(-EINVAL);
 
        if (!keyring[id]) {
                keyring[id] =
@@ -52,23 +51,49 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
                        int err = PTR_ERR(keyring[id]);
                        pr_err("no %s keyring: %d\n", keyring_name[id], err);
                        keyring[id] = NULL;
-                       return err;
+                       return ERR_PTR(err);
                }
        }
 
+       return keyring[id];
+}
+
+int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
+                           const char *digest, int digestlen)
+{
+       struct key *keyring;
+
+       if (siglen < 2)
+               return -EINVAL;
+
+       keyring = integrity_keyring_from_id(id);
+       if (IS_ERR(keyring))
+               return PTR_ERR(keyring);
+
        switch (sig[1]) {
        case 1:
                /* v1 API expect signature without xattr type */
-               return digsig_verify(keyring[id], sig + 1, siglen - 1,
-                                    digest, digestlen);
+               return digsig_verify(keyring, sig + 1, siglen - 1, digest,
+                                    digestlen);
        case 2:
-               return asymmetric_verify(keyring[id], sig, siglen,
-                                        digest, digestlen);
+               return asymmetric_verify(keyring, sig, siglen, digest,
+                                        digestlen);
        }
 
        return -EOPNOTSUPP;
 }
 
+int integrity_modsig_verify(const unsigned int id, const struct modsig *modsig)
+{
+       struct key *keyring;
+
+       keyring = integrity_keyring_from_id(id);
+       if (IS_ERR(keyring))
+               return PTR_ERR(keyring);
+
+       return ima_modsig_verify(keyring, modsig);
+}
+
 static int __init __integrity_init_keyring(const unsigned int id,
                                           key_perm_t perm,
                                           struct key_restriction *restriction)
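Editor's note: the digsig.c refactor above funnels keyring lookup through integrity_keyring_from_id(), which reports failure through the returned pointer itself. That relies on the kernel's ERR_PTR convention, sketched here after include/linux/err.h (the real helpers carry __force/__must_check annotations):

	#define MAX_ERRNO	4095

	static inline void *ERR_PTR(long error)
	{
		return (void *)error;	/* errnos live in the top page */
	}

	static inline long PTR_ERR(const void *ptr)
	{
		return (long)ptr;
	}

	static inline int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}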
index 2ced99d..838476d 100644 (file)
@@ -160,7 +160,7 @@ config IMA_APPRAISE
 
 config IMA_ARCH_POLICY
         bool "Enable loading an IMA architecture specific policy"
-        depends on (KEXEC_VERIFY_SIG && IMA) || IMA_APPRAISE \
+        depends on (KEXEC_SIG && IMA) || IMA_APPRAISE \
                   && INTEGRITY_ASYMMETRIC_KEYS
         default n
         help
@@ -233,6 +233,19 @@ config IMA_APPRAISE_BOOTPARAM
          This option enables the different "ima_appraise=" modes
          (eg. fix, log) from the boot command line.
 
+config IMA_APPRAISE_MODSIG
+       bool "Support module-style signatures for appraisal"
+       depends on IMA_APPRAISE
+       depends on INTEGRITY_ASYMMETRIC_KEYS
+       select PKCS7_MESSAGE_PARSER
+       select MODULE_SIG_FORMAT
+       default n
+       help
+          Adds support for signatures appended to files. The format of the
+          appended signature is the same used for signed kernel modules.
+          The modsig keyword can be used in the IMA policy to allow a hook
+          to accept such signatures.
+
 config IMA_TRUSTED_KEYRING
        bool "Require all keys on the .ima keyring be signed (deprecated)"
        depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING
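Editor's note: the "module-style" signature the new IMA_APPRAISE_MODSIG option appraises is a PKCS#7 blob appended to the file, followed by a fixed descriptor and a magic string, sketched below after include/linux/module_signature.h:

	struct module_signature {
		u8	algo;		/* public-key crypto algorithm [0]     */
		u8	hash;		/* digest algorithm [0]                */
		u8	id_type;	/* key identifier type [PKEY_ID_PKCS7] */
		u8	signer_len;	/* length of signer's name [0]         */
		u8	key_id_len;	/* length of key identifier [0]        */
		u8	__pad[3];
		__be32	sig_len;	/* length of signature data            */
	};

	#define MODULE_SIG_STRING "~Module signature appended~\n"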
index d921dc4..31d57cd 100644 (file)
@@ -9,5 +9,6 @@ obj-$(CONFIG_IMA) += ima.o
 ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
         ima_policy.o ima_template.o ima_template_lib.o
 ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
+ima-$(CONFIG_IMA_APPRAISE_MODSIG) += ima_modsig.o
 ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o
 obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o
index 011b91c..3689081 100644 (file)
@@ -60,6 +60,7 @@ struct ima_event_data {
        const unsigned char *filename;
        struct evm_ima_xattr_data *xattr_value;
        int xattr_len;
+       const struct modsig *modsig;
        const char *violation;
        const void *buf;
        int buf_len;
@@ -113,6 +114,8 @@ struct ima_kexec_hdr {
        u64 count;
 };
 
+extern const int read_idmap[];
+
 #ifdef CONFIG_HAVE_IMA_KEXEC
 void ima_load_kexec_buffer(void);
 #else
@@ -149,6 +152,7 @@ int template_desc_init_fields(const char *template_fmt,
                              int *num_fields);
 struct ima_template_desc *ima_template_desc_current(void);
 struct ima_template_desc *lookup_template_desc(const char *name);
+bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
 int ima_restore_measurement_entry(struct ima_template_entry *entry);
 int ima_restore_measurement_list(loff_t bufsize, void *buf);
 int ima_measurements_show(struct seq_file *m, void *v);
@@ -196,6 +200,10 @@ enum ima_hooks {
        __ima_hooks(__ima_hook_enumify)
 };
 
+extern const char *const func_tokens[];
+
+struct modsig;
+
 /* LIM API function definitions */
 int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
                   int mask, enum ima_hooks func, int *pcr,
@@ -203,11 +211,11 @@ int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
 int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func);
 int ima_collect_measurement(struct integrity_iint_cache *iint,
                            struct file *file, void *buf, loff_t size,
-                           enum hash_algo algo);
+                           enum hash_algo algo, struct modsig *modsig);
 void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
                           const unsigned char *filename,
                           struct evm_ima_xattr_data *xattr_value,
-                          int xattr_len, int pcr,
+                          int xattr_len, const struct modsig *modsig, int pcr,
                           struct ima_template_desc *template_desc);
 void ima_audit_measurement(struct integrity_iint_cache *iint,
                           const unsigned char *filename);
@@ -249,7 +257,7 @@ int ima_appraise_measurement(enum ima_hooks func,
                             struct integrity_iint_cache *iint,
                             struct file *file, const unsigned char *filename,
                             struct evm_ima_xattr_data *xattr_value,
-                            int xattr_len);
+                            int xattr_len, const struct modsig *modsig);
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
@@ -265,7 +273,8 @@ static inline int ima_appraise_measurement(enum ima_hooks func,
                                           struct file *file,
                                           const unsigned char *filename,
                                           struct evm_ima_xattr_data *xattr_value,
-                                          int xattr_len)
+                                          int xattr_len,
+                                          const struct modsig *modsig)
 {
        return INTEGRITY_UNKNOWN;
 }
@@ -302,6 +311,51 @@ static inline int ima_read_xattr(struct dentry *dentry,
 
 #endif /* CONFIG_IMA_APPRAISE */
 
+#ifdef CONFIG_IMA_APPRAISE_MODSIG
+bool ima_hook_supports_modsig(enum ima_hooks func);
+int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
+                   struct modsig **modsig);
+void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size);
+int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo,
+                         const u8 **digest, u32 *digest_size);
+int ima_get_raw_modsig(const struct modsig *modsig, const void **data,
+                      u32 *data_len);
+void ima_free_modsig(struct modsig *modsig);
+#else
+static inline bool ima_hook_supports_modsig(enum ima_hooks func)
+{
+       return false;
+}
+
+static inline int ima_read_modsig(enum ima_hooks func, const void *buf,
+                                 loff_t buf_len, struct modsig **modsig)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void ima_collect_modsig(struct modsig *modsig, const void *buf,
+                                     loff_t size)
+{
+}
+
+static inline int ima_get_modsig_digest(const struct modsig *modsig,
+                                       enum hash_algo *algo, const u8 **digest,
+                                       u32 *digest_size)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int ima_get_raw_modsig(const struct modsig *modsig,
+                                    const void **data, u32 *data_len)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void ima_free_modsig(struct modsig *modsig)
+{
+}
+#endif /* CONFIG_IMA_APPRAISE_MODSIG */
+
 /* LSM based policy rules require audit */
 #ifdef CONFIG_IMA_LSM_RULES
 
index f614e22..610759f 100644 (file)
@@ -45,8 +45,8 @@ int ima_alloc_init_template(struct ima_event_data *event_data,
        else
                template_desc = ima_template_desc_current();
 
-       *entry = kzalloc(sizeof(**entry) + template_desc->num_fields *
-                        sizeof(struct ima_field_data), GFP_NOFS);
+       *entry = kzalloc(struct_size(*entry, template_data,
+                                    template_desc->num_fields), GFP_NOFS);
        if (!*entry)
                return -ENOMEM;
 
@@ -205,7 +205,7 @@ int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
  */
 int ima_collect_measurement(struct integrity_iint_cache *iint,
                            struct file *file, void *buf, loff_t size,
-                           enum hash_algo algo)
+                           enum hash_algo algo, struct modsig *modsig)
 {
        const char *audit_cause = "failed";
        struct inode *inode = file_inode(file);
@@ -219,6 +219,14 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
                char digest[IMA_MAX_DIGEST_SIZE];
        } hash;
 
+       /*
+        * Always collect the modsig, because IMA might have already collected
+        * the file digest without collecting the modsig in a previous
+        * measurement rule.
+        */
+       if (modsig)
+               ima_collect_modsig(modsig, buf, size);
+
        if (iint->flags & IMA_COLLECTED)
                goto out;
 
@@ -285,7 +293,7 @@ out:
 void ima_store_measurement(struct integrity_iint_cache *iint,
                           struct file *file, const unsigned char *filename,
                           struct evm_ima_xattr_data *xattr_value,
-                          int xattr_len, int pcr,
+                          int xattr_len, const struct modsig *modsig, int pcr,
                           struct ima_template_desc *template_desc)
 {
        static const char op[] = "add_template_measure";
@@ -297,10 +305,17 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
                                             .file = file,
                                             .filename = filename,
                                             .xattr_value = xattr_value,
-                                            .xattr_len = xattr_len };
+                                            .xattr_len = xattr_len,
+                                            .modsig = modsig };
        int violation = 0;
 
-       if (iint->measured_pcrs & (0x1 << pcr))
+       /*
+        * We still need to store the measurement in the case of MODSIG because
+        * we only have its contents to put in the list at the time of
+        * appraisal, but a file measurement from earlier might already exist in
+        * the measurement list.
+        */
+       if (iint->measured_pcrs & (0x1 << pcr) && !modsig)
                return;
 
        result = ima_alloc_init_template(&event_data, &entry, template_desc);
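Editor's note: the first ima_api.c hunk above swaps an open-coded size computation for struct_size(), the overflow-checked idiom for allocating a struct ending in a flexible array member. A standalone illustration of the arithmetic (the kernel macro in <linux/overflow.h> additionally saturates to SIZE_MAX on overflow):

	#include <stdlib.h>

	struct entry {
		int num_fields;
		void *field[];		/* flexible array member */
	};

	int main(void)
	{
		size_t n = 8;
		/* what struct_size(e, field, n) computes */
		struct entry *e = calloc(1, sizeof(*e) + n * sizeof(e->field[0]));

		free(e);
		return 0;
	}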
index 89b8319..136ae4e 100644 (file)
@@ -199,6 +199,110 @@ int ima_read_xattr(struct dentry *dentry,
        return ret;
 }
 
+/*
+ * xattr_verify - verify xattr digest or signature
+ *
+ * Verify whether the hash or signature matches the file contents.
+ *
+ * Return 0 on success, error code otherwise.
+ */
+static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
+                       struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                       enum integrity_status *status, const char **cause)
+{
+       int rc = -EINVAL, hash_start = 0;
+
+       switch (xattr_value->type) {
+       case IMA_XATTR_DIGEST_NG:
+               /* first byte contains algorithm id */
+               hash_start = 1;
+               /* fall through */
+       case IMA_XATTR_DIGEST:
+               if (iint->flags & IMA_DIGSIG_REQUIRED) {
+                       *cause = "IMA-signature-required";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
+               clear_bit(IMA_DIGSIG, &iint->atomic_flags);
+               if (xattr_len - sizeof(xattr_value->type) - hash_start >=
+                               iint->ima_hash->length)
+                       /*
+                        * xattr length may be longer. md5 hash in previous
+                        * version occupied 20 bytes in xattr, instead of 16
+                        */
+                       rc = memcmp(&xattr_value->data[hash_start],
+                                   iint->ima_hash->digest,
+                                   iint->ima_hash->length);
+               else
+                       rc = -EINVAL;
+               if (rc) {
+                       *cause = "invalid-hash";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
+               *status = INTEGRITY_PASS;
+               break;
+       case EVM_IMA_XATTR_DIGSIG:
+               set_bit(IMA_DIGSIG, &iint->atomic_flags);
+               rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
+                                            (const char *)xattr_value,
+                                            xattr_len,
+                                            iint->ima_hash->digest,
+                                            iint->ima_hash->length);
+               if (rc == -EOPNOTSUPP) {
+                       *status = INTEGRITY_UNKNOWN;
+                       break;
+               }
+               if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc &&
+                   func == KEXEC_KERNEL_CHECK)
+                       rc = integrity_digsig_verify(INTEGRITY_KEYRING_PLATFORM,
+                                                    (const char *)xattr_value,
+                                                    xattr_len,
+                                                    iint->ima_hash->digest,
+                                                    iint->ima_hash->length);
+               if (rc) {
+                       *cause = "invalid-signature";
+                       *status = INTEGRITY_FAIL;
+               } else {
+                       *status = INTEGRITY_PASS;
+               }
+               break;
+       default:
+               *status = INTEGRITY_UNKNOWN;
+               *cause = "unknown-ima-data";
+               break;
+       }
+
+       return rc;
+}
+
+/*
+ * modsig_verify - verify modsig signature
+ *
+ * Verify whether the signature matches the file contents.
+ *
+ * Return 0 on success, error code otherwise.
+ */
+static int modsig_verify(enum ima_hooks func, const struct modsig *modsig,
+                        enum integrity_status *status, const char **cause)
+{
+       int rc;
+
+       rc = integrity_modsig_verify(INTEGRITY_KEYRING_IMA, modsig);
+       if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc &&
+           func == KEXEC_KERNEL_CHECK)
+               rc = integrity_modsig_verify(INTEGRITY_KEYRING_PLATFORM,
+                                            modsig);
+       if (rc) {
+               *cause = "invalid-signature";
+               *status = INTEGRITY_FAIL;
+       } else {
+               *status = INTEGRITY_PASS;
+       }
+
+       return rc;
+}
+
 /*
  * ima_appraise_measurement - appraise file measurement
  *
@@ -211,19 +315,22 @@ int ima_appraise_measurement(enum ima_hooks func,
                             struct integrity_iint_cache *iint,
                             struct file *file, const unsigned char *filename,
                             struct evm_ima_xattr_data *xattr_value,
-                            int xattr_len)
+                            int xattr_len, const struct modsig *modsig)
 {
        static const char op[] = "appraise_data";
        const char *cause = "unknown";
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_backing_inode(dentry);
        enum integrity_status status = INTEGRITY_UNKNOWN;
-       int rc = xattr_len, hash_start = 0;
+       int rc = xattr_len;
+       bool try_modsig = iint->flags & IMA_MODSIG_ALLOWED && modsig;
 
-       if (!(inode->i_opflags & IOP_XATTR))
+       /* If not appraising a modsig, we need an xattr. */
+       if (!(inode->i_opflags & IOP_XATTR) && !try_modsig)
                return INTEGRITY_UNKNOWN;
 
-       if (rc <= 0) {
+       /* If reading the xattr failed and there's no modsig, error out. */
+       if (rc <= 0 && !try_modsig) {
                if (rc && rc != -ENODATA)
                        goto out;
 
@@ -246,6 +353,10 @@ int ima_appraise_measurement(enum ima_hooks func,
        case INTEGRITY_UNKNOWN:
                break;
        case INTEGRITY_NOXATTRS:        /* No EVM protected xattrs. */
+               /* It's fine not to have xattrs when using a modsig. */
+               if (try_modsig)
+                       break;
+               /* fall through */
        case INTEGRITY_NOLABEL:         /* No security.evm xattr. */
                cause = "missing-HMAC";
                goto out;
@@ -256,65 +367,18 @@ int ima_appraise_measurement(enum ima_hooks func,
                WARN_ONCE(true, "Unexpected integrity status %d\n", status);
        }
 
-       switch (xattr_value->type) {
-       case IMA_XATTR_DIGEST_NG:
-               /* first byte contains algorithm id */
-               hash_start = 1;
-               /* fall through */
-       case IMA_XATTR_DIGEST:
-               if (iint->flags & IMA_DIGSIG_REQUIRED) {
-                       cause = "IMA-signature-required";
-                       status = INTEGRITY_FAIL;
-                       break;
-               }
-               clear_bit(IMA_DIGSIG, &iint->atomic_flags);
-               if (xattr_len - sizeof(xattr_value->type) - hash_start >=
-                               iint->ima_hash->length)
-                       /* xattr length may be longer. md5 hash in previous
-                          version occupied 20 bytes in xattr, instead of 16
-                        */
-                       rc = memcmp(&xattr_value->data[hash_start],
-                                   iint->ima_hash->digest,
-                                   iint->ima_hash->length);
-               else
-                       rc = -EINVAL;
-               if (rc) {
-                       cause = "invalid-hash";
-                       status = INTEGRITY_FAIL;
-                       break;
-               }
-               status = INTEGRITY_PASS;
-               break;
-       case EVM_IMA_XATTR_DIGSIG:
-               set_bit(IMA_DIGSIG, &iint->atomic_flags);
-               rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
-                                            (const char *)xattr_value,
-                                            xattr_len,
-                                            iint->ima_hash->digest,
-                                            iint->ima_hash->length);
-               if (rc == -EOPNOTSUPP) {
-                       status = INTEGRITY_UNKNOWN;
-                       break;
-               }
-               if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc &&
-                   func == KEXEC_KERNEL_CHECK)
-                       rc = integrity_digsig_verify(INTEGRITY_KEYRING_PLATFORM,
-                                                    (const char *)xattr_value,
-                                                    xattr_len,
-                                                    iint->ima_hash->digest,
-                                                    iint->ima_hash->length);
-               if (rc) {
-                       cause = "invalid-signature";
-                       status = INTEGRITY_FAIL;
-               } else {
-                       status = INTEGRITY_PASS;
-               }
-               break;
-       default:
-               status = INTEGRITY_UNKNOWN;
-               cause = "unknown-ima-data";
-               break;
-       }
+       if (xattr_value)
+               rc = xattr_verify(func, iint, xattr_value, xattr_len, &status,
+                                 &cause);
+
+       /*
+        * If we have a modsig and either no imasig or the imasig's key isn't
+        * known, then try verifying the modsig.
+        */
+       if (try_modsig &&
+           (!xattr_value || xattr_value->type == IMA_XATTR_DIGEST_NG ||
+            rc == -ENOKEY))
+               rc = modsig_verify(func, modsig, &status, &cause);
 
 out:
        /*
@@ -332,7 +396,7 @@ out:
                                    op, cause, rc, 0);
        } else if (status != INTEGRITY_PASS) {
                /* Fix mode, but don't replace file signatures. */
-               if ((ima_appraise & IMA_APPRAISE_FIX) &&
+               if ((ima_appraise & IMA_APPRAISE_FIX) && !try_modsig &&
                    (!xattr_value ||
                     xattr_value->type != EVM_IMA_XATTR_DIGSIG)) {
                        if (!ima_fix_xattr(dentry, iint))
@@ -371,7 +435,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
            !(iint->flags & IMA_HASH))
                return;
 
-       rc = ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo);
+       rc = ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo, NULL);
        if (rc < 0)
                return;
 
index d4c7b8e..73044fc 100644 (file)
@@ -268,8 +268,16 @@ static int ima_calc_file_hash_atfm(struct file *file,
                rbuf_len = min_t(loff_t, i_size - offset, rbuf_size[active]);
                rc = integrity_kernel_read(file, offset, rbuf[active],
                                           rbuf_len);
-               if (rc != rbuf_len)
+               if (rc != rbuf_len) {
+                       if (rc >= 0)
+                               rc = -EINVAL;
+                       /*
+                        * Forward current rc, do not overwrite with return value
+                        * from ahash_wait()
+                        */
+                       ahash_wait(ahash_rc, &wait);
                        goto out3;
+               }
 
                if (rbuf[1] && offset) {
                        /* Using two buffers, and it is not the first
index 5840197..60027c6 100644 (file)
@@ -202,6 +202,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
        int rc = 0, action, must_appraise = 0;
        int pcr = CONFIG_IMA_MEASURE_PCR_IDX;
        struct evm_ima_xattr_data *xattr_value = NULL;
+       struct modsig *modsig = NULL;
        int xattr_len = 0;
        bool violation_check;
        enum hash_algo hash_algo;
@@ -302,13 +303,27 @@ static int process_measurement(struct file *file, const struct cred *cred,
        }
 
        if ((action & IMA_APPRAISE_SUBMASK) ||
-                   strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0)
+           strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0) {
                /* read 'security.ima' */
                xattr_len = ima_read_xattr(file_dentry(file), &xattr_value);
 
+               /*
+                * Read the appended modsig if allowed by the policy, and allow
+                * an additional measurement list entry, if needed, based on the
+                * template format and whether the file was already measured.
+                */
+               if (iint->flags & IMA_MODSIG_ALLOWED) {
+                       rc = ima_read_modsig(func, buf, size, &modsig);
+
+                       if (!rc && ima_template_has_modsig(template_desc) &&
+                           iint->flags & IMA_MEASURED)
+                               action |= IMA_MEASURE;
+               }
+       }
+
        hash_algo = ima_get_hash_algo(xattr_value, xattr_len);
 
-       rc = ima_collect_measurement(iint, file, buf, size, hash_algo);
+       rc = ima_collect_measurement(iint, file, buf, size, hash_algo, modsig);
        if (rc != 0 && rc != -EBADF && rc != -EINVAL)
                goto out_locked;
 
@@ -317,12 +332,12 @@ static int process_measurement(struct file *file, const struct cred *cred,
 
        if (action & IMA_MEASURE)
                ima_store_measurement(iint, file, pathname,
-                                     xattr_value, xattr_len, pcr,
+                                     xattr_value, xattr_len, modsig, pcr,
                                      template_desc);
        if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) {
                inode_lock(inode);
                rc = ima_appraise_measurement(func, iint, file, pathname,
-                                             xattr_value, xattr_len);
+                                             xattr_value, xattr_len, modsig);
                inode_unlock(inode);
                if (!rc)
                        rc = mmap_violation_check(func, file, &pathbuf,
@@ -339,6 +354,7 @@ out_locked:
                rc = -EACCES;
        mutex_unlock(&iint->mutex);
        kfree(xattr_value);
+       ima_free_modsig(modsig);
 out:
        if (pathbuf)
                __putname(pathbuf);
@@ -502,7 +518,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
        return 0;
 }
 
-static const int read_idmap[READING_MAX_ID] = {
+const int read_idmap[READING_MAX_ID] = {
        [READING_FIRMWARE] = FIRMWARE_CHECK,
        [READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
        [READING_MODULE] = MODULE_CHECK,
@@ -574,7 +590,7 @@ int ima_load_data(enum kernel_load_data_id id)
 
        switch (id) {
        case LOADING_KEXEC_IMAGE:
-               if (IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG)
+               if (IS_ENABLED(CONFIG_KEXEC_SIG)
                    && arch_ima_get_secureboot()) {
                        pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n");
                        return -EACCES;
diff --git a/security/integrity/ima/ima_modsig.c b/security/integrity/ima/ima_modsig.c
new file mode 100644 (file)
index 0000000..d106885
--- /dev/null
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IMA support for appraising module-style appended signatures.
+ *
+ * Copyright (C) 2019  IBM Corporation
+ *
+ * Author:
+ * Thiago Jung Bauermann <bauerman@linux.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module_signature.h>
+#include <keys/asymmetric-type.h>
+#include <crypto/pkcs7.h>
+
+#include "ima.h"
+
+struct modsig {
+       struct pkcs7_message *pkcs7_msg;
+
+       enum hash_algo hash_algo;
+
+       /* This digest will go in the 'd-modsig' field of the IMA template. */
+       const u8 *digest;
+       u32 digest_size;
+
+       /*
+        * This is what will go to the measurement list if the template requires
+        * storing the signature.
+        */
+       int raw_pkcs7_len;
+       u8 raw_pkcs7[];
+};
+
+/**
+ * ima_hook_supports_modsig - can the policy allow modsig for this hook?
+ *
+ * modsig is only supported by hooks using ima_post_read_file(), because only
+ * they preload the contents of the file in a buffer. FILE_CHECK does that in
+ * some cases, but not when reached from vfs_open(). POLICY_CHECK can support
+ * it, but it's not useful in practice because it's a text file so deny.
+ */
+bool ima_hook_supports_modsig(enum ima_hooks func)
+{
+       switch (func) {
+       case KEXEC_KERNEL_CHECK:
+       case KEXEC_INITRAMFS_CHECK:
+       case MODULE_CHECK:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * ima_read_modsig - Read modsig from buf.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
+                   struct modsig **modsig)
+{
+       const size_t marker_len = strlen(MODULE_SIG_STRING);
+       const struct module_signature *sig;
+       struct modsig *hdr;
+       size_t sig_len;
+       const void *p;
+       int rc;
+
+       if (buf_len <= marker_len + sizeof(*sig))
+               return -ENOENT;
+
+       p = buf + buf_len - marker_len;
+       if (memcmp(p, MODULE_SIG_STRING, marker_len))
+               return -ENOENT;
+
+       buf_len -= marker_len;
+       sig = (const struct module_signature *)(p - sizeof(*sig));
+
+       rc = mod_check_sig(sig, buf_len, func_tokens[func]);
+       if (rc)
+               return rc;
+
+       sig_len = be32_to_cpu(sig->sig_len);
+       buf_len -= sig_len + sizeof(*sig);
+
+       /* Allocate sig_len additional bytes to hold the raw PKCS#7 data. */
+       hdr = kzalloc(sizeof(*hdr) + sig_len, GFP_KERNEL);
+       if (!hdr)
+               return -ENOMEM;
+
+       hdr->pkcs7_msg = pkcs7_parse_message(buf + buf_len, sig_len);
+       if (IS_ERR(hdr->pkcs7_msg)) {
+               rc = PTR_ERR(hdr->pkcs7_msg);
+               kfree(hdr);
+               return rc;
+       }
+
+       memcpy(hdr->raw_pkcs7, buf + buf_len, sig_len);
+       hdr->raw_pkcs7_len = sig_len;
+
+       /* We don't know the hash algorithm yet. */
+       hdr->hash_algo = HASH_ALGO__LAST;
+
+       *modsig = hdr;
+
+       return 0;
+}
+
+/**
+ * ima_collect_modsig - Calculate the file hash without the appended signature.
+ *
+ * Since the modsig is part of the file contents, the hash used in its signature
+ * isn't the same one ordinarily calculated by IMA. Therefore the PKCS7 code
+ * calculates a separate one for signature verification.
+ */
+void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size)
+{
+       int rc;
+
+       /*
+        * Provide the file contents (minus the appended sig) so that the PKCS7
+        * code can calculate the file hash.
+        */
+       size -= modsig->raw_pkcs7_len + strlen(MODULE_SIG_STRING) +
+               sizeof(struct module_signature);
+       rc = pkcs7_supply_detached_data(modsig->pkcs7_msg, buf, size);
+       if (rc)
+               return;
+
+       /* Ask the PKCS7 code to calculate the file hash. */
+       rc = pkcs7_get_digest(modsig->pkcs7_msg, &modsig->digest,
+                             &modsig->digest_size, &modsig->hash_algo);
+}
+
+int ima_modsig_verify(struct key *keyring, const struct modsig *modsig)
+{
+       return verify_pkcs7_message_sig(NULL, 0, modsig->pkcs7_msg, keyring,
+                                       VERIFYING_MODULE_SIGNATURE, NULL, NULL);
+}
+
+int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo,
+                         const u8 **digest, u32 *digest_size)
+{
+       *algo = modsig->hash_algo;
+       *digest = modsig->digest;
+       *digest_size = modsig->digest_size;
+
+       return 0;
+}
+
+int ima_get_raw_modsig(const struct modsig *modsig, const void **data,
+                      u32 *data_len)
+{
+       *data = &modsig->raw_pkcs7;
+       *data_len = modsig->raw_pkcs7_len;
+
+       return 0;
+}
+
+void ima_free_modsig(struct modsig *modsig)
+{
+       if (!modsig)
+               return;
+
+       pkcs7_free_message(modsig->pkcs7_msg);
+       kfree(modsig);
+}
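For reference, ima_read_modsig() above parses the same appended-signature
layout used for signed kernel modules: the raw file contents, then the PKCS#7
blob, then a struct module_signature, then the MODULE_SIG_STRING marker. A
minimal userspace-style sketch of locating the blob, assuming the
module_signature layout copied here from the kernel's
<linux/module_signature.h>:

	#include <string.h>
	#include <stdint.h>
	#include <stddef.h>
	#include <arpa/inet.h>	/* ntohl() standing in for be32_to_cpu() */

	#define MODULE_SIG_STRING "~Module signature appended~\n"

	/* Layout assumed from <linux/module_signature.h>. */
	struct module_signature {
		uint8_t		algo, hash, id_type, signer_len, key_id_len;
		uint8_t		__pad[3];
		uint32_t	sig_len;	/* big-endian PKCS#7 blob length */
	};

	/* Return a pointer to the PKCS#7 blob inside buf, or NULL if none. */
	static const void *find_modsig(const void *buf, size_t len,
				       size_t *sig_len)
	{
		const size_t marker_len = strlen(MODULE_SIG_STRING);
		const struct module_signature *sig;

		if (len <= marker_len + sizeof(*sig))
			return NULL;
		if (memcmp((const char *)buf + len - marker_len,
			   MODULE_SIG_STRING, marker_len))
			return NULL;

		sig = (const void *)((const char *)buf + len - marker_len -
				     sizeof(*sig));
		*sig_len = ntohl(sig->sig_len);
		if (*sig_len > len - marker_len - sizeof(*sig))
			return NULL;

		return (const char *)buf + len - marker_len - sizeof(*sig) -
		       *sig_len;
	}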
index 6df7f64..5380aca 100644 (file)
@@ -6,6 +6,9 @@
  * ima_policy.c
  *     - initialize default measure policy rules
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/fs.h>
@@ -491,6 +494,9 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
        struct ima_rule_entry *entry;
        int action = 0, actmask = flags | (flags << 1);
 
+       if (template_desc)
+               *template_desc = ima_template_desc_current();
+
        rcu_read_lock();
        list_for_each_entry_rcu(entry, ima_rules, list) {
 
@@ -510,6 +516,7 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
                                action |= IMA_FAIL_UNVERIFIABLE_SIGS;
                }
 
+
                if (entry->action & IMA_DO_MASK)
                        actmask &= ~(entry->action | entry->action << 1);
                else
@@ -520,8 +527,6 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
 
                if (template_desc && entry->template)
                        *template_desc = entry->template;
-               else if (template_desc)
-                       *template_desc = ima_template_desc_current();
 
                if (!actmask)
                        break;
@@ -843,6 +848,38 @@ static void ima_log_string(struct audit_buffer *ab, char *key, char *value)
        ima_log_string_op(ab, key, value, NULL);
 }
 
+/*
+ * Validating the appended signature included in the measurement list requires
+ * the file hash calculated without the appended signature (i.e., the 'd-modsig'
+ * field). Therefore, notify the user if they have the 'modsig' field but not
+ * the 'd-modsig' field in the template.
+ */
+static void check_template_modsig(const struct ima_template_desc *template)
+{
+#define MSG "template with 'modsig' field also needs 'd-modsig' field\n"
+       bool has_modsig, has_dmodsig;
+       static bool checked;
+       int i;
+
+       /* We only need to notify the user once. */
+       if (checked)
+               return;
+
+       has_modsig = has_dmodsig = false;
+       for (i = 0; i < template->num_fields; i++) {
+               if (!strcmp(template->fields[i]->field_id, "modsig"))
+                       has_modsig = true;
+               else if (!strcmp(template->fields[i]->field_id, "d-modsig"))
+                       has_dmodsig = true;
+       }
+
+       if (has_modsig && !has_dmodsig)
+               pr_notice(MSG);
+
+       checked = true;
+#undef MSG
+}
+
 static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 {
        struct audit_buffer *ab;
@@ -1128,6 +1165,10 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
                        ima_log_string(ab, "appraise_type", args[0].from);
                        if ((strcmp(args[0].from, "imasig")) == 0)
                                entry->flags |= IMA_DIGSIG_REQUIRED;
+                       else if (ima_hook_supports_modsig(entry->func) &&
+                                strcmp(args[0].from, "imasig|modsig") == 0)
+                               entry->flags |= IMA_DIGSIG_REQUIRED |
+                                               IMA_MODSIG_ALLOWED;
                        else
                                result = -EINVAL;
                        break;
@@ -1181,6 +1222,12 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
        else if (entry->action == APPRAISE)
                temp_ima_appraise |= ima_appraise_flag(entry->func);
 
+       if (!result && entry->flags & IMA_MODSIG_ALLOWED) {
+               template_desc = entry->template ? entry->template :
+                                                 ima_template_desc_current();
+               check_template_modsig(template_desc);
+       }
+
        audit_log_format(ab, "res=%d", !result);
        audit_log_end(ab);
        return result;
@@ -1252,6 +1299,12 @@ void ima_delete_rules(void)
        }
 }
 
+#define __ima_hook_stringify(str)      (#str),
+
+const char *const func_tokens[] = {
+       __ima_hooks(__ima_hook_stringify)
+};
+
 #ifdef CONFIG_IMA_READ_POLICY
 enum {
        mask_exec = 0, mask_write, mask_read, mask_append
@@ -1264,12 +1317,6 @@ static const char *const mask_tokens[] = {
        "^MAY_APPEND"
 };
 
-#define __ima_hook_stringify(str)      (#str),
-
-static const char *const func_tokens[] = {
-       __ima_hooks(__ima_hook_stringify)
-};
-
 void *ima_policy_start(struct seq_file *m, loff_t *pos)
 {
        loff_t l = *pos;
@@ -1447,8 +1494,12 @@ int ima_policy_show(struct seq_file *m, void *v)
        }
        if (entry->template)
                seq_printf(m, "template=%s ", entry->template->name);
-       if (entry->flags & IMA_DIGSIG_REQUIRED)
-               seq_puts(m, "appraise_type=imasig ");
+       if (entry->flags & IMA_DIGSIG_REQUIRED) {
+               if (entry->flags & IMA_MODSIG_ALLOWED)
+                       seq_puts(m, "appraise_type=imasig|modsig ");
+               else
+                       seq_puts(m, "appraise_type=imasig ");
+       }
        if (entry->flags & IMA_PERMIT_DIRECTIO)
                seq_puts(m, "permit_directio ");
        rcu_read_unlock();
@@ -1456,3 +1507,53 @@ int ima_policy_show(struct seq_file *m, void *v)
        return 0;
 }
 #endif /* CONFIG_IMA_READ_POLICY */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+/*
+ * ima_appraise_signature: whether IMA will appraise a given function using
+ * an IMA digital signature. This is restricted to cases where the kernel
+ * has a set of built-in trusted keys in order to avoid an attacker simply
+ * loading additional keys.
+ */
+bool ima_appraise_signature(enum kernel_read_file_id id)
+{
+       struct ima_rule_entry *entry;
+       bool found = false;
+       enum ima_hooks func;
+
+       if (id >= READING_MAX_ID)
+               return false;
+
+       func = read_idmap[id] ?: FILE_CHECK;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(entry, ima_rules, list) {
+               if (entry->action != APPRAISE)
+                       continue;
+
+               /*
+                * A generic entry will match, but otherwise require that it
+                * match the func we're looking for
+                */
+               if (entry->func && entry->func != func)
+                       continue;
+
+               /*
+                * We require this to be a digital signature, not a raw IMA
+                * hash.
+                */
+               if (entry->flags & IMA_DIGSIG_REQUIRED)
+                       found = true;
+
+               /*
+                * We've found a rule that matches, so break now even if it
+                * didn't require a digital signature. A later rule that does
+                * won't override it, so it would be a false positive.
+                */
+               break;
+       }
+
+       rcu_read_unlock();
+       return found;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
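With the parser changes above and the 'ima-modsig' template added below, a
policy that accepts appended signatures would contain rules of roughly this
shape (illustrative values, not taken from this patch):

	appraise func=MODULE_CHECK appraise_type=imasig|modsig
	appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig template=ima-modsig

Note that 'appraise_type=imasig|modsig' is only accepted for hooks where
ima_hook_supports_modsig() returns true.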
index cb349d7..6aa6408 100644 (file)
@@ -23,6 +23,7 @@ static struct ima_template_desc builtin_templates[] = {
        {.name = "ima-ng", .fmt = "d-ng|n-ng"},
        {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"},
        {.name = "ima-buf", .fmt = "d-ng|n-ng|buf"},
+       {.name = "ima-modsig", .fmt = "d-ng|n-ng|sig|d-modsig|modsig"},
        {.name = "", .fmt = ""},        /* placeholder for a custom format */
 };
 
@@ -42,6 +43,10 @@ static const struct ima_template_field supported_fields[] = {
         .field_show = ima_show_template_sig},
        {.field_id = "buf", .field_init = ima_eventbuf_init,
         .field_show = ima_show_template_buf},
+       {.field_id = "d-modsig", .field_init = ima_eventdigest_modsig_init,
+        .field_show = ima_show_template_digest_ng},
+       {.field_id = "modsig", .field_init = ima_eventmodsig_init,
+        .field_show = ima_show_template_sig},
 };
 
 /*
@@ -49,10 +54,29 @@ static const struct ima_template_field supported_fields[] = {
  * need to be accounted for since they shouldn't be defined in the same template
  * description as 'd-ng' and 'n-ng' respectively.
  */
-#define MAX_TEMPLATE_NAME_LEN sizeof("d-ng|n-ng|sig|buf")
+#define MAX_TEMPLATE_NAME_LEN sizeof("d-ng|n-ng|sig|buf|d-modsig|modsig")
 
 static struct ima_template_desc *ima_template;
 
+/**
+ * ima_template_has_modsig - Check whether template has modsig-related fields.
+ * @ima_template: IMA template to check.
+ *
+ * Tells whether the given template has fields referencing a file's appended
+ * signature.
+ */
+bool ima_template_has_modsig(const struct ima_template_desc *ima_template)
+{
+       int i;
+
+       for (i = 0; i < ima_template->num_fields; i++)
+               if (!strcmp(ima_template->fields[i]->field_id, "modsig") ||
+                   !strcmp(ima_template->fields[i]->field_id, "d-modsig"))
+                       return true;
+
+       return false;
+}
+
 static int __init ima_template_setup(char *str)
 {
        struct ima_template_desc *template_desc;
@@ -282,9 +306,8 @@ static int ima_restore_template_data(struct ima_template_desc *template_desc,
        int ret = 0;
        int i;
 
-       *entry = kzalloc(sizeof(**entry) +
-                   template_desc->num_fields * sizeof(struct ima_field_data),
-                   GFP_NOFS);
+       *entry = kzalloc(struct_size(*entry, template_data,
+                                    template_desc->num_fields), GFP_NOFS);
        if (!*entry)
                return -ENOMEM;
 
index 2fb9a10..32ae05d 100644 (file)
@@ -225,7 +225,8 @@ int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp,
        return 0;
 }
 
-static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo,
+static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
+                                      u8 hash_algo,
                                       struct ima_field_data *field_data)
 {
        /*
@@ -328,6 +329,41 @@ out:
                                           hash_algo, field_data);
 }
 
+/*
+ * This function writes the digest of the file, which is expected to match the
+ * digest contained in the file's appended signature.
+ */
+int ima_eventdigest_modsig_init(struct ima_event_data *event_data,
+                               struct ima_field_data *field_data)
+{
+       enum hash_algo hash_algo;
+       const u8 *cur_digest;
+       u32 cur_digestsize;
+
+       if (!event_data->modsig)
+               return 0;
+
+       if (event_data->violation) {
+               /* Recording a violation. */
+               hash_algo = HASH_ALGO_SHA1;
+               cur_digest = NULL;
+               cur_digestsize = 0;
+       } else {
+               int rc;
+
+               rc = ima_get_modsig_digest(event_data->modsig, &hash_algo,
+                                          &cur_digest, &cur_digestsize);
+               if (rc)
+                       return rc;
+               else if (hash_algo == HASH_ALGO__LAST || cur_digestsize == 0)
+                       /* There was some error collecting the digest. */
+                       return -EINVAL;
+       }
+
+       return ima_eventdigest_init_common(cur_digest, cur_digestsize,
+                                          hash_algo, field_data);
+}
+
 static int ima_eventname_init_common(struct ima_event_data *event_data,
                                     struct ima_field_data *field_data,
                                     bool size_limit)
@@ -406,3 +442,29 @@ int ima_eventbuf_init(struct ima_event_data *event_data,
                                             event_data->buf_len, DATA_FMT_HEX,
                                             field_data);
 }
+
+/*
+ * ima_eventmodsig_init - include the appended file signature as part of the
+ * template data
+ */
+int ima_eventmodsig_init(struct ima_event_data *event_data,
+                        struct ima_field_data *field_data)
+{
+       const void *data;
+       u32 data_len;
+       int rc;
+
+       if (!event_data->modsig)
+               return 0;
+
+       /*
+        * modsig is a runtime structure containing pointers. Get its raw data
+        * instead.
+        */
+       rc = ima_get_raw_modsig(event_data->modsig, &data, &data_len);
+       if (rc)
+               return rc;
+
+       return ima_write_template_field_data(data, data_len, DATA_FMT_HEX,
+                                            field_data);
+}
index 652aa5d..9a88c79 100644 (file)
@@ -36,10 +36,14 @@ int ima_eventname_init(struct ima_event_data *event_data,
                       struct ima_field_data *field_data);
 int ima_eventdigest_ng_init(struct ima_event_data *event_data,
                            struct ima_field_data *field_data);
+int ima_eventdigest_modsig_init(struct ima_event_data *event_data,
+                               struct ima_field_data *field_data);
 int ima_eventname_ng_init(struct ima_event_data *event_data,
                          struct ima_field_data *field_data);
 int ima_eventsig_init(struct ima_event_data *event_data,
                      struct ima_field_data *field_data);
 int ima_eventbuf_init(struct ima_event_data *event_data,
                      struct ima_field_data *field_data);
+int ima_eventmodsig_init(struct ima_event_data *event_data,
+                        struct ima_field_data *field_data);
 #endif /* __LINUX_IMA_TEMPLATE_LIB_H */
index ed12d8e..d9323d3 100644 (file)
@@ -31,6 +31,7 @@
 #define IMA_NEW_FILE           0x04000000
 #define EVM_IMMUTABLE_DIGSIG   0x08000000
 #define IMA_FAIL_UNVERIFIABLE_SIGS     0x10000000
+#define IMA_MODSIG_ALLOWED     0x20000000
 
 #define IMA_DO_MASK            (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
                                 IMA_HASH | IMA_APPRAISE_SUBMASK)
@@ -147,10 +148,13 @@ int integrity_kernel_read(struct file *file, loff_t offset,
 
 extern struct dentry *integrity_dir;
 
+struct modsig;
+
 #ifdef CONFIG_INTEGRITY_SIGNATURE
 
 int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
                            const char *digest, int digestlen);
+int integrity_modsig_verify(unsigned int id, const struct modsig *modsig);
 
 int __init integrity_init_keyring(const unsigned int id);
 int __init integrity_load_x509(const unsigned int id, const char *path);
@@ -165,6 +169,12 @@ static inline int integrity_digsig_verify(const unsigned int id,
        return -EOPNOTSUPP;
 }
 
+static inline int integrity_modsig_verify(unsigned int id,
+                                         const struct modsig *modsig)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int integrity_init_keyring(const unsigned int id)
 {
        return 0;
@@ -190,6 +200,16 @@ static inline int asymmetric_verify(struct key *keyring, const char *sig,
 }
 #endif
 
+#ifdef CONFIG_IMA_APPRAISE_MODSIG
+int ima_modsig_verify(struct key *keyring, const struct modsig *modsig);
+#else
+static inline int ima_modsig_verify(struct key *keyring,
+                                   const struct modsig *modsig)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 #ifdef CONFIG_IMA_LOAD_X509
 void __init ima_load_x509(void);
 #else
index ade6991..1fbd778 100644 (file)
@@ -1228,11 +1228,16 @@ hashalg_fail:
 
 static int __init init_digests(void)
 {
+       int i;
+
        digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests),
                          GFP_KERNEL);
        if (!digests)
                return -ENOMEM;
 
+       for (i = 0; i < chip->nr_allocated_banks; i++)
+               digests[i].alg_id = chip->allocated_banks[i].alg_id;
+
        return 0;
 }
 
diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig
new file mode 100644 (file)
index 0000000..e84ddf4
--- /dev/null
@@ -0,0 +1,47 @@
+config SECURITY_LOCKDOWN_LSM
+       bool "Basic module for enforcing kernel lockdown"
+       depends on SECURITY
+       select MODULE_SIG if MODULES
+       help
+         Build support for an LSM that enforces a coarse kernel lockdown
+         behaviour.
+
+config SECURITY_LOCKDOWN_LSM_EARLY
+       bool "Enable lockdown LSM early in init"
+       depends on SECURITY_LOCKDOWN_LSM
+       help
+         Enable the lockdown LSM early in boot. This is necessary in order
+         to ensure that lockdown enforcement can be carried out on kernel
+         boot parameters that are otherwise parsed before the security
+         subsystem is fully initialised. If enabled, lockdown will
+         unconditionally be called before any other LSMs.
+
+choice
+       prompt "Kernel default lockdown mode"
+       default LOCK_DOWN_KERNEL_FORCE_NONE
+       depends on SECURITY_LOCKDOWN_LSM
+       help
+         The kernel can be configured to default to differing levels of
+         lockdown.
+
+config LOCK_DOWN_KERNEL_FORCE_NONE
+       bool "None"
+       help
+         No lockdown functionality is enabled by default. Lockdown may be
+         enabled via the kernel commandline or /sys/kernel/security/lockdown.
+
+config LOCK_DOWN_KERNEL_FORCE_INTEGRITY
+       bool "Integrity"
+       help
+         The kernel runs in integrity mode by default. Features that allow
+         the kernel to be modified at runtime are disabled.
+
+config LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY
+       bool "Confidentiality"
+       help
+         The kernel runs in confidentiality mode by default. Features that
+         allow the kernel to be modified at runtime or that permit userland
+         code to read confidential material held inside the kernel are
+         disabled.
+
+endchoice
diff --git a/security/lockdown/Makefile b/security/lockdown/Makefile
new file mode 100644 (file)
index 0000000..e3634b9
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown.o
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
new file mode 100644 (file)
index 0000000..40b7905
--- /dev/null
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Lock down the kernel
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/security.h>
+#include <linux/export.h>
+#include <linux/lsm_hooks.h>
+
+static enum lockdown_reason kernel_locked_down;
+
+static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
+       [LOCKDOWN_NONE] = "none",
+       [LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
+       [LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
+       [LOCKDOWN_EFI_TEST] = "/dev/efi_test access",
+       [LOCKDOWN_KEXEC] = "kexec of unsigned images",
+       [LOCKDOWN_HIBERNATION] = "hibernation",
+       [LOCKDOWN_PCI_ACCESS] = "direct PCI access",
+       [LOCKDOWN_IOPORT] = "raw io port access",
+       [LOCKDOWN_MSR] = "raw MSR access",
+       [LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables",
+       [LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
+       [LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
+       [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
+       [LOCKDOWN_MMIOTRACE] = "unsafe mmio",
+       [LOCKDOWN_DEBUGFS] = "debugfs access",
+       [LOCKDOWN_INTEGRITY_MAX] = "integrity",
+       [LOCKDOWN_KCORE] = "/proc/kcore access",
+       [LOCKDOWN_KPROBES] = "use of kprobes",
+       [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+       [LOCKDOWN_PERF] = "unsafe use of perf",
+       [LOCKDOWN_TRACEFS] = "use of tracefs",
+       [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
+};
+
+static const enum lockdown_reason lockdown_levels[] = {LOCKDOWN_NONE,
+                                                LOCKDOWN_INTEGRITY_MAX,
+                                                LOCKDOWN_CONFIDENTIALITY_MAX};
+
+/*
+ * Put the kernel into lock-down mode.
+ */
+static int lock_kernel_down(const char *where, enum lockdown_reason level)
+{
+       if (kernel_locked_down >= level)
+               return -EPERM;
+
+       kernel_locked_down = level;
+       pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n",
+                 where);
+       return 0;
+}
+
+static int __init lockdown_param(char *level)
+{
+       if (!level)
+               return -EINVAL;
+
+       if (strcmp(level, "integrity") == 0)
+               lock_kernel_down("command line", LOCKDOWN_INTEGRITY_MAX);
+       else if (strcmp(level, "confidentiality") == 0)
+               lock_kernel_down("command line", LOCKDOWN_CONFIDENTIALITY_MAX);
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
+early_param("lockdown", lockdown_param);
+
+/**
+ * lockdown_is_locked_down - Find out if the kernel is locked down
+ * @what: Tag to use in notice generated if lockdown is in effect
+ */
+static int lockdown_is_locked_down(enum lockdown_reason what)
+{
+       if (WARN(what >= LOCKDOWN_CONFIDENTIALITY_MAX,
+                "Invalid lockdown reason"))
+               return -EPERM;
+
+       if (kernel_locked_down >= what) {
+               if (lockdown_reasons[what])
+                       pr_notice("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n",
+                                 current->comm, lockdown_reasons[what]);
+               return -EPERM;
+       }
+
+       return 0;
+}
+
+static struct security_hook_list lockdown_hooks[] __lsm_ro_after_init = {
+       LSM_HOOK_INIT(locked_down, lockdown_is_locked_down),
+};
+
+static int __init lockdown_lsm_init(void)
+{
+#if defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY)
+       lock_kernel_down("Kernel configuration", LOCKDOWN_INTEGRITY_MAX);
+#elif defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY)
+       lock_kernel_down("Kernel configuration", LOCKDOWN_CONFIDENTIALITY_MAX);
+#endif
+       security_add_hooks(lockdown_hooks, ARRAY_SIZE(lockdown_hooks),
+                          "lockdown");
+       return 0;
+}
+
+static ssize_t lockdown_read(struct file *filp, char __user *buf, size_t count,
+                            loff_t *ppos)
+{
+       char temp[80];
+       int i, offset = 0;
+
+       for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) {
+               enum lockdown_reason level = lockdown_levels[i];
+
+               if (lockdown_reasons[level]) {
+                       const char *label = lockdown_reasons[level];
+
+                       if (kernel_locked_down == level)
+                               offset += sprintf(temp+offset, "[%s] ", label);
+                       else
+                               offset += sprintf(temp+offset, "%s ", label);
+               }
+       }
+
+       /* Convert the last space to a newline if needed. */
+       if (offset > 0)
+               temp[offset-1] = '\n';
+
+       return simple_read_from_buffer(buf, count, ppos, temp, strlen(temp));
+}
+
+static ssize_t lockdown_write(struct file *file, const char __user *buf,
+                             size_t n, loff_t *ppos)
+{
+       char *state;
+       int i, len, err = -EINVAL;
+
+       state = memdup_user_nul(buf, n);
+       if (IS_ERR(state))
+               return PTR_ERR(state);
+
+       len = strlen(state);
+       if (len && state[len-1] == '\n') {
+               state[len-1] = '\0';
+               len--;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) {
+               enum lockdown_reason level = lockdown_levels[i];
+               const char *label = lockdown_reasons[level];
+
+               if (label && !strcmp(state, label))
+                       err = lock_kernel_down("securityfs", level);
+       }
+
+       kfree(state);
+       return err ? err : n;
+}
+
+static const struct file_operations lockdown_ops = {
+       .read  = lockdown_read,
+       .write = lockdown_write,
+};
+
+static int __init lockdown_secfs_init(void)
+{
+       struct dentry *dentry;
+
+       dentry = securityfs_create_file("lockdown", 0600, NULL, NULL,
+                                       &lockdown_ops);
+       return PTR_ERR_OR_ZERO(dentry);
+}
+
+core_initcall(lockdown_secfs_init);
+
+#ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY
+DEFINE_EARLY_LSM(lockdown) = {
+#else
+DEFINE_LSM(lockdown) = {
+#endif
+       .name = "lockdown",
+       .init = lockdown_lsm_init,
+};
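A small userspace sketch of exercising the securityfs interface above
(assuming securityfs is mounted at /sys/kernel/security; note that
lock_kernel_down() only ever raises the level, so a write requesting a lower
level fails with EPERM):

	#include <stdio.h>

	int main(void)
	{
		char buf[80];
		FILE *f = fopen("/sys/kernel/security/lockdown", "r");

		if (f) {
			/* Prints e.g. "[none] integrity confidentiality" */
			if (fgets(buf, sizeof(buf), f))
				fputs(buf, stdout);
			fclose(f);
		}

		f = fopen("/sys/kernel/security/lockdown", "w");
		if (f) {
			fputs("integrity", f);	/* raise to integrity mode */
			fclose(f);
		}
		return 0;
	}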
index 25ee5c7..1bc000f 100644 (file)
@@ -33,6 +33,7 @@
 
 /* How many LSMs were built into the kernel? */
 #define LSM_COUNT (__end_lsm_info - __start_lsm_info)
+#define EARLY_LSM_COUNT (__end_early_lsm_info - __start_early_lsm_info)
 
 struct security_hook_heads security_hook_heads __lsm_ro_after_init;
 static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
@@ -277,6 +278,8 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 static void __init lsm_early_cred(struct cred *cred);
 static void __init lsm_early_task(struct task_struct *task);
 
+static int lsm_append(const char *new, char **result);
+
 static void __init ordered_lsm_init(void)
 {
        struct lsm_info **lsm;
@@ -323,6 +326,26 @@ static void __init ordered_lsm_init(void)
        kfree(ordered_lsms);
 }
 
+int __init early_security_init(void)
+{
+       int i;
+       struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
+       struct lsm_info *lsm;
+
+       for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
+            i++)
+               INIT_HLIST_HEAD(&list[i]);
+
+       for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
+               if (!lsm->enabled)
+                       lsm->enabled = &lsm_enabled_true;
+               prepare_lsm(lsm);
+               initialize_lsm(lsm);
+       }
+
+       return 0;
+}
+
 /**
  * security_init - initializes the security framework
  *
@@ -330,14 +353,18 @@ static void __init ordered_lsm_init(void)
  */
 int __init security_init(void)
 {
-       int i;
-       struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
+       struct lsm_info *lsm;
 
        pr_info("Security Framework initializing\n");
 
-       for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
-            i++)
-               INIT_HLIST_HEAD(&list[i]);
+       /*
+        * Append the names of the early LSM modules now that kmalloc() is
+        * available
+        */
+       for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
+               if (lsm->enabled)
+                       lsm_append(lsm->name, &lsm_names);
+       }
 
        /* Load LSMs in specified order. */
        ordered_lsm_init();
@@ -384,7 +411,7 @@ static bool match_last_lsm(const char *list, const char *lsm)
        return !strcmp(last, lsm);
 }
 
-static int lsm_append(char *new, char **result)
+static int lsm_append(const char *new, char **result)
 {
        char *cp;
 
@@ -422,8 +449,15 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count,
                hooks[i].lsm = lsm;
                hlist_add_tail_rcu(&hooks[i].list, hooks[i].head);
        }
-       if (lsm_append(lsm, &lsm_names) < 0)
-               panic("%s - Cannot get early memory.\n", __func__);
+
+       /*
+        * Don't try to append during early_security_init(); we'll come back
+        * and fix this up afterwards.
+        */
+       if (slab_is_available()) {
+               if (lsm_append(lsm, &lsm_names) < 0)
+                       panic("%s - Cannot get early memory.\n", __func__);
+       }
 }
 
 int call_blocking_lsm_notifier(enum lsm_event event, void *data)
@@ -2364,3 +2398,9 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux)
        call_void_hook(bpf_prog_free_security, aux);
 }
 #endif /* CONFIG_BPF_SYSCALL */
+
+int security_locked_down(enum lockdown_reason what)
+{
+       return call_int_hook(locked_down, 0, what);
+}
+EXPORT_SYMBOL(security_locked_down);
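Callers elsewhere in the tree are expected to gate sensitive operations on the
new hook; an illustrative call site (not from this patch):

	#include <linux/security.h>

	/* Illustrative caller: refuse the access while lockdown is in force. */
	static int example_open_dev_mem(void)
	{
		int rc = security_locked_down(LOCKDOWN_DEV_MEM);

		if (rc)
			return rc;	/* -EPERM when locked down */

		/* ... proceed with the privileged access ... */
		return 0;
	}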
index 3a29e7c..a5813c7 100644 (file)
@@ -1946,7 +1946,14 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
                rc = string_to_context_struct(args->newp, NULL, s,
                                              newc, SECSID_NULL);
                if (rc == -EINVAL) {
-                       /* Retain string representation for later mapping. */
+                       /*
+                        * Retain string representation for later mapping.
+                        *
+                        * IMPORTANT: We need to copy the contents of oldc->str
+                        * back into s again because string_to_context_struct()
+                        * may have garbled it.
+                        */
+                       memcpy(s, oldc->str, oldc->len);
                        context_init(newc);
                        newc->str = s;
                        newc->len = oldc->len;
index f1c93a7..38ac3da 100644 (file)
@@ -465,7 +465,7 @@ char *smk_parse_smack(const char *string, int len)
        if (i == 0 || i >= SMK_LONGLABEL)
                return ERR_PTR(-EINVAL);
 
-       smack = kzalloc(i + 1, GFP_KERNEL);
+       smack = kzalloc(i + 1, GFP_NOFS);
        if (smack == NULL)
                return ERR_PTR(-ENOMEM);
 
@@ -500,7 +500,7 @@ int smk_netlbl_mls(int level, char *catset, struct netlbl_lsm_secattr *sap,
                        if ((m & *cp) == 0)
                                continue;
                        rc = netlbl_catmap_setbit(&sap->attr.mls.cat,
-                                                 cat, GFP_KERNEL);
+                                                 cat, GFP_NOFS);
                        if (rc < 0) {
                                netlbl_catmap_free(sap->attr.mls.cat);
                                return rc;
@@ -536,7 +536,7 @@ struct smack_known *smk_import_entry(const char *string, int len)
        if (skp != NULL)
                goto freeout;
 
-       skp = kzalloc(sizeof(*skp), GFP_KERNEL);
+       skp = kzalloc(sizeof(*skp), GFP_NOFS);
        if (skp == NULL) {
                skp = ERR_PTR(-ENOMEM);
                goto freeout;
index 4c5e5a4..abeb09c 100644 (file)
@@ -288,7 +288,7 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip,
        if (!(ip->i_opflags & IOP_XATTR))
                return ERR_PTR(-EOPNOTSUPP);
 
-       buffer = kzalloc(SMK_LONGLABEL, GFP_KERNEL);
+       buffer = kzalloc(SMK_LONGLABEL, GFP_NOFS);
        if (buffer == NULL)
                return ERR_PTR(-ENOMEM);
 
@@ -307,7 +307,7 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip,
 
 /**
  * init_inode_smack - initialize an inode security blob
- * @isp: the blob to initialize
+ * @inode: inode to extract the info from
  * @skp: a pointer to the Smack label entry to use in the blob
  *
  */
@@ -509,7 +509,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 
 /**
  * smack_syslog - Smack approval on syslog
- * @type: message type
+ * @typefrom_file: unused
  *
  * Returns 0 on success, error code otherwise.
  */
@@ -765,7 +765,7 @@ static int smack_sb_eat_lsm_opts(char *options, void **mnt_opts)
 /**
  * smack_set_mnt_opts - set Smack specific mount options
  * @sb: the file system superblock
- * @opts: Smack mount options
+ * @mnt_opts: Smack mount options
  * @kern_flags: mount option from kernel space or user space
  * @set_kern_flags: where to store converted mount opts
  *
@@ -937,7 +937,8 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
 
                if (rc != 0)
                        return rc;
-       } else if (bprm->unsafe)
+       }
+       if (bprm->unsafe & ~LSM_UNSAFE_PTRACE)
                return -EPERM;
 
        bsp->smk_task = isp->smk_task;
@@ -958,7 +959,7 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
  * smack_inode_alloc_security - allocate an inode blob
  * @inode: the inode in need of a blob
  *
- * Returns 0 if it gets a blob, -ENOMEM otherwise
+ * Returns 0
  */
 static int smack_inode_alloc_security(struct inode *inode)
 {
@@ -1164,7 +1165,7 @@ static int smack_inode_rename(struct inode *old_inode,
  *
  * This is the important Smack hook.
  *
- * Returns 0 if access is permitted, -EACCES otherwise
+ * Returns 0 if access is permitted, an error code otherwise
  */
 static int smack_inode_permission(struct inode *inode, int mask)
 {
@@ -1222,8 +1223,7 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 
 /**
  * smack_inode_getattr - Smack check for getting attributes
- * @mnt: vfsmount of the object
- * @dentry: the object
+ * @path: path to extract the info from
  *
  * Returns 0 if access is permitted, an error code otherwise
  */
@@ -1870,14 +1870,13 @@ static int smack_file_receive(struct file *file)
 /**
  * smack_file_open - Smack dentry open processing
  * @file: the object
- * @cred: task credential
  *
  * Set the security blob in the file structure.
  * Allow the open only if the task has read access. There are
  * many read operations (e.g. fstat) that you can do with an
  * fd even if you have the file open write-only.
  *
- * Returns 0
+ * Returns 0 if current has access, error code otherwise
  */
 static int smack_file_open(struct file *file)
 {
@@ -1900,7 +1899,7 @@ static int smack_file_open(struct file *file)
 
 /**
  * smack_cred_alloc_blank - "allocate" blank task-level security credentials
- * @new: the new credentials
+ * @cred: the new credentials
  * @gfp: the atomicity of any memory allocations
  *
  * Prepare a blank set of credentials for modification.  This must allocate all
@@ -1983,7 +1982,7 @@ static void smack_cred_transfer(struct cred *new, const struct cred *old)
 
 /**
  * smack_cred_getsecid - get the secid corresponding to a creds structure
- * @c: the object creds
+ * @cred: the object creds
  * @secid: where to put the result
  *
  * Sets the secid to contain a u32 version of the smack label.
@@ -2140,8 +2139,6 @@ static int smack_task_getioprio(struct task_struct *p)
 /**
  * smack_task_setscheduler - Smack check on setting scheduler
  * @p: the task object
- * @policy: unused
- * @lp: unused
  *
  * Return 0 if read access is permitted
  */
@@ -2611,8 +2608,9 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address)
 
 /**
  * smk_ipv6_port_check - check Smack port access
- * @sock: socket
+ * @sk: socket
  * @address: address
+ * @act: the action being taken
  *
  * Create or update the port list entry
  */
@@ -2782,7 +2780,7 @@ static int smack_socket_post_create(struct socket *sock, int family,
  *
  * Cross reference the peer labels for SO_PEERSEC
  *
- * Returns 0 on success, and error code otherwise
+ * Returns 0
  */
 static int smack_socket_socketpair(struct socket *socka,
                                   struct socket *sockb)
@@ -3014,13 +3012,13 @@ static int smack_shm_shmctl(struct kern_ipc_perm *isp, int cmd)
  *
  * Returns 0 if current has the requested access, error code otherwise
  */
-static int smack_shm_shmat(struct kern_ipc_perm *ipc, char __user *shmaddr,
+static int smack_shm_shmat(struct kern_ipc_perm *isp, char __user *shmaddr,
                           int shmflg)
 {
        int may;
 
        may = smack_flags_to_may(shmflg);
-       return smk_curacc_shm(ipc, may);
+       return smk_curacc_shm(isp, may);
 }
 
 /**
@@ -3925,6 +3923,8 @@ access_check:
                        skp = smack_ipv6host_label(&sadd);
                if (skp == NULL)
                        skp = smack_net_ambient;
+               if (skb == NULL)
+                       break;
 #ifdef CONFIG_AUDIT
                smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net);
                ad.a.u.net->family = family;
@@ -4762,7 +4762,7 @@ static __init void init_smack_known_list(void)
 /**
  * smack_init - initialize the smack system
  *
- * Returns 0
+ * Returns 0 on success, -ENOMEM if there's no memory
  */
 static __init int smack_init(void)
 {
index 5c9fbf3..6b724d2 100644 (file)
@@ -226,7 +226,8 @@ static int snd_timer_check_master(struct snd_timer_instance *master)
        return 0;
 }
 
-static int snd_timer_close_locked(struct snd_timer_instance *timeri);
+static int snd_timer_close_locked(struct snd_timer_instance *timeri,
+                                 struct device **card_devp_to_put);
 
 /*
  * open a timer instance
@@ -238,6 +239,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
 {
        struct snd_timer *timer;
        struct snd_timer_instance *timeri = NULL;
+       struct device *card_dev_to_put = NULL;
        int err;
 
        mutex_lock(&register_mutex);
@@ -261,7 +263,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
                list_add_tail(&timeri->open_list, &snd_timer_slave_list);
                err = snd_timer_check_slave(timeri);
                if (err < 0) {
-                       snd_timer_close_locked(timeri);
+                       snd_timer_close_locked(timeri, &card_dev_to_put);
                        timeri = NULL;
                }
                goto unlock;
@@ -313,7 +315,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
                        timeri = NULL;
 
                        if (timer->card)
-                               put_device(&timer->card->card_dev);
+                               card_dev_to_put = &timer->card->card_dev;
                        module_put(timer->module);
                        goto unlock;
                }
@@ -323,12 +325,15 @@ int snd_timer_open(struct snd_timer_instance **ti,
        timer->num_instances++;
        err = snd_timer_check_master(timeri);
        if (err < 0) {
-               snd_timer_close_locked(timeri);
+               snd_timer_close_locked(timeri, &card_dev_to_put);
                timeri = NULL;
        }
 
  unlock:
        mutex_unlock(&register_mutex);
+       /* put_device() is called after unlock to avoid a deadlock */
+       if (card_dev_to_put)
+               put_device(card_dev_to_put);
        *ti = timeri;
        return err;
 }
@@ -338,7 +343,8 @@ EXPORT_SYMBOL(snd_timer_open);
  * close a timer instance
  * call this with register_mutex down.
  */
-static int snd_timer_close_locked(struct snd_timer_instance *timeri)
+static int snd_timer_close_locked(struct snd_timer_instance *timeri,
+                                 struct device **card_devp_to_put)
 {
        struct snd_timer *timer = timeri->timer;
        struct snd_timer_instance *slave, *tmp;
@@ -395,7 +401,7 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri)
                        timer->hw.close(timer);
                /* release a card refcount for safe disconnection */
                if (timer->card)
-                       put_device(&timer->card->card_dev);
+                       *card_devp_to_put = &timer->card->card_dev;
                module_put(timer->module);
        }
 
@@ -407,14 +413,18 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri)
  */
 int snd_timer_close(struct snd_timer_instance *timeri)
 {
+       struct device *card_dev_to_put = NULL;
        int err;
 
        if (snd_BUG_ON(!timeri))
                return -ENXIO;
 
        mutex_lock(&register_mutex);
-       err = snd_timer_close_locked(timeri);
+       err = snd_timer_close_locked(timeri, &card_dev_to_put);
        mutex_unlock(&register_mutex);
+       /* put_device() is called after unlock to avoid a deadlock */
+       if (card_dev_to_put)
+               put_device(card_dev_to_put);
        return err;
 }
 EXPORT_SYMBOL(snd_timer_close);
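The shape of the fix above: while the mutex is held, only record which
reference needs dropping; the actual put_device() happens after the unlock,
because dropping the last reference can re-enter a path that takes the same
mutex. A standalone sketch of the pattern, with hypothetical names:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;

	struct device { int refcount; };

	/* Hypothetical release path that re-takes register_lock, as a
	 * device release callback might; calling it with the lock held
	 * would deadlock. */
	static void put_device(struct device *dev)
	{
		pthread_mutex_lock(&register_lock);
		dev->refcount--;
		pthread_mutex_unlock(&register_lock);
	}

	static void close_instance(struct device *dev)
	{
		struct device *to_put = NULL;

		pthread_mutex_lock(&register_lock);
		/* ... tear down the instance; only record what to release ... */
		to_put = dev;
		pthread_mutex_unlock(&register_lock);

		/* Drop the reference only after the lock is released. */
		if (to_put)
			put_device(to_put);
	}

	int main(void)
	{
		struct device d = { .refcount = 1 };

		close_instance(&d);
		printf("refcount=%d\n", d.refcount);
		return 0;
	}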
index 73fee99..6c1497d 100644 (file)
@@ -252,8 +252,7 @@ end:
        return err;
 }
 
-static unsigned int
-map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s)
+static int map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s)
 {
        unsigned int sec, sections, ch, channels;
        unsigned int pcm, midi, location;
index 218292b..f5b3252 100644 (file)
@@ -15,7 +15,7 @@ alesis_io14_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
 
 static const unsigned int
 alesis_io26_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
-       {10, 10, 8},    /* Tx0 = Analog + S/PDIF. */
+       {10, 10, 4},    /* Tx0 = Analog + S/PDIF. */
        {16, 8, 0},     /* Tx1 = ADAT1 + ADAT2. */
 };
 
index 211ca85..cfab60d 100644 (file)
@@ -270,6 +270,11 @@ int snd_hdac_ext_bus_link_get(struct hdac_bus *bus,
 
                ret = snd_hdac_ext_bus_link_power_up(link);
 
+               /*
+                * clear the register to invalidate all the output streams
+                */
+               snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV,
+                                ML_LOSIDV_STREAM_MASK, 0);
                /*
                 *  wait for 521usec for codec to report status
                 *  HDA spec section 4.3 - Codec Discovery
index d3999e7..7e7be8e 100644 (file)
@@ -447,8 +447,6 @@ static void azx_int_disable(struct hdac_bus *bus)
        list_for_each_entry(azx_dev, &bus->stream_list, list)
                snd_hdac_stream_updateb(azx_dev, SD_CTL, SD_INT_MASK, 0);
 
-       synchronize_irq(bus->irq);
-
        /* disable SIE for all streams */
        snd_hdac_chip_writeb(bus, INTCTL, 0);
 
index 91e71be..cf53fbd 100644 (file)
@@ -1348,9 +1348,9 @@ static int azx_free(struct azx *chip)
        }
 
        if (bus->chip_init) {
-               azx_stop_chip(chip);
                azx_clear_irq_pending(chip);
                azx_stop_all_streams(chip);
+               azx_stop_chip(chip);
        }
 
        if (bus->irq >= 0)
@@ -2399,6 +2399,12 @@ static const struct pci_device_id azx_ids[] = {
        /* Icelake */
        { PCI_DEVICE(0x8086, 0x34c8),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+       /* Jasperlake */
+       { PCI_DEVICE(0x8086, 0x38c8),
+         .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+       /* Tigerlake */
+       { PCI_DEVICE(0x8086, 0xa0c8),
+         .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
        /* Elkhart Lake */
        { PCI_DEVICE(0x8086, 0x4b55),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
@@ -2485,8 +2491,7 @@ static const struct pci_device_id azx_ids[] = {
                         AZX_DCAPS_PM_RUNTIME },
        /* AMD Raven */
        { PCI_DEVICE(0x1022, 0x15e3),
-         .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB |
-                        AZX_DCAPS_PM_RUNTIME },
+         .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB },
        /* ATI HDMI */
        { PCI_DEVICE(0x1002, 0x0002),
          .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
index e283966..bc9dd8e 100644 (file)
@@ -357,6 +357,7 @@ static const struct hda_fixup ad1986a_fixups[] = {
 
 static const struct snd_pci_quirk ad1986a_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x30af, "HP B2800", AD1986A_FIXUP_LAPTOP_IMIC),
+       SND_PCI_QUIRK(0x1043, 0x1153, "ASUS M9V", AD1986A_FIXUP_LAPTOP_IMIC),
        SND_PCI_QUIRK(0x1043, 0x1443, "ASUS Z99He", AD1986A_FIXUP_EAPD),
        SND_PCI_QUIRK(0x1043, 0x1447, "ASUS A8JN", AD1986A_FIXUP_EAPD),
        SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8100, "ASUS P5", AD1986A_FIXUP_3STACK),
index bca5de7..b725537 100644 (file)
@@ -145,6 +145,7 @@ struct hdmi_spec {
        struct snd_array pins; /* struct hdmi_spec_per_pin */
        struct hdmi_pcm pcm_rec[16];
        struct mutex pcm_lock;
+       struct mutex bind_lock; /* for audio component binding */
        /* pcm_bitmap means which pcms have been assigned to pins*/
        unsigned long pcm_bitmap;
        int pcm_used;   /* counter of pcm_rec[] */
@@ -2258,7 +2259,7 @@ static int generic_hdmi_init(struct hda_codec *codec)
        struct hdmi_spec *spec = codec->spec;
        int pin_idx;
 
-       mutex_lock(&spec->pcm_lock);
+       mutex_lock(&spec->bind_lock);
        spec->use_jack_detect = !codec->jackpoll_interval;
        for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) {
                struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx);
@@ -2275,7 +2276,7 @@ static int generic_hdmi_init(struct hda_codec *codec)
                        snd_hda_jack_detect_enable_callback(codec, pin_nid,
                                                            jack_callback);
        }
-       mutex_unlock(&spec->pcm_lock);
+       mutex_unlock(&spec->bind_lock);
        return 0;
 }
 
@@ -2382,6 +2383,7 @@ static int alloc_generic_hdmi(struct hda_codec *codec)
        spec->ops = generic_standard_hdmi_ops;
        spec->dev_num = 1;      /* initialize to 1 */
        mutex_init(&spec->pcm_lock);
+       mutex_init(&spec->bind_lock);
        snd_hdac_register_chmap_ops(&codec->core, &spec->chmap);
 
        spec->chmap.ops.get_chmap = hdmi_get_chmap;
@@ -2451,7 +2453,7 @@ static void generic_acomp_notifier_set(struct drm_audio_component *acomp,
        int i;
 
        spec = container_of(acomp->audio_ops, struct hdmi_spec, drm_audio_ops);
-       mutex_lock(&spec->pcm_lock);
+       mutex_lock(&spec->bind_lock);
        spec->use_acomp_notifier = use_acomp;
        spec->codec->relaxed_resume = use_acomp;
        /* reprogram each jack detection logic depending on the notifier */
@@ -2461,7 +2463,7 @@ static void generic_acomp_notifier_set(struct drm_audio_component *acomp,
                                              get_pin(spec, i)->pin_nid,
                                              use_acomp);
        }
-       mutex_unlock(&spec->pcm_lock);
+       mutex_unlock(&spec->bind_lock);
 }
 
 /* enable / disable the notifier via master bind / unbind */
@@ -3474,6 +3476,8 @@ static int patch_nvhdmi(struct hda_codec *codec)
                nvhdmi_chmap_cea_alloc_validate_get_type;
        spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate;
 
+       codec->link_down_at_suspend = 1;
+
        generic_acomp_init(codec, &nvhdmi_audio_ops, nvhdmi_port2pin);
 
        return 0;
index da16954..80f66ba 100644 (file)
@@ -393,6 +393,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0700:
        case 0x10ec0701:
        case 0x10ec0703:
+       case 0x10ec0711:
                alc_update_coef_idx(codec, 0x10, 1<<15, 0);
                break;
        case 0x10ec0662:
@@ -408,6 +409,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0672:
                alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */
                break;
+       case 0x10ec0623:
+               alc_update_coef_idx(codec, 0x19, 1<<13, 0);
+               break;
        case 0x10ec0668:
                alc_update_coef_idx(codec, 0x7, 3<<13, 0);
                break;
@@ -2919,6 +2923,7 @@ enum {
        ALC269_TYPE_ALC225,
        ALC269_TYPE_ALC294,
        ALC269_TYPE_ALC300,
+       ALC269_TYPE_ALC623,
        ALC269_TYPE_ALC700,
 };
 
@@ -2954,6 +2959,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
        case ALC269_TYPE_ALC225:
        case ALC269_TYPE_ALC294:
        case ALC269_TYPE_ALC300:
+       case ALC269_TYPE_ALC623:
        case ALC269_TYPE_ALC700:
                ssids = alc269_ssids;
                break;
@@ -5358,6 +5364,17 @@ static void alc271_hp_gate_mic_jack(struct hda_codec *codec,
        }
 }
 
+static void alc256_fixup_dell_xps_13_headphone_noise2(struct hda_codec *codec,
+                                                     const struct hda_fixup *fix,
+                                                     int action)
+{
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       snd_hda_codec_amp_stereo(codec, 0x1a, HDA_INPUT, 0, HDA_AMP_VOLMASK, 1);
+       snd_hda_override_wcaps(codec, 0x1a, get_wcaps(codec, 0x1a) & ~AC_WCAP_IN_AMP);
+}
+
 static void alc269_fixup_limit_int_mic_boost(struct hda_codec *codec,
                                             const struct hda_fixup *fix,
                                             int action)
@@ -5817,10 +5834,12 @@ enum {
        ALC292_FIXUP_DELL_E7X,
        ALC292_FIXUP_DISABLE_AAMIX,
        ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
+       ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE,
        ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
        ALC275_FIXUP_DELL_XPS,
        ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
+       ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2,
        ALC293_FIXUP_LENOVO_SPK_NOISE,
        ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
        ALC255_FIXUP_DELL_SPK_NOISE,
@@ -5868,9 +5887,11 @@ enum {
        ALC225_FIXUP_WYSE_AUTO_MUTE,
        ALC225_FIXUP_WYSE_DISABLE_MIC_VREF,
        ALC286_FIXUP_ACER_AIO_HEADSET_MIC,
+       ALC256_FIXUP_ASUS_HEADSET_MIC,
        ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
        ALC299_FIXUP_PREDATOR_SPK,
        ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
+       ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6506,6 +6527,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC292_FIXUP_DISABLE_AAMIX
        },
+       [ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x18, 0x01a1913c }, /* headset mic w/o jack detect */
+                       { }
+               },
+               .chained_before = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE,
+       },
        [ALC298_FIXUP_DELL1_MIC_NO_PRESENCE] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -6547,6 +6577,12 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
        },
+       [ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc256_fixup_dell_xps_13_headphone_noise2,
+               .chained = true,
+               .chain_id = ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE
+       },
        [ALC293_FIXUP_LENOVO_SPK_NOISE] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc_fixup_disable_aamix,
@@ -6901,6 +6937,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE
        },
+       [ALC256_FIXUP_ASUS_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a11020 }, /* headset mic with jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+       },
        [ALC256_FIXUP_ASUS_MIC_NO_PRESENCE] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -6927,6 +6972,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
        },
+       [ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x04a11040 },
+                       { 0x21, 0x04211020 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6980,17 +7035,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
        SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
        SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
-       SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+       SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
        SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
        SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
        SND_PCI_QUIRK(0x1028, 0x0738, "Dell Precision 5820", ALC269_FIXUP_NO_SHUTUP),
-       SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+       SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
        SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME),
        SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
        SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3),
        SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
        SND_PCI_QUIRK(0x1028, 0x080c, "Dell WYSE", ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE),
-       SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+       SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
        SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
        SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
        SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC),
@@ -7087,6 +7142,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
        SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
        SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
@@ -7165,6 +7221,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
        SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
        SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
        SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
@@ -7190,6 +7248,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
        SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS),
        SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
+       SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE),
 
 #if 0
        /* Below is a quirk table taken from the old code.
@@ -7358,6 +7417,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-chrome-book"},
        {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"},
        {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"},
+       {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"},
        {}
 };
 #define ALC225_STANDARD_PINS \
@@ -7770,6 +7830,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x90170110},
                {0x1a, 0x03011020},
                {0x21, 0x03211030}),
+       SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE,
+               {0x12, 0xb7a60140},
+               {0x17, 0x90170110},
+               {0x1a, 0x03a11030},
+               {0x21, 0x03211020}),
        SND_HDA_PIN_QUIRK(0x10ec0299, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
                ALC225_STANDARD_PINS,
                {0x12, 0xb7a60130},
@@ -7959,9 +8024,13 @@ static int patch_alc269(struct hda_codec *codec)
                spec->codec_variant = ALC269_TYPE_ALC300;
                spec->gen.mixer_nid = 0; /* no loopback on ALC300 */
                break;
+       case 0x10ec0623:
+               spec->codec_variant = ALC269_TYPE_ALC623;
+               break;
        case 0x10ec0700:
        case 0x10ec0701:
        case 0x10ec0703:
+       case 0x10ec0711:
                spec->codec_variant = ALC269_TYPE_ALC700;
                spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */
                alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */
@@ -9159,6 +9228,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
        HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269),
+       HDA_CODEC_ENTRY(0x10ec0623, "ALC623", patch_alc269),
        HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
        HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
        HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861),
@@ -9176,6 +9246,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
        HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269),
+       HDA_CODEC_ENTRY(0x10ec0711, "ALC711", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc662),
        HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880),
        HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882),
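A note on the pin tables above: entries such as { 0x19, 0x04a11040 } assign a
32-bit "pin default configuration" dword to a pin NID. As a rough aid, a
standalone sketch of the field layout from the HDA specification (the decode
below is illustrative only and not part of this patch):

	#include <stdio.h>
	#include <stdint.h>

	/* HDA pin default-config layout: [31:30] port connectivity,
	 * [29:24] location, [23:20] default device, [19:16] connection type,
	 * [15:12] color, [11:8] misc, [7:4] association, [3:0] sequence.
	 */
	static void decode_pincfg(uint32_t cfg)
	{
		printf("conn=%u loc=0x%02x dev=0x%x type=0x%x color=0x%x assoc=%u seq=%u\n",
		       cfg >> 30, (cfg >> 24) & 0x3f, (cfg >> 20) & 0xf,
		       (cfg >> 16) & 0xf, (cfg >> 12) & 0xf,
		       (cfg >> 4) & 0xf, cfg & 0xf);
	}

	int main(void)
	{
		/* Medion quirk above: device 0xa (mic in) on a 1/8" jack */
		decode_pincfg(0x04a11040);
		return 0;
	}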
index 48e9eef..ca60339 100644 (file)
@@ -116,19 +116,16 @@ static struct atmel_pcm_dma_params ssc_dma_params[NUM_SSC_DEVICES][2] = {
 static struct atmel_ssc_info ssc_info[NUM_SSC_DEVICES] = {
        {
        .name           = "ssc0",
-       .lock           = __SPIN_LOCK_UNLOCKED(ssc_info[0].lock),
        .dir_mask       = SSC_DIR_MASK_UNUSED,
        .initialized    = 0,
        },
        {
        .name           = "ssc1",
-       .lock           = __SPIN_LOCK_UNLOCKED(ssc_info[1].lock),
        .dir_mask       = SSC_DIR_MASK_UNUSED,
        .initialized    = 0,
        },
        {
        .name           = "ssc2",
-       .lock           = __SPIN_LOCK_UNLOCKED(ssc_info[2].lock),
        .dir_mask       = SSC_DIR_MASK_UNUSED,
        .initialized    = 0,
        },
@@ -317,13 +314,10 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream,
 
        snd_soc_dai_set_dma_data(dai, substream, dma_params);
 
-       spin_lock_irq(&ssc_p->lock);
-       if (ssc_p->dir_mask & dir_mask) {
-               spin_unlock_irq(&ssc_p->lock);
+       if (ssc_p->dir_mask & dir_mask)
                return -EBUSY;
-       }
+
        ssc_p->dir_mask |= dir_mask;
-       spin_unlock_irq(&ssc_p->lock);
 
        return 0;
 }
@@ -355,7 +349,6 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
 
        dir_mask = 1 << dir;
 
-       spin_lock_irq(&ssc_p->lock);
        ssc_p->dir_mask &= ~dir_mask;
        if (!ssc_p->dir_mask) {
                if (ssc_p->initialized) {
@@ -369,7 +362,6 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
                ssc_p->cmr_div = ssc_p->tcmr_period = ssc_p->rcmr_period = 0;
                ssc_p->forced_divider = 0;
        }
-       spin_unlock_irq(&ssc_p->lock);
 
        /* Shutdown the SSC clock. */
        pr_debug("atmel_ssc_dai: Stopping clock\n");
index ae764cb..3470b96 100644 (file)
@@ -93,7 +93,6 @@ struct atmel_ssc_state {
 struct atmel_ssc_info {
        char *name;
        struct ssc_device *ssc;
-       spinlock_t lock;        /* lock for dir_mask */
        unsigned short dir_mask;        /* 0=unused, 1=playback, 2=capture */
        unsigned short initialized;     /* true if SSC has been initialized */
        unsigned short daifmt;
index e609abc..eb709d5 100644 (file)
@@ -901,16 +901,20 @@ static void max98373_slot_config(struct i2c_client *i2c,
                max98373->i_slot = value & 0xF;
        else
                max98373->i_slot = 1;
-
-       max98373->reset_gpio = of_get_named_gpio(dev->of_node,
+       if (dev->of_node) {
+               max98373->reset_gpio = of_get_named_gpio(dev->of_node,
                                                "maxim,reset-gpio", 0);
-       if (!gpio_is_valid(max98373->reset_gpio)) {
-               dev_err(dev, "Looking up %s property in node %s failed %d\n",
-                       "maxim,reset-gpio", dev->of_node->full_name,
-                       max98373->reset_gpio);
+               if (!gpio_is_valid(max98373->reset_gpio)) {
+                       dev_err(dev, "Looking up %s property in node %s failed %d\n",
+                               "maxim,reset-gpio", dev->of_node->full_name,
+                               max98373->reset_gpio);
+               } else {
+                       dev_dbg(dev, "maxim,reset-gpio=%d",
+                               max98373->reset_gpio);
+               }
        } else {
-               dev_dbg(dev, "maxim,reset-gpio=%d",
-                       max98373->reset_gpio);
+               /* this marks reset_gpio as invalid */

+               max98373->reset_gpio = -1;
        }
 
        if (!device_property_read_u32(dev, "maxim,spkfb-slot-no", &value))
index 9fa5d44..58b2468 100644 (file)
@@ -243,6 +243,10 @@ static const char *const rx_mix1_text[] = {
        "ZERO", "IIR1", "IIR2", "RX1", "RX2", "RX3"
 };
 
+static const char * const rx_mix2_text[] = {
+       "ZERO", "IIR1", "IIR2"
+};
+
 static const char *const dec_mux_text[] = {
        "ZERO", "ADC1", "ADC2", "ADC3", "DMIC1", "DMIC2"
 };
@@ -270,6 +274,16 @@ static const struct soc_enum rx3_mix1_inp_enum[] = {
        SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX3_B2_CTL, 0, 6, rx_mix1_text),
 };
 
+/* RX1 MIX2 */
+static const struct soc_enum rx_mix2_inp1_chain_enum =
+       SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX1_B3_CTL,
+               0, 3, rx_mix2_text);
+
+/* RX2 MIX2 */
+static const struct soc_enum rx2_mix2_inp1_chain_enum =
+       SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX2_B3_CTL,
+               0, 3, rx_mix2_text);
+
 /* DEC */
 static const struct soc_enum dec1_mux_enum = SOC_ENUM_SINGLE(
                                LPASS_CDC_CONN_TX_B1_CTL, 0, 6, dec_mux_text);
@@ -309,6 +323,10 @@ static const struct snd_kcontrol_new rx3_mix1_inp2_mux = SOC_DAPM_ENUM(
                                "RX3 MIX1 INP2 Mux", rx3_mix1_inp_enum[1]);
 static const struct snd_kcontrol_new rx3_mix1_inp3_mux = SOC_DAPM_ENUM(
                                "RX3 MIX1 INP3 Mux", rx3_mix1_inp_enum[2]);
+static const struct snd_kcontrol_new rx1_mix2_inp1_mux = SOC_DAPM_ENUM(
+                               "RX1 MIX2 INP1 Mux", rx_mix2_inp1_chain_enum);
+static const struct snd_kcontrol_new rx2_mix2_inp1_mux = SOC_DAPM_ENUM(
+                               "RX2 MIX2 INP1 Mux", rx2_mix2_inp1_chain_enum);
 
 /* Digital Gain control -38.4 dB to +38.4 dB in 0.3 dB steps */
 static const DECLARE_TLV_DB_SCALE(digital_gain, -3840, 30, 0);
@@ -740,6 +758,10 @@ static const struct snd_soc_dapm_widget msm8916_wcd_digital_dapm_widgets[] = {
                         &rx3_mix1_inp2_mux),
        SND_SOC_DAPM_MUX("RX3 MIX1 INP3", SND_SOC_NOPM, 0, 0,
                         &rx3_mix1_inp3_mux),
+       SND_SOC_DAPM_MUX("RX1 MIX2 INP1", SND_SOC_NOPM, 0, 0,
+                        &rx1_mix2_inp1_mux),
+       SND_SOC_DAPM_MUX("RX2 MIX2 INP1", SND_SOC_NOPM, 0, 0,
+                        &rx2_mix2_inp1_mux),
 
        SND_SOC_DAPM_MUX("CIC1 MUX", SND_SOC_NOPM, 0, 0, &cic1_mux),
        SND_SOC_DAPM_MUX("CIC2 MUX", SND_SOC_NOPM, 0, 0, &cic2_mux),
index 50ed86d..88b7569 100644 (file)
@@ -21,8 +21,7 @@
 
 #define PCM3168A_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \
                         SNDRV_PCM_FMTBIT_S24_3LE | \
-                        SNDRV_PCM_FMTBIT_S24_LE | \
-                        SNDRV_PCM_FMTBIT_S32_LE)
+                        SNDRV_PCM_FMTBIT_S24_LE)
 
 #define PCM3168A_FMT_I2S               0x0
 #define PCM3168A_FMT_LEFT_J            0x1
index 762595d..c506c93 100644 (file)
@@ -1770,6 +1770,9 @@ static int rt5651_detect_headset(struct snd_soc_component *component)
 
 static bool rt5651_support_button_press(struct rt5651_priv *rt5651)
 {
+       if (!rt5651->hp_jack)
+               return false;
+
        /* Button press support only works with internal jack-detection */
        return (rt5651->hp_jack->status & SND_JACK_MICROPHONE) &&
                rt5651->gpiod_hp_det == NULL;
index 1ef4707..c50b75c 100644 (file)
@@ -995,6 +995,16 @@ static int rt5682_set_jack_detect(struct snd_soc_component *component,
 {
        struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
 
+       rt5682->hs_jack = hs_jack;
+
+       if (!hs_jack) {
+               regmap_update_bits(rt5682->regmap, RT5682_IRQ_CTRL_2,
+                                  RT5682_JD1_EN_MASK, RT5682_JD1_DIS);
+               regmap_update_bits(rt5682->regmap, RT5682_RC_CLK_CTRL,
+                                  RT5682_POW_JDH | RT5682_POW_JDL, 0);
+               return 0;
+       }
+
        switch (rt5682->pdata.jd_src) {
        case RT5682_JD1:
                snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_2,
@@ -1032,8 +1042,6 @@ static int rt5682_set_jack_detect(struct snd_soc_component *component,
                break;
        }
 
-       rt5682->hs_jack = hs_jack;
-
        return 0;
 }
 
index c3d06e8..d5fb7f5 100644 (file)
@@ -533,13 +533,10 @@ static SOC_ENUM_SINGLE_DECL(dac_osr,
 static SOC_ENUM_SINGLE_DECL(adc_osr,
                            WM8994_OVERSAMPLING, 1, osr_text);
 
-static const struct snd_kcontrol_new wm8994_snd_controls[] = {
+static const struct snd_kcontrol_new wm8994_common_snd_controls[] = {
 SOC_DOUBLE_R_TLV("AIF1ADC1 Volume", WM8994_AIF1_ADC1_LEFT_VOLUME,
                 WM8994_AIF1_ADC1_RIGHT_VOLUME,
                 1, 119, 0, digital_tlv),
-SOC_DOUBLE_R_TLV("AIF1ADC2 Volume", WM8994_AIF1_ADC2_LEFT_VOLUME,
-                WM8994_AIF1_ADC2_RIGHT_VOLUME,
-                1, 119, 0, digital_tlv),
 SOC_DOUBLE_R_TLV("AIF2ADC Volume", WM8994_AIF2_ADC_LEFT_VOLUME,
                 WM8994_AIF2_ADC_RIGHT_VOLUME,
                 1, 119, 0, digital_tlv),
@@ -556,8 +553,6 @@ SOC_ENUM("AIF2DACR Source", aif2dacr_src),
 
 SOC_DOUBLE_R_TLV("AIF1DAC1 Volume", WM8994_AIF1_DAC1_LEFT_VOLUME,
                 WM8994_AIF1_DAC1_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
-SOC_DOUBLE_R_TLV("AIF1DAC2 Volume", WM8994_AIF1_DAC2_LEFT_VOLUME,
-                WM8994_AIF1_DAC2_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
 SOC_DOUBLE_R_TLV("AIF2DAC Volume", WM8994_AIF2_DAC_LEFT_VOLUME,
                 WM8994_AIF2_DAC_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
 
@@ -565,17 +560,12 @@ SOC_SINGLE_TLV("AIF1 Boost Volume", WM8994_AIF1_CONTROL_2, 10, 3, 0, aif_tlv),
 SOC_SINGLE_TLV("AIF2 Boost Volume", WM8994_AIF2_CONTROL_2, 10, 3, 0, aif_tlv),
 
 SOC_SINGLE("AIF1DAC1 EQ Switch", WM8994_AIF1_DAC1_EQ_GAINS_1, 0, 1, 0),
-SOC_SINGLE("AIF1DAC2 EQ Switch", WM8994_AIF1_DAC2_EQ_GAINS_1, 0, 1, 0),
 SOC_SINGLE("AIF2 EQ Switch", WM8994_AIF2_EQ_GAINS_1, 0, 1, 0),
 
 WM8994_DRC_SWITCH("AIF1DAC1 DRC Switch", WM8994_AIF1_DRC1_1, 2),
 WM8994_DRC_SWITCH("AIF1ADC1L DRC Switch", WM8994_AIF1_DRC1_1, 1),
 WM8994_DRC_SWITCH("AIF1ADC1R DRC Switch", WM8994_AIF1_DRC1_1, 0),
 
-WM8994_DRC_SWITCH("AIF1DAC2 DRC Switch", WM8994_AIF1_DRC2_1, 2),
-WM8994_DRC_SWITCH("AIF1ADC2L DRC Switch", WM8994_AIF1_DRC2_1, 1),
-WM8994_DRC_SWITCH("AIF1ADC2R DRC Switch", WM8994_AIF1_DRC2_1, 0),
-
 WM8994_DRC_SWITCH("AIF2DAC DRC Switch", WM8994_AIF2_DRC_1, 2),
 WM8994_DRC_SWITCH("AIF2ADCL DRC Switch", WM8994_AIF2_DRC_1, 1),
 WM8994_DRC_SWITCH("AIF2ADCR DRC Switch", WM8994_AIF2_DRC_1, 0),
@@ -594,9 +584,6 @@ SOC_SINGLE("Sidetone HPF Switch", WM8994_SIDETONE, 6, 1, 0),
 SOC_ENUM("AIF1ADC1 HPF Mode", aif1adc1_hpf),
 SOC_DOUBLE("AIF1ADC1 HPF Switch", WM8994_AIF1_ADC1_FILTERS, 12, 11, 1, 0),
 
-SOC_ENUM("AIF1ADC2 HPF Mode", aif1adc2_hpf),
-SOC_DOUBLE("AIF1ADC2 HPF Switch", WM8994_AIF1_ADC2_FILTERS, 12, 11, 1, 0),
-
 SOC_ENUM("AIF2ADC HPF Mode", aif2adc_hpf),
 SOC_DOUBLE("AIF2ADC HPF Switch", WM8994_AIF2_ADC_FILTERS, 12, 11, 1, 0),
 
@@ -637,6 +624,24 @@ SOC_SINGLE("AIF2DAC 3D Stereo Switch", WM8994_AIF2_DAC_FILTERS_2,
           8, 1, 0),
 };
 
+/* Controls not available on WM1811 */
+static const struct snd_kcontrol_new wm8994_snd_controls[] = {
+SOC_DOUBLE_R_TLV("AIF1ADC2 Volume", WM8994_AIF1_ADC2_LEFT_VOLUME,
+                WM8994_AIF1_ADC2_RIGHT_VOLUME,
+                1, 119, 0, digital_tlv),
+SOC_DOUBLE_R_TLV("AIF1DAC2 Volume", WM8994_AIF1_DAC2_LEFT_VOLUME,
+                WM8994_AIF1_DAC2_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
+
+SOC_SINGLE("AIF1DAC2 EQ Switch", WM8994_AIF1_DAC2_EQ_GAINS_1, 0, 1, 0),
+
+WM8994_DRC_SWITCH("AIF1DAC2 DRC Switch", WM8994_AIF1_DRC2_1, 2),
+WM8994_DRC_SWITCH("AIF1ADC2L DRC Switch", WM8994_AIF1_DRC2_1, 1),
+WM8994_DRC_SWITCH("AIF1ADC2R DRC Switch", WM8994_AIF1_DRC2_1, 0),
+
+SOC_ENUM("AIF1ADC2 HPF Mode", aif1adc2_hpf),
+SOC_DOUBLE("AIF1ADC2 HPF Switch", WM8994_AIF1_ADC2_FILTERS, 12, 11, 1, 0),
+};
+
 static const struct snd_kcontrol_new wm8994_eq_controls[] = {
 SOC_SINGLE_TLV("AIF1DAC1 EQ1 Volume", WM8994_AIF1_DAC1_EQ_GAINS_1, 11, 31, 0,
               eq_tlv),
@@ -4258,13 +4263,15 @@ static int wm8994_component_probe(struct snd_soc_component *component)
        wm8994_handle_pdata(wm8994);
 
        wm_hubs_add_analogue_controls(component);
-       snd_soc_add_component_controls(component, wm8994_snd_controls,
-                            ARRAY_SIZE(wm8994_snd_controls));
+       snd_soc_add_component_controls(component, wm8994_common_snd_controls,
+                                      ARRAY_SIZE(wm8994_common_snd_controls));
        snd_soc_dapm_new_controls(dapm, wm8994_dapm_widgets,
                                  ARRAY_SIZE(wm8994_dapm_widgets));
 
        switch (control->type) {
        case WM8994:
+               snd_soc_add_component_controls(component, wm8994_snd_controls,
+                                              ARRAY_SIZE(wm8994_snd_controls));
                snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets,
                                          ARRAY_SIZE(wm8994_specific_dapm_widgets));
                if (control->revision < 4) {
@@ -4284,8 +4291,10 @@ static int wm8994_component_probe(struct snd_soc_component *component)
                }
                break;
        case WM8958:
+               snd_soc_add_component_controls(component, wm8994_snd_controls,
+                                              ARRAY_SIZE(wm8994_snd_controls));
                snd_soc_add_component_controls(component, wm8958_snd_controls,
-                                    ARRAY_SIZE(wm8958_snd_controls));
+                                              ARRAY_SIZE(wm8958_snd_controls));
                snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets,
                                          ARRAY_SIZE(wm8958_dapm_widgets));
                if (control->revision < 1) {
index ae28d99..9b8bb7b 100644 (file)
@@ -1259,8 +1259,7 @@ static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len)
        }
 
        if (in) {
-               if (in & WMFW_CTL_FLAG_READABLE)
-                       out |= rd;
+               out |= rd;
                if (in & WMFW_CTL_FLAG_WRITEABLE)
                        out |= wr;
                if (in & WMFW_CTL_FLAG_VOLATILE)
@@ -3697,11 +3696,16 @@ static int wm_adsp_buffer_parse_legacy(struct wm_adsp *dsp)
        u32 xmalg, addr, magic;
        int i, ret;
 
+       alg_region = wm_adsp_find_alg_region(dsp, WMFW_ADSP2_XM, dsp->fw_id);
+       if (!alg_region) {
+               adsp_err(dsp, "No algorithm region found\n");
+               return -EINVAL;
+       }
+
        buf = wm_adsp_buffer_alloc(dsp);
        if (!buf)
                return -ENOMEM;
 
-       alg_region = wm_adsp_find_alg_region(dsp, WMFW_ADSP2_XM, dsp->fw_id);
        xmalg = dsp->ops->sys_config_size / sizeof(__be32);
 
        addr = alg_region->base + xmalg + ALG_XM_FIELD(magic);
index ef0b746..b517e4b 100644 (file)
@@ -628,6 +628,16 @@ static int fsl_sai_startup(struct snd_pcm_substream *substream,
                           FSL_SAI_CR3_TRCE_MASK,
                           FSL_SAI_CR3_TRCE);
 
+       /*
+        * EDMA controller needs period size to be a multiple of
+        * tx/rx maxburst
+        */
+       if (sai->soc_data->use_edma)
+               snd_pcm_hw_constraint_step(substream->runtime, 0,
+                                          SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
+                                          tx ? sai->dma_params_tx.maxburst :
+                                          sai->dma_params_rx.maxburst);
+
        ret = snd_pcm_hw_constraint_list(substream->runtime, 0,
                        SNDRV_PCM_HW_PARAM_RATE, &fsl_sai_rate_constraints);
 
@@ -1026,30 +1036,35 @@ static int fsl_sai_remove(struct platform_device *pdev)
 
 static const struct fsl_sai_soc_data fsl_sai_vf610_data = {
        .use_imx_pcm = false,
+       .use_edma = false,
        .fifo_depth = 32,
        .reg_offset = 0,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx6sx_data = {
        .use_imx_pcm = true,
+       .use_edma = false,
        .fifo_depth = 32,
        .reg_offset = 0,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx7ulp_data = {
        .use_imx_pcm = true,
+       .use_edma = false,
        .fifo_depth = 16,
        .reg_offset = 8,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx8mq_data = {
        .use_imx_pcm = true,
+       .use_edma = false,
        .fifo_depth = 128,
        .reg_offset = 8,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx8qm_data = {
        .use_imx_pcm = true,
+       .use_edma = true,
        .fifo_depth = 64,
        .reg_offset = 0,
 };
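The constraint added in fsl_sai_startup() exists because the EDMA engine
completes transfers in whole bursts, so the period size must be a multiple of
the DMA maxburst. A standalone sketch of the equivalent rounding (the values
are hypothetical, not taken from the driver):

	#include <stdio.h>

	/* Round a period size up to a multiple of the DMA burst length,
	 * mirroring what snd_pcm_hw_constraint_step() enforces here. */
	static unsigned int align_period(unsigned int frames, unsigned int maxburst)
	{
		return ((frames + maxburst - 1) / maxburst) * maxburst;
	}

	int main(void)
	{
		printf("%u\n", align_period(1021, 8)); /* prints 1024 */
		return 0;
	}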
index b12cb57..76b15de 100644 (file)
 
 struct fsl_sai_soc_data {
        bool use_imx_pcm;
+       bool use_edma;
        unsigned int fifo_depth;
        unsigned int reg_offset;
 };
index a437567..4f6e58c 100644 (file)
@@ -308,6 +308,9 @@ static const struct snd_soc_dapm_widget sof_widgets[] = {
        SND_SOC_DAPM_HP("Headphone Jack", NULL),
        SND_SOC_DAPM_MIC("Headset Mic", NULL),
        SND_SOC_DAPM_SPK("Spk", NULL),
+};
+
+static const struct snd_soc_dapm_widget dmic_widgets[] = {
        SND_SOC_DAPM_MIC("SoC DMIC", NULL),
 };
 
@@ -318,10 +321,6 @@ static const struct snd_soc_dapm_route sof_map[] = {
 
        /* other jacks */
        { "IN1P", NULL, "Headset Mic" },
-
-       /* digital mics */
-       {"DMic", NULL, "SoC DMIC"},
-
 };
 
 static const struct snd_soc_dapm_route speaker_map[] = {
@@ -329,6 +328,11 @@ static const struct snd_soc_dapm_route speaker_map[] = {
        { "Spk", NULL, "Speaker" },
 };
 
+static const struct snd_soc_dapm_route dmic_map[] = {
+       /* digital mics */
+       {"DMic", NULL, "SoC DMIC"},
+};
+
 static int speaker_codec_init(struct snd_soc_pcm_runtime *rtd)
 {
        struct snd_soc_card *card = rtd->card;
@@ -342,6 +346,28 @@ static int speaker_codec_init(struct snd_soc_pcm_runtime *rtd)
        return ret;
 }
 
+static int dmic_init(struct snd_soc_pcm_runtime *rtd)
+{
+       struct snd_soc_card *card = rtd->card;
+       int ret;
+
+       ret = snd_soc_dapm_new_controls(&card->dapm, dmic_widgets,
+                                       ARRAY_SIZE(dmic_widgets));
+       if (ret) {
+               dev_err(card->dev, "DMic widget addition failed: %d\n", ret);
+               /* Don't need to add routes if widget addition failed */
+               return ret;
+       }
+
+       ret = snd_soc_dapm_add_routes(&card->dapm, dmic_map,
+                                     ARRAY_SIZE(dmic_map));
+
+       if (ret)
+               dev_err(card->dev, "DMic map addition failed: %d\n", ret);
+
+       return ret;
+}
+
 /* sof audio machine driver for rt5682 codec */
 static struct snd_soc_card sof_audio_card_rt5682 = {
        .name = "sof_rt5682",
@@ -445,6 +471,7 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
                links[id].name = "dmic01";
                links[id].cpus = &cpus[id];
                links[id].cpus->dai_name = "DMIC01 Pin";
+               links[id].init = dmic_init;
                if (dmic_be_num > 1) {
                        /* set up 2 BE links at most */
                        links[id + 1].name = "dmic16k";
@@ -576,6 +603,15 @@ static int sof_audio_probe(struct platform_device *pdev)
        /* need to get main clock from pmc */
        if (sof_rt5682_quirk & SOF_RT5682_MCLK_BYTCHT_EN) {
                ctx->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
+               if (IS_ERR(ctx->mclk)) {
+                       ret = PTR_ERR(ctx->mclk);
+
+                       dev_err(&pdev->dev,
+                               "Failed to get MCLK from pmc_plt_clk_3: %d\n",
+                               ret);
+                       return ret;
+               }
+
                ret = clk_prepare_enable(ctx->mclk);
                if (ret < 0) {
                        dev_err(&pdev->dev,
@@ -621,8 +657,24 @@ static int sof_audio_probe(struct platform_device *pdev)
                                          &sof_audio_card_rt5682);
 }
 
+static int sof_rt5682_remove(struct platform_device *pdev)
+{
+       struct snd_soc_card *card = platform_get_drvdata(pdev);
+       struct snd_soc_component *component = NULL;
+
+       for_each_card_components(card, component) {
+               if (!strcmp(component->name, rt5682_component[0].name)) {
+                       snd_soc_component_set_jack(component, NULL, NULL);
+                       break;
+               }
+       }
+
+       return 0;
+}
+
 static struct platform_driver sof_audio = {
        .probe = sof_audio_probe,
+       .remove = sof_rt5682_remove,
        .driver = {
                .name = "sof_rt5682",
                .pm = &snd_soc_pm_ops,
index af2d5a6..61c984f 100644 (file)
@@ -677,7 +677,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
        ret = rockchip_pcm_platform_register(&pdev->dev);
        if (ret) {
                dev_err(&pdev->dev, "Could not register PCM\n");
-               return ret;
+               goto err_suspend;
        }
 
        return 0;
index c213913..fd8c664 100644 (file)
@@ -5,6 +5,7 @@
 //  Author: Claude <claude@insginal.co.kr>
 
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 
@@ -74,6 +75,17 @@ static struct snd_soc_card arndale_rt5631 = {
        .num_links = ARRAY_SIZE(arndale_rt5631_dai),
 };
 
+static void arndale_put_of_nodes(struct snd_soc_card *card)
+{
+       struct snd_soc_dai_link *dai_link;
+       int i;
+
+       for_each_card_prelinks(card, i, dai_link) {
+               of_node_put(dai_link->cpus->of_node);
+               of_node_put(dai_link->codecs->of_node);
+       }
+}
+
 static int arndale_audio_probe(struct platform_device *pdev)
 {
        int n, ret;
@@ -103,18 +115,31 @@ static int arndale_audio_probe(struct platform_device *pdev)
                if (!arndale_rt5631_dai[0].codecs->of_node) {
                        dev_err(&pdev->dev,
                        "Property 'samsung,audio-codec' missing or invalid\n");
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err_put_of_nodes;
                }
        }
 
        ret = devm_snd_soc_register_card(card->dev, card);
+       if (ret) {
+               dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", ret);
+               goto err_put_of_nodes;
+       }
+       return 0;
 
-       if (ret)
-               dev_err(&pdev->dev, "snd_soc_register_card() failed:%d\n", ret);
-
+err_put_of_nodes:
+       arndale_put_of_nodes(card);
        return ret;
 }
 
+static int arndale_audio_remove(struct platform_device *pdev)
+{
+       struct snd_soc_card *card = platform_get_drvdata(pdev);
+
+       arndale_put_of_nodes(card);
+       return 0;
+}
+
 static const struct of_device_id samsung_arndale_rt5631_of_match[] __maybe_unused = {
        { .compatible = "samsung,arndale-rt5631", },
        { .compatible = "samsung,arndale-alc5631", },
@@ -129,6 +154,7 @@ static struct platform_driver arndale_audio_driver = {
                .of_match_table = of_match_ptr(samsung_arndale_rt5631_of_match),
        },
        .probe = arndale_audio_probe,
+       .remove = arndale_audio_remove,
 };
 
 module_platform_driver(arndale_audio_driver);
index bda5b95..e9596c2 100644 (file)
@@ -761,6 +761,7 @@ static int rsnd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
        }
 
        /* set format */
+       rdai->bit_clk_inv = 0;
        switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
        case SND_SOC_DAIFMT_I2S:
                rdai->sys_delay = 0;
index f6a7466..fc5d089 100644 (file)
@@ -286,6 +286,11 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
        if (rsnd_ssi_is_multi_slave(mod, io))
                return 0;
 
+       if (rsnd_runtime_is_tdm_split(io))
+               chan = rsnd_io_converted_chan(io);
+
+       chan = rsnd_channel_normalization(chan);
+
        if (ssi->usrcnt > 0) {
                if (ssi->rate != rate) {
                        dev_err(dev, "SSI parent/child should use same rate\n");
@@ -300,11 +305,6 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
                return 0;
        }
 
-       if (rsnd_runtime_is_tdm_split(io))
-               chan = rsnd_io_converted_chan(io);
-
-       chan = rsnd_channel_normalization(chan);
-
        main_rate = rsnd_ssi_clk_query(rdai, rate, chan, &idx);
        if (!main_rate) {
                dev_err(dev, "unsupported clock rate\n");
index 35f48e9..88978a3 100644 (file)
@@ -978,7 +978,7 @@ static void soc_cleanup_component(struct snd_soc_component *component)
        /* For framework level robustness */
        snd_soc_component_set_jack(component, NULL, NULL);
 
-       list_del(&component->card_list);
+       list_del_init(&component->card_list);
        snd_soc_dapm_free(snd_soc_component_get_dapm(component));
        soc_cleanup_component_debugfs(component);
        component->card = NULL;
index e163dde..b600d3e 100644 (file)
@@ -1070,7 +1070,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
                        return ret;
        }
 
-       snd_soc_dai_trigger(cpu_dai, substream, cmd);
+       ret = snd_soc_dai_trigger(cpu_dai, substream, cmd);
        if (ret < 0)
                return ret;
 
@@ -1097,7 +1097,7 @@ static int soc_pcm_bespoke_trigger(struct snd_pcm_substream *substream,
                        return ret;
        }
 
-       snd_soc_dai_bespoke_trigger(cpu_dai, substream, cmd);
+       ret = snd_soc_dai_bespoke_trigger(cpu_dai, substream, cmd);
        if (ret < 0)
                return ret;
 
@@ -1146,6 +1146,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
 {
        struct snd_soc_dpcm *dpcm;
        unsigned long flags;
+       char *name;
 
        /* only add new dpcms */
        for_each_dpcm_be(fe, stream, dpcm) {
@@ -1171,9 +1172,15 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
                        stream ? "<-" : "->", be->dai_link->name);
 
 #ifdef CONFIG_DEBUG_FS
-       dpcm->debugfs_state = debugfs_create_dir(be->dai_link->name,
-                                                fe->debugfs_dpcm_root);
-       debugfs_create_u32("state", 0644, dpcm->debugfs_state, &dpcm->state);
+       name = kasprintf(GFP_KERNEL, "%s:%s", be->dai_link->name,
+                        stream ? "capture" : "playback");
+       if (name) {
+               dpcm->debugfs_state = debugfs_create_dir(name,
+                                                        fe->debugfs_dpcm_root);
+               debugfs_create_u32("state", 0644, dpcm->debugfs_state,
+                                  &dpcm->state);
+               kfree(name);
+       }
 #endif
        return 1;
 }
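The soc_pcm_trigger()/soc_pcm_bespoke_trigger() hunks above fix a classic
unused-return bug: the DAI trigger result was discarded, so the following
"if (ret < 0)" tested a stale value and failures were silently ignored. A
standalone illustration of the pitfall (the names are made up):

	#include <stdio.h>

	static int dai_trigger(int fail) { return fail ? -5 /* -EIO */ : 0; }

	int main(void)
	{
		int ret = 0;

		dai_trigger(1);          /* buggy: result dropped */
		if (ret < 0)
			printf("never reached, the error is lost\n");

		ret = dai_trigger(1);    /* fixed: assign before testing */
		if (ret < 0)
			printf("error propagated: %d\n", ret);
		return 0;
	}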
index aa9a1fc..0fd0329 100644 (file)
@@ -1582,7 +1582,7 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg,
 
        /* map user to kernel widget ID */
        template.id = get_widget_id(le32_to_cpu(w->id));
-       if (template.id < 0)
+       if ((int)template.id < 0)
                return template.id;
 
        /* strings are allocated here, but used and freed by the widget */
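The cast in "(int)template.id < 0" matters because the id field is unsigned
in this context: without it, the negative error code returned by
get_widget_id() can never test below zero and the check is dead code. A
standalone illustration (the helper here is hypothetical):

	#include <stdio.h>

	static int get_widget_id(unsigned int user_id)
	{
		return user_id > 64 ? -22 /* -EINVAL */ : (int)user_id;
	}

	int main(void)
	{
		unsigned int id = (unsigned int)get_widget_id(100);

		if (id < 0)        /* always false: unsigned is never negative */
			printf("unreachable\n");
		if ((int)id < 0)   /* correct: reinterpret as signed first */
			printf("error detected: %d\n", (int)id);
		return 0;
	}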
index a4983f9..2b8711e 100644 (file)
@@ -60,13 +60,16 @@ int snd_sof_volume_put(struct snd_kcontrol *kcontrol,
        struct snd_sof_dev *sdev = scontrol->sdev;
        struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
        unsigned int i, channels = scontrol->num_channels;
+       bool change = false;
+       u32 value;
 
        /* update each channel */
        for (i = 0; i < channels; i++) {
-               cdata->chanv[i].value =
-                       mixer_to_ipc(ucontrol->value.integer.value[i],
+               value = mixer_to_ipc(ucontrol->value.integer.value[i],
                                     scontrol->volume_table, sm->max + 1);
+               change = change || (value != cdata->chanv[i].value);
                cdata->chanv[i].channel = i;
+               cdata->chanv[i].value = value;
        }
 
        /* notify DSP of mixer updates */
@@ -76,8 +79,7 @@ int snd_sof_volume_put(struct snd_kcontrol *kcontrol,
                                              SOF_CTRL_TYPE_VALUE_CHAN_GET,
                                              SOF_CTRL_CMD_VOLUME,
                                              true);
-
-       return 0;
+       return change;
 }
 
 int snd_sof_switch_get(struct snd_kcontrol *kcontrol,
@@ -105,11 +107,15 @@ int snd_sof_switch_put(struct snd_kcontrol *kcontrol,
        struct snd_sof_dev *sdev = scontrol->sdev;
        struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
        unsigned int i, channels = scontrol->num_channels;
+       bool change = false;
+       u32 value;
 
        /* update each channel */
        for (i = 0; i < channels; i++) {
-               cdata->chanv[i].value = ucontrol->value.integer.value[i];
+               value = ucontrol->value.integer.value[i];
+               change = change || (value != cdata->chanv[i].value);
                cdata->chanv[i].channel = i;
+               cdata->chanv[i].value = value;
        }
 
        /* notify DSP of mixer updates */
@@ -120,7 +126,7 @@ int snd_sof_switch_put(struct snd_kcontrol *kcontrol,
                                              SOF_CTRL_CMD_SWITCH,
                                              true);
 
-       return 0;
+       return change;
 }
 
 int snd_sof_enum_get(struct snd_kcontrol *kcontrol,
@@ -148,11 +154,15 @@ int snd_sof_enum_put(struct snd_kcontrol *kcontrol,
        struct snd_sof_dev *sdev = scontrol->sdev;
        struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
        unsigned int i, channels = scontrol->num_channels;
+       bool change = false;
+       u32 value;
 
        /* update each channel */
        for (i = 0; i < channels; i++) {
-               cdata->chanv[i].value = ucontrol->value.enumerated.item[i];
+               value = ucontrol->value.enumerated.item[i];
+               change = change || (value != cdata->chanv[i].value);
                cdata->chanv[i].channel = i;
+               cdata->chanv[i].value = value;
        }
 
        /* notify DSP of enum updates */
@@ -163,7 +173,7 @@ int snd_sof_enum_put(struct snd_kcontrol *kcontrol,
                                              SOF_CTRL_CMD_ENUM,
                                              true);
 
-       return 0;
+       return change;
 }
 
 int snd_sof_bytes_get(struct snd_kcontrol *kcontrol,
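Returning "change" instead of a hard-coded 0 follows the ALSA kcontrol
contract: a put handler returns 1 when the written value differs from the
stored one (which makes the core emit a control-change notification to
userspace), 0 when nothing changed, and a negative errno on error. A
non-compilable sketch of the pattern (struct example_priv and its cached
field are hypothetical):

	static int example_put(struct snd_kcontrol *kcontrol,
			       struct snd_ctl_elem_value *ucontrol)
	{
		struct example_priv *priv = snd_kcontrol_chip(kcontrol);
		long value = ucontrol->value.integer.value[0];
		bool change = (value != priv->cached_value);

		priv->cached_value = value;
		/* ... push the new value to the hardware/DSP here ... */
		return change;	/* 1 = changed, 0 = unchanged */
	}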
index 479ba24..d62f51d 100644 (file)
@@ -273,6 +273,16 @@ config SND_SOC_SOF_HDA_AUDIO_CODEC
          Say Y if you want to enable HDAudio codecs with SOF.
          If unsure select "N".
 
+config SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1
+       bool "SOF enable DMI Link L1"
+       help
+         This option enables DMI L1 for both playback and capture
+         and disables known workarounds for specific HDAudio platforms.
+         Only use this to look into power optimizations on platforms
+         not affected by DMI L1 issues. This option is not recommended.
+         Say Y if you want to enable DMI Link L1.
+         If unsure, select "N".
+
 endif ## SND_SOC_SOF_HDA_COMMON
 
 config SND_SOC_SOF_HDA_LINK_BASELINE
index e282179..80e2826 100644 (file)
@@ -37,6 +37,7 @@
 #define MBOX_SIZE       0x1000
 #define MBOX_DUMP_SIZE 0x30
 #define EXCEPT_OFFSET  0x800
+#define EXCEPT_MAX_HDR_SIZE    0x400
 
 /* DSP peripherals */
 #define DMAC0_OFFSET    0xFE000
@@ -228,6 +229,11 @@ static void bdw_get_registers(struct snd_sof_dev *sdev,
        /* note: variable AR register array is not read */
 
        /* then get panic info */
+       if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) {
+               dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n",
+                       xoops->arch_hdr.totalsize);
+               return;
+       }
        offset += xoops->arch_hdr.totalsize;
        sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info));
 
@@ -451,6 +457,7 @@ static int bdw_probe(struct snd_sof_dev *sdev)
        /* TODO: add offsets */
        sdev->mmio_bar = BDW_DSP_BAR;
        sdev->mailbox_bar = BDW_DSP_BAR;
+       sdev->dsp_oops_offset = MBOX_OFFSET;
 
        /* PCI base */
        mmio = platform_get_resource(pdev, IORESOURCE_MEM,
index 5e7a6aa..a1e514f 100644 (file)
@@ -28,6 +28,7 @@
 #define MBOX_OFFSET            0x144000
 #define MBOX_SIZE              0x1000
 #define EXCEPT_OFFSET          0x800
+#define EXCEPT_MAX_HDR_SIZE    0x400
 
 /* DSP peripherals */
 #define DMAC0_OFFSET           0x098000
@@ -126,6 +127,11 @@ static void byt_get_registers(struct snd_sof_dev *sdev,
        /* note: variable AR register array is not read */
 
        /* then get panic info */
+       if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) {
+               dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n",
+                       xoops->arch_hdr.totalsize);
+               return;
+       }
        offset += xoops->arch_hdr.totalsize;
        sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info));
 
index bc41028..df1909e 100644 (file)
@@ -139,20 +139,16 @@ void hda_dsp_ctrl_misc_clock_gating(struct snd_sof_dev *sdev, bool enable)
  */
 int hda_dsp_ctrl_clock_power_gating(struct snd_sof_dev *sdev, bool enable)
 {
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
-       struct hdac_bus *bus = sof_to_bus(sdev);
-#endif
        u32 val;
 
        /* enable/disable audio dsp clock gating */
        val = enable ? PCI_CGCTL_ADSPDCGE : 0;
        snd_sof_pci_update_bits(sdev, PCI_CGCTL, PCI_CGCTL_ADSPDCGE, val);
 
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
-       /* enable/disable L1 support */
-       val = enable ? SOF_HDA_VS_EM2_L1SEN : 0;
-       snd_hdac_chip_updatel(bus, VS_EM2, SOF_HDA_VS_EM2_L1SEN, val);
-#endif
+       /* enable/disable DMI Link L1 support */
+       val = enable ? HDA_VS_INTEL_EM2_L1SEN : 0;
+       snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, HDA_VS_INTEL_EM2,
+                               HDA_VS_INTEL_EM2_L1SEN, val);
 
        /* enable/disable audio dsp power gating */
        val = enable ? 0 : PCI_PGCTL_ADSPPGD;
index 6427f0b..65c2af3 100644 (file)
@@ -44,6 +44,7 @@ static int cl_stream_prepare(struct snd_sof_dev *sdev, unsigned int format,
                return -ENODEV;
        }
        hstream = &dsp_stream->hstream;
+       hstream->substream = NULL;
 
        /* allocate DMA buffer */
        ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, &pci->dev, size, dmab);
index ad8d41f..2c74471 100644 (file)
@@ -185,6 +185,17 @@ hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction)
                        direction == SNDRV_PCM_STREAM_PLAYBACK ?
                        "playback" : "capture");
 
+       /*
+        * Disable DMI Link L1 entry when capture stream is opened.
+        * Workaround to address a known issue with host DMA that results
+        * in xruns during pause/release in capture scenarios.
+        */
+       if (!IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1))
+               if (stream && direction == SNDRV_PCM_STREAM_CAPTURE)
+                       snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
+                                               HDA_VS_INTEL_EM2,
+                                               HDA_VS_INTEL_EM2_L1SEN, 0);
+
        return stream;
 }
 
@@ -193,23 +204,43 @@ int hda_dsp_stream_put(struct snd_sof_dev *sdev, int direction, int stream_tag)
 {
        struct hdac_bus *bus = sof_to_bus(sdev);
        struct hdac_stream *s;
+       bool active_capture_stream = false;
+       bool found = false;
 
        spin_lock_irq(&bus->reg_lock);
 
-       /* find used stream */
+       /*
+        * close stream matching the stream tag
+        * and check if there are any open capture streams.
+        */
        list_for_each_entry(s, &bus->stream_list, list) {
-               if (s->direction == direction &&
-                   s->opened && s->stream_tag == stream_tag) {
+               if (!s->opened)
+                       continue;
+
+               if (s->direction == direction && s->stream_tag == stream_tag) {
                        s->opened = false;
-                       spin_unlock_irq(&bus->reg_lock);
-                       return 0;
+                       found = true;
+               } else if (s->direction == SNDRV_PCM_STREAM_CAPTURE) {
+                       active_capture_stream = true;
                }
        }
 
        spin_unlock_irq(&bus->reg_lock);
 
-       dev_dbg(sdev->dev, "stream_tag %d not opened!\n", stream_tag);
-       return -ENODEV;
+       /* Enable DMI L1 entry if there are no capture streams open */
+       if (!IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1))
+               if (!active_capture_stream)
+                       snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
+                                               HDA_VS_INTEL_EM2,
+                                               HDA_VS_INTEL_EM2_L1SEN,
+                                               HDA_VS_INTEL_EM2_L1SEN);
+
+       if (!found) {
+               dev_dbg(sdev->dev, "stream_tag %d not opened!\n", stream_tag);
+               return -ENODEV;
+       }
+
+       return 0;
 }
 
 int hda_dsp_stream_trigger(struct snd_sof_dev *sdev,
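A note on the IS_ENABLED() guards above: the macro only recognizes full
Kconfig names, so it must be passed CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1;
an unprefixed symbol is just an undefined macro and evaluates to 0. A
standalone re-creation of the (simplified) trick from include/linux/kconfig.h
shows why the failure is silent:

	#include <stdio.h>

	#define __ARG_PLACEHOLDER_1 0,
	#define __take_second_arg(__ignored, val, ...) val
	#define __is_defined(x) ___is_defined(x)
	#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
	#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
	#define IS_ENABLED(option) __is_defined(option)

	#define CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1 1

	int main(void)
	{
		printf("%d\n", IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1)); /* 1 */
		printf("%d\n", IS_ENABLED(SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1));        /* 0 */
		return 0;
	}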
index c72e9a0..06e8467 100644 (file)
@@ -35,6 +35,8 @@
 #define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
 #define IS_CNL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9dc8)
 
+#define EXCEPT_MAX_HDR_SIZE    0x400
+
 /*
  * Debug
  */
@@ -131,6 +133,11 @@ static void hda_dsp_get_registers(struct snd_sof_dev *sdev,
        /* note: variable AR register array is not read */
 
        /* then get panic info */
+       if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) {
+               dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n",
+                       xoops->arch_hdr.totalsize);
+               return;
+       }
        offset += xoops->arch_hdr.totalsize;
        sof_block_read(sdev, sdev->mmio_bar, offset,
                       panic_info, sizeof(*panic_info));
index 5591841..23e430d 100644 (file)
@@ -39,7 +39,6 @@
 #define SOF_HDA_WAKESTS                        0x0E
 #define SOF_HDA_WAKESTS_INT_MASK       ((1 << 8) - 1)
 #define SOF_HDA_RIRBSTS                        0x5d
-#define SOF_HDA_VS_EM2_L1SEN            BIT(13)
 
 /* SOF_HDA_GCTL register bits */
 #define SOF_HDA_GCTL_RESET             BIT(0)
 #define HDA_DSP_REG_HIPCIE             (HDA_DSP_IPC_BASE + 0x0C)
 #define HDA_DSP_REG_HIPCCTL            (HDA_DSP_IPC_BASE + 0x10)
 
+/* Intel Vendor Specific Registers */
+#define HDA_VS_INTEL_EM2               0x1030
+#define HDA_VS_INTEL_EM2_L1SEN         BIT(13)
+
 /*  HIPCI */
 #define HDA_DSP_REG_HIPCI_BUSY         BIT(31)
 #define HDA_DSP_REG_HIPCI_MSG_MASK     0x7FFFFFFF
index d7f3274..9a9a381 100644 (file)
@@ -546,10 +546,10 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
                                 msecs_to_jiffies(sdev->boot_timeout));
        if (ret == 0) {
                dev_err(sdev->dev, "error: firmware boot failure\n");
-               /* after this point FW_READY msg should be ignored */
-               sdev->boot_complete = true;
                snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX |
                        SOF_DBG_TEXT | SOF_DBG_PCI);
+               /* after this point FW_READY msg should be ignored */
+               sdev->boot_complete = true;
                return -EIO;
        }
 
index e3f6a6d..2b876d4 100644 (file)
@@ -244,7 +244,7 @@ static int sof_pcm_hw_free(struct snd_pcm_substream *substream)
                snd_soc_rtdcom_lookup(rtd, DRV_NAME);
        struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
        struct snd_sof_pcm *spcm;
-       int ret;
+       int ret, err = 0;
 
        /* nothing to do for BE */
        if (rtd->dai_link->no_pcm)
@@ -254,26 +254,26 @@ static int sof_pcm_hw_free(struct snd_pcm_substream *substream)
        if (!spcm)
                return -EINVAL;
 
-       if (!spcm->prepared[substream->stream])
-               return 0;
-
        dev_dbg(sdev->dev, "pcm: free stream %d dir %d\n", spcm->pcm.pcm_id,
                substream->stream);
 
-       ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm);
+       if (spcm->prepared[substream->stream]) {
+               ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm);
+               if (ret < 0)
+                       err = ret;
+       }
 
        snd_pcm_lib_free_pages(substream);
 
        cancel_work_sync(&spcm->stream[substream->stream].period_elapsed_work);
 
-       if (ret < 0)
-               return ret;
-
        ret = snd_sof_pcm_platform_hw_free(sdev, substream);
-       if (ret < 0)
+       if (ret < 0) {
                dev_err(sdev->dev, "error: platform hw free failed\n");
+               err = ret;
+       }
 
-       return ret;
+       return err;
 }
 
 static int sof_pcm_prepare(struct snd_pcm_substream *substream)
@@ -323,6 +323,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
        struct sof_ipc_stream stream;
        struct sof_ipc_reply reply;
        bool reset_hw_params = false;
+       bool ipc_first = false;
        int ret;
 
        /* nothing to do for BE */
@@ -343,6 +344,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
        switch (cmd) {
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
                stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_PAUSE;
+               ipc_first = true;
                break;
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
                stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_RELEASE;
@@ -363,6 +365,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
        case SNDRV_PCM_TRIGGER_SUSPEND:
        case SNDRV_PCM_TRIGGER_STOP:
                stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_STOP;
+               ipc_first = true;
                reset_hw_params = true;
                break;
        default:
@@ -370,12 +373,22 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
                return -EINVAL;
        }
 
-       snd_sof_pcm_platform_trigger(sdev, substream, cmd);
+       /*
+        * The DMA and IPC sequence differs for start and stop: the STOP
+        * IPC must be sent before the DMA is stopped.
+        */
+       if (!ipc_first)
+               snd_sof_pcm_platform_trigger(sdev, substream, cmd);
 
        /* send IPC to the DSP */
        ret = sof_ipc_tx_message(sdev->ipc, stream.hdr.cmd, &stream,
                                 sizeof(stream), &reply, sizeof(reply));
 
+       /* need to STOP DMA even if STOP IPC failed */
+       if (ipc_first)
+               snd_sof_pcm_platform_trigger(sdev, substream, cmd);
+
+       /* free PCM if reset_hw_params is set and the STOP IPC is successful */
        if (!ret && reset_hw_params)
                ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm);
 
index fc85efb..0aabb31 100644 (file)
@@ -920,7 +920,9 @@ static void sof_parse_word_tokens(struct snd_soc_component *scomp,
                for (j = 0; j < count; j++) {
                        /* match token type */
                        if (!(tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_WORD ||
-                             tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_SHORT))
+                             tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_SHORT ||
+                             tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_BYTE ||
+                             tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_BOOL))
                                continue;
 
                        /* match token id */
index d7501f8..a406081 100644 (file)
@@ -505,10 +505,20 @@ static int stm32_sai_set_sysclk(struct snd_soc_dai *cpu_dai,
        if (dir == SND_SOC_CLOCK_OUT && sai->sai_mclk) {
                ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX,
                                         SAI_XCR1_NODIV,
-                                        (unsigned int)~SAI_XCR1_NODIV);
+                                        freq ? 0 : SAI_XCR1_NODIV);
                if (ret < 0)
                        return ret;
 
+               /* Assume shutdown if requested frequency is 0Hz */
+               if (!freq) {
+                       /* Release mclk rate only if rate was actually set */
+                       if (sai->mclk_rate) {
+                               clk_rate_exclusive_put(sai->sai_mclk);
+                               sai->mclk_rate = 0;
+                       }
+                       return 0;
+               }
+
                /* If master clock is used, set parent clock now */
                ret = stm32_sai_set_parent_clock(sai, freq);
                if (ret)
@@ -1093,15 +1103,6 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream,
 
        regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0);
 
-       regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV,
-                          SAI_XCR1_NODIV);
-
-       /* Release mclk rate only if rate was actually set */
-       if (sai->mclk_rate) {
-               clk_rate_exclusive_put(sai->sai_mclk);
-               sai->mclk_rate = 0;
-       }
-
        clk_disable_unprepare(sai->sai_ck);
 
        spin_lock_irqsave(&sai->irq_lock, flags);
index 87a9b9d..29f6105 100644 (file)
@@ -200,11 +200,18 @@ config SND_SOC_DM365_AIC3X_CODEC
 
 config SND_SOC_DM365_VOICE_CODEC
        bool "Voice Codec - CQ93VC"
-       select MFD_DAVINCI_VOICECODEC
-       select SND_SOC_CQ0093VC
        help
          Say Y if you want to add support for SoC On-chip voice codec
 endchoice
 
+config SND_SOC_DM365_VOICE_CODEC_MODULE
+       def_tristate y
+       depends on SND_SOC_DM365_VOICE_CODEC && SND_SOC
+       select MFD_DAVINCI_VOICECODEC
+       select SND_SOC_CQ0093VC
+       help
+         This is an internal symbol needed to ensure that the codec
+         and MFD driver can be built as loadable modules if necessary.
+
 endmenu
 
index 33cd267..ff5ab24 100644 (file)
@@ -348,6 +348,9 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs,
                ep = 0x84;
                ifnum = 0;
                goto add_sync_ep_from_ifnum;
+       case USB_ID(0x0582, 0x01d8): /* BOSS Katana */
+               /* BOSS Katana amplifiers do not need quirks */
+               return 0;
        }
 
        if (attr == USB_ENDPOINT_SYNC_ASYNC &&
index 25faf2d..0bbe120 100644 (file)
@@ -1657,7 +1657,10 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case 0x23ba:  /* Playback Designs */
        case 0x25ce:  /* Mytek devices */
        case 0x278b:  /* Rotel? */
+       case 0x292b:  /* Gustard/Ess based devices */
        case 0x2ab6:  /* T+A devices */
+       case 0x3842:  /* EVGA */
+       case 0xc502:  /* HiBy devices */
                if (fp->dsd_raw)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
index 3c8f73a..a5e584b 100644 (file)
@@ -75,7 +75,7 @@ static bool validate_processing_unit(const void *p,
 
        if (d->bLength < sizeof(*d))
                return false;
-       len = d->bLength < sizeof(*d) + d->bNrInPins;
+       len = sizeof(*d) + d->bNrInPins;
        if (d->bLength < len)
                return false;
        switch (v->protocol) {
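The validate_processing_unit() change removes a stray "<" that had turned the
length computation into a boolean: len received 0 or 1 from the comparison,
so the subsequent bLength bounds check was effectively always satisfied. A
standalone demonstration:

	#include <stdio.h>

	int main(void)
	{
		unsigned char bLength = 9, bNrInPins = 4;
		size_t hdr = 7;	/* stand-in for sizeof(*d) */

		size_t buggy = bLength < hdr + bNrInPins; /* comparison result: 0 or 1 */
		size_t fixed = hdr + bNrInPins;           /* actual expected length */

		printf("buggy len=%zu, fixed len=%zu\n", buggy, fixed); /* 1 vs 11 */
		return 0;
	}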
index a4217c1..2769360 100644 (file)
@@ -266,8 +266,10 @@ struct kvm_vcpu_events {
 #define   KVM_DEV_ARM_ITS_CTRL_RESET           4
 
 /* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT                28
+#define KVM_ARM_IRQ_VCPU2_MASK         0xf
 #define KVM_ARM_IRQ_TYPE_SHIFT         24
-#define KVM_ARM_IRQ_TYPE_MASK          0xff
+#define KVM_ARM_IRQ_TYPE_MASK          0xf
 #define KVM_ARM_IRQ_VCPU_SHIFT         16
 #define KVM_ARM_IRQ_VCPU_MASK          0xff
 #define KVM_ARM_IRQ_NUM_SHIFT          0
index 9a50771..67c21f9 100644 (file)
@@ -325,8 +325,10 @@ struct kvm_vcpu_events {
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER                1
 
 /* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT                28
+#define KVM_ARM_IRQ_VCPU2_MASK         0xf
 #define KVM_ARM_IRQ_TYPE_SHIFT         24
-#define KVM_ARM_IRQ_TYPE_MASK          0xff
+#define KVM_ARM_IRQ_TYPE_MASK          0xf
 #define KVM_ARM_IRQ_VCPU_SHIFT         16
 #define KVM_ARM_IRQ_VCPU_MASK          0xff
 #define KVM_ARM_IRQ_NUM_SHIFT          0
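The narrowed type mask goes hand in hand with the new VCPU2 field: the
KVM_IRQ_LINE irq word now carries four packed fields, with vcpu2 taking bits
31:28 that previously overlapped the 8-bit type mask. A standalone decode of
the layout implied by the definitions above (KVM_ARM_IRQ_NUM_MASK is assumed
to be 0xffff; the sample value is made up):

	#include <stdio.h>
	#include <stdint.h>

	#define KVM_ARM_IRQ_VCPU2_SHIFT	28
	#define KVM_ARM_IRQ_VCPU2_MASK	0xf
	#define KVM_ARM_IRQ_TYPE_SHIFT	24
	#define KVM_ARM_IRQ_TYPE_MASK	0xf
	#define KVM_ARM_IRQ_VCPU_SHIFT	16
	#define KVM_ARM_IRQ_VCPU_MASK	0xff
	#define KVM_ARM_IRQ_NUM_SHIFT	0
	#define KVM_ARM_IRQ_NUM_MASK	0xffff

	int main(void)
	{
		uint32_t irq = 0x21050010;

		printf("vcpu2=%u type=%u vcpu=%u num=%u\n",
		       (irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK,
		       (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK,
		       (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK,
		       (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK);
		return 0;
	}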
index 47104e5..436ec76 100644 (file)
@@ -231,6 +231,12 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_GSCB   (1UL << 9)
 #define KVM_SYNC_BPBC   (1UL << 10)
 #define KVM_SYNC_ETOKEN (1UL << 11)
+
+#define KVM_SYNC_S390_VALID_FIELDS \
+       (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
+        KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
+        KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
 #define SDNXL (1UL << SDNXC)
index 5171b9c..0652d3e 100644 (file)
 #define X86_FEATURE_VMMCALL            ( 8*32+15) /* Prefer VMMCALL to VMCALL */
 #define X86_FEATURE_XENPV              ( 8*32+16) /* "" Xen paravirtual guest */
 #define X86_FEATURE_EPT_AD             ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW      (18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS      (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
 #define X86_FEATURE_MD_CLEAR           (18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT    (18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
index a9731f8..2e8a30f 100644 (file)
@@ -75,6 +75,7 @@
 #define SVM_EXIT_MWAIT         0x08b
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
+#define SVM_EXIT_RDPRU         0x08e
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI           0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS     0x402
index 30d7d04..196fdd0 100644 (file)
@@ -3,7 +3,7 @@
 #define _UAPI_ASM_X86_UNISTD_H
 
 /* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT      0x40000000
+#define __X32_SYSCALL_BIT      0x40000000UL
 
 #ifndef __KERNEL__
 # ifdef __i386__
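
The UL suffix keeps expressions involving __X32_SYSCALL_BIT in unsigned long arithmetic. One way the old int-typed constant could bite, sketched under the assumption that a caller stashes the complement in a 32-bit variable:

    #include <stdio.h>

    #define X32_BIT_INT 0x40000000            /* old: plain int */
    #define X32_BIT_UL  0x40000000UL          /* new: unsigned long */

    int main(void)
    {
            unsigned long nr = 0xdead000040000001UL;
            unsigned int mask = ~X32_BIT_INT;  /* silently truncated to 32 bits */

            printf("%lx\n", nr & mask);        /* 1: the high 32 bits are wiped */
            printf("%lx\n", nr & ~X32_BIT_UL); /* dead000000000001: only bit 30 cleared */
            return 0;
    }
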
index f0b0c90..3eb8411 100644 (file)
@@ -31,6 +31,7 @@
 #define EXIT_REASON_EXCEPTION_NMI       0
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
 #define EXIT_REASON_TRIPLE_FAULT        2
+#define EXIT_REASON_INIT_SIGNAL                        3
 
 #define EXIT_REASON_PENDING_INTERRUPT   7
 #define EXIT_REASON_NMI_WINDOW          8
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
+#define EXIT_REASON_UMWAIT              67
+#define EXIT_REASON_TPAUSE              68
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_EXTERNAL_INTERRUPT,    "EXTERNAL_INTERRUPT" }, \
        { EXIT_REASON_TRIPLE_FAULT,          "TRIPLE_FAULT" }, \
+       { EXIT_REASON_INIT_SIGNAL,           "INIT_SIGNAL" }, \
        { EXIT_REASON_PENDING_INTERRUPT,     "PENDING_INTERRUPT" }, \
        { EXIT_REASON_NMI_WINDOW,            "NMI_WINDOW" }, \
        { EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
        { EXIT_REASON_RDSEED,                "RDSEED" }, \
        { EXIT_REASON_PML_FULL,              "PML_FULL" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
+       { EXIT_REASON_UMWAIT,                "UMWAIT" }, \
+       { EXIT_REASON_TPAUSE,                "TPAUSE" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
index fbf5e4a..5d1995f 100644 (file)
@@ -12,7 +12,11 @@ INSTALL ?= install
 CFLAGS += -Wall -O2
 CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
 
-ifeq ($(srctree),)
+# This will work when bpf is built in the tools env, where srctree
+# isn't set, and when invoked from the selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in-srctree
+# builds.
+ifndef building_out_of_srctree
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
index 6ecdd10..1178d30 100644 (file)
@@ -3,7 +3,11 @@ include ../scripts/Makefile.include
 
 bindir ?= /usr/bin
 
-ifeq ($(srctree),)
+# This will work when gpio is built in the tools env, where srctree
+# isn't set, and when invoked from the selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in-srctree
+# builds.
+ifndef building_out_of_srctree
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
diff --git a/tools/hv/Build b/tools/hv/Build
new file mode 100644 (file)
index 0000000..6cf51fa
--- /dev/null
@@ -0,0 +1,3 @@
+hv_kvp_daemon-y += hv_kvp_daemon.o
+hv_vss_daemon-y += hv_vss_daemon.o
+hv_fcopy_daemon-y += hv_fcopy_daemon.o
index 5db5e62..b57143d 100644 (file)
@@ -1,28 +1,55 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Hyper-V tools
-
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
-
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+include ../scripts/Makefile.include
 
 sbindir ?= /usr/sbin
 libexecdir ?= /usr/libexec
 sharedstatedir ?= /var/lib
 
-ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
 
 ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
 
 all: $(ALL_PROGRAMS)
 
-%: %.c
-       $(CC) $(CFLAGS) -o $@ $^
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o
+$(HV_KVP_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_kvp_daemon
+$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o
+$(HV_VSS_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_vss_daemon
+$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o
+$(HV_FCOPY_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_fcopy_daemon
+$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
 
 clean:
-       $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+       rm -f $(ALL_PROGRAMS)
+       find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
 
-install: all
+install: $(ALL_PROGRAMS)
        install -d -m 755 $(DESTDIR)$(sbindir); \
        install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
        install -d -m 755 $(DESTDIR)$(sharedstatedir); \
@@ -33,3 +60,7 @@ install: all
        for script in $(ALL_SCRIPTS); do \
                install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
        done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
index bbd75ac..550223f 100644 (file)
@@ -3,6 +3,7 @@
 #define _TOOLS_ASM_BUG_H
 
 #include <linux/compiler.h>
+#include <stdio.h>
 
 #define __WARN_printf(arg...)  do { fprintf(stderr, arg); } while (0)
 
index d83763a..e03b1ea 100644 (file)
@@ -31,25 +31,9 @@ struct rb_root {
        struct rb_node *rb_node;
 };
 
-/*
- * Leftmost-cached rbtrees.
- *
- * We do not cache the rightmost node based on footprint
- * size vs number of potential users that could benefit
- * from O(1) rb_last(). Just not worth it, users that want
- * this feature can always implement the logic explicitly.
- * Furthermore, users that want to cache both pointers may
- * find it a bit asymmetric, but that's ok.
- */
-struct rb_root_cached {
-       struct rb_root rb_root;
-       struct rb_node *rb_leftmost;
-};
-
 #define rb_parent(r)   ((struct rb_node *)((r)->__rb_parent_color & ~3))
 
 #define RB_ROOT        (struct rb_root) { NULL, }
-#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
 #define        rb_entry(ptr, type, member) container_of(ptr, type, member)
 
 #define RB_EMPTY_ROOT(root)  (READ_ONCE((root)->rb_node) == NULL)
@@ -71,12 +55,6 @@ extern struct rb_node *rb_prev(const struct rb_node *);
 extern struct rb_node *rb_first(const struct rb_root *);
 extern struct rb_node *rb_last(const struct rb_root *);
 
-extern void rb_insert_color_cached(struct rb_node *,
-                                  struct rb_root_cached *, bool);
-extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
-/* Same as rb_first(), but O(1) */
-#define rb_first_cached(root) (root)->rb_leftmost
-
 /* Postorder iteration - always visit the parent after its children */
 extern struct rb_node *rb_first_postorder(const struct rb_root *);
 extern struct rb_node *rb_next_postorder(const struct rb_node *);
@@ -84,8 +62,6 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *);
 /* Fast replacement of a single node without remove/rebalance/add/rebalance */
 extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
                            struct rb_root *root);
-extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
-                                  struct rb_root_cached *root);
 
 static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
                                struct rb_node **rb_link)
@@ -129,4 +105,51 @@ static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
        rb_erase(n, root);
        RB_CLEAR_NODE(n);
 }
+
+/*
+ * Leftmost-cached rbtrees.
+ *
+ * We do not cache the rightmost node based on footprint
+ * size vs number of potential users that could benefit
+ * from O(1) rb_last(). Just not worth it, users that want
+ * this feature can always implement the logic explicitly.
+ * Furthermore, users that want to cache both pointers may
+ * find it a bit asymmetric, but that's ok.
+ */
+struct rb_root_cached {
+       struct rb_root rb_root;
+       struct rb_node *rb_leftmost;
+};
+
+#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+
+/* Same as rb_first(), but O(1) */
+#define rb_first_cached(root) (root)->rb_leftmost
+
+static inline void rb_insert_color_cached(struct rb_node *node,
+                                         struct rb_root_cached *root,
+                                         bool leftmost)
+{
+       if (leftmost)
+               root->rb_leftmost = node;
+       rb_insert_color(node, &root->rb_root);
+}
+
+static inline void rb_erase_cached(struct rb_node *node,
+                                  struct rb_root_cached *root)
+{
+       if (root->rb_leftmost == node)
+               root->rb_leftmost = rb_next(node);
+       rb_erase(node, &root->rb_root);
+}
+
+static inline void rb_replace_node_cached(struct rb_node *victim,
+                                         struct rb_node *new,
+                                         struct rb_root_cached *root)
+{
+       if (root->rb_leftmost == victim)
+               root->rb_leftmost = new;
+       rb_replace_node(victim, new, &root->rb_root);
+}
+
 #endif /* __TOOLS_LINUX_PERF_RBTREE_H */
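
A brief usage sketch for the helpers above, showing the usual insert idiom that tracks whether the new node became the leftmost one ('struct item' and its key field are illustrative):

    struct item {
            unsigned long key;
            struct rb_node node;
    };

    static void item_insert(struct rb_root_cached *root, struct item *it)
    {
            struct rb_node **link = &root->rb_root.rb_node, *parent = NULL;
            bool leftmost = true;

            while (*link) {
                    struct item *cur = rb_entry(*link, struct item, node);

                    parent = *link;
                    if (it->key < cur->key) {
                            link = &parent->rb_left;
                    } else {
                            link = &parent->rb_right;
                            leftmost = false;  /* descended right at least once */
                    }
            }
            rb_link_node(&it->node, parent, link);
            rb_insert_color_cached(&it->node, root, leftmost);
    }
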
index ddd0100..381aa94 100644 (file)
@@ -32,17 +32,16 @@ struct rb_augment_callbacks {
        void (*rotate)(struct rb_node *old, struct rb_node *new);
 };
 
-extern void __rb_insert_augmented(struct rb_node *node,
-                                 struct rb_root *root,
-                                 bool newleft, struct rb_node **leftmost,
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
        void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
 /*
  * Fixup the rbtree and update the augmented information when rebalancing.
  *
  * On insertion, the user must update the augmented information on the path
  * leading to the inserted node, then call rb_link_node() as usual and
- * rb_augment_inserted() instead of the usual rb_insert_color() call.
- * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * rb_insert_augmented() instead of the usual rb_insert_color() call.
+ * If rb_insert_augmented() rebalances the rbtree, it will callback into
  * a user provided function to update the augmented information on the
  * affected subtrees.
  */
@@ -50,7 +49,7 @@ static inline void
 rb_insert_augmented(struct rb_node *node, struct rb_root *root,
                    const struct rb_augment_callbacks *augment)
 {
-       __rb_insert_augmented(node, root, false, NULL, augment->rotate);
+       __rb_insert_augmented(node, root, augment->rotate);
 }
 
 static inline void
@@ -58,45 +57,92 @@ rb_insert_augmented_cached(struct rb_node *node,
                           struct rb_root_cached *root, bool newleft,
                           const struct rb_augment_callbacks *augment)
 {
-       __rb_insert_augmented(node, &root->rb_root,
-                             newleft, &root->rb_leftmost, augment->rotate);
+       if (newleft)
+               root->rb_leftmost = node;
+       rb_insert_augmented(node, &root->rb_root, augment);
 }
 
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,      \
-                            rbtype, rbaugmented, rbcompute)            \
+/*
+ * Template for declaring augmented rbtree callbacks (generic case)
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
+ */
+
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                         \
+                            RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \
 static inline void                                                     \
-rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
+RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
 {                                                                      \
        while (rb != stop) {                                            \
-               rbstruct *node = rb_entry(rb, rbstruct, rbfield);       \
-               rbtype augmented = rbcompute(node);                     \
-               if (node->rbaugmented == augmented)                     \
+               RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);       \
+               if (RBCOMPUTE(node, true))                              \
                        break;                                          \
-               node->rbaugmented = augmented;                          \
-               rb = rb_parent(&node->rbfield);                         \
+               rb = rb_parent(&node->RBFIELD);                         \
        }                                                               \
 }                                                                      \
 static inline void                                                     \
-rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
+RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
 }                                                                      \
 static void                                                            \
-rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
+RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
-       old->rbaugmented = rbcompute(old);                              \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
+       RBCOMPUTE(old, false);                                          \
 }                                                                      \
-rbstatic const struct rb_augment_callbacks rbname = {                  \
-       .propagate = rbname ## _propagate,                              \
-       .copy = rbname ## _copy,                                        \
-       .rotate = rbname ## _rotate                                     \
+RBSTATIC const struct rb_augment_callbacks RBNAME = {                  \
+       .propagate = RBNAME ## _propagate,                              \
+       .copy = RBNAME ## _copy,                                        \
+       .rotate = RBNAME ## _rotate                                     \
 };
 
+/*
+ * Template for declaring augmented rbtree callbacks,
+ * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes.
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that returns the per-node RBTYPE scalar
+ */
+
+#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,        \
+                                RBTYPE, RBAUGMENTED, RBCOMPUTE)              \
+static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit)         \
+{                                                                            \
+       RBSTRUCT *child;                                                      \
+       RBTYPE max = RBCOMPUTE(node);                                         \
+       if (node->RBFIELD.rb_left) {                                          \
+               child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD);   \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (node->RBFIELD.rb_right) {                                         \
+               child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD);  \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (exit && node->RBAUGMENTED == max)                                 \
+               return true;                                                  \
+       node->RBAUGMENTED = max;                                              \
+       return false;                                                         \
+}                                                                            \
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                                       \
+                    RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max)
+
 
 #define        RB_RED          0
 #define        RB_BLACK        1
@@ -139,7 +185,6 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 
 static __always_inline struct rb_node *
 __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-                    struct rb_node **leftmost,
                     const struct rb_augment_callbacks *augment)
 {
        struct rb_node *child = node->rb_right;
@@ -147,9 +192,6 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
        struct rb_node *parent, *rebalance;
        unsigned long pc;
 
-       if (leftmost && node == *leftmost)
-               *leftmost = rb_next(node);
-
        if (!tmp) {
                /*
                 * Case 1: node to erase has no more than 1 child (easy!)
@@ -249,8 +291,7 @@ static __always_inline void
 rb_erase_augmented(struct rb_node *node, struct rb_root *root,
                   const struct rb_augment_callbacks *augment)
 {
-       struct rb_node *rebalance = __rb_erase_augmented(node, root,
-                                                        NULL, augment);
+       struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
        if (rebalance)
                __rb_erase_color(rebalance, root, augment->rotate);
 }
@@ -259,11 +300,9 @@ static __always_inline void
 rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root,
                          const struct rb_augment_callbacks *augment)
 {
-       struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root,
-                                                        &root->rb_leftmost,
-                                                        augment);
-       if (rebalance)
-               __rb_erase_color(rebalance, &root->rb_root, augment->rotate);
+       if (root->rb_leftmost == node)
+               root->rb_leftmost = rb_next(node);
+       rb_erase_augmented(node, &root->rb_root, augment);
 }
 
 #endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
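
A hedged sketch of how the reworked RB_DECLARE_CALLBACKS_MAX template is instantiated, in the style of the kernel's interval trees (the struct and names are illustrative, not taken from the patch):

    struct interval {
            struct rb_node rb;              /* RBFIELD */
            unsigned long start, last;
            unsigned long subtree_last;     /* RBAUGMENTED: max 'last' in subtree */
    };

    static inline unsigned long interval_last(struct interval *node) /* RBCOMPUTE */
    {
            return node->last;
    }

    RB_DECLARE_CALLBACKS_MAX(static, interval_augment,
                             struct interval, rb,
                             unsigned long, subtree_last, interval_last)

Insertions then pass &interval_augment to rb_insert_augmented(), and the generated callbacks keep subtree_last equal to the maximum interval_last() over each subtree.
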
index 63b1f50..c160a53 100644 (file)
@@ -67,6 +67,9 @@
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index 1be0e79..1fc8faa 100644 (file)
@@ -569,7 +569,7 @@ __SYSCALL(__NR_semget, sys_semget)
 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
 #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_semtimedop 192
-__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32)
+__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop)
 #endif
 #define __NR_semop 193
 __SYSCALL(__NR_semop, sys_semop)
index 328d05e..469dc51 100644 (file)
@@ -521,6 +521,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PRIORITY  (1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION        (1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES        (1ul << 3)
+#define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
 
 #define I915_PARAM_HUC_STATUS           42
 
index 2a616aa..379a612 100644 (file)
@@ -13,6 +13,9 @@
 #include <linux/limits.h>
 #include <linux/ioctl.h>
 #include <linux/types.h>
+#ifndef __KERNEL__
+#include <linux/fscrypt.h>
+#endif
 
 /* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
 #if !defined(__KERNEL__)
@@ -212,57 +215,6 @@ struct fsxattr {
 #define FS_IOC_GETFSLABEL              _IOR(0x94, 49, char[FSLABEL_MAX])
 #define FS_IOC_SETFSLABEL              _IOW(0x94, 50, char[FSLABEL_MAX])
 
-/*
- * File system encryption support
- */
-/* Policy provided via an ioctl on the topmost directory */
-#define FS_KEY_DESCRIPTOR_SIZE 8
-
-#define FS_POLICY_FLAGS_PAD_4          0x00
-#define FS_POLICY_FLAGS_PAD_8          0x01
-#define FS_POLICY_FLAGS_PAD_16         0x02
-#define FS_POLICY_FLAGS_PAD_32         0x03
-#define FS_POLICY_FLAGS_PAD_MASK       0x03
-#define FS_POLICY_FLAG_DIRECT_KEY      0x04    /* use master key directly */
-#define FS_POLICY_FLAGS_VALID          0x07
-
-/* Encryption algorithms */
-#define FS_ENCRYPTION_MODE_INVALID             0
-#define FS_ENCRYPTION_MODE_AES_256_XTS         1
-#define FS_ENCRYPTION_MODE_AES_256_GCM         2
-#define FS_ENCRYPTION_MODE_AES_256_CBC         3
-#define FS_ENCRYPTION_MODE_AES_256_CTS         4
-#define FS_ENCRYPTION_MODE_AES_128_CBC         5
-#define FS_ENCRYPTION_MODE_AES_128_CTS         6
-#define FS_ENCRYPTION_MODE_SPECK128_256_XTS    7 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_SPECK128_256_CTS    8 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_ADIANTUM            9
-
-struct fscrypt_policy {
-       __u8 version;
-       __u8 contents_encryption_mode;
-       __u8 filenames_encryption_mode;
-       __u8 flags;
-       __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
-};
-
-#define FS_IOC_SET_ENCRYPTION_POLICY   _IOR('f', 19, struct fscrypt_policy)
-#define FS_IOC_GET_ENCRYPTION_PWSALT   _IOW('f', 20, __u8[16])
-#define FS_IOC_GET_ENCRYPTION_POLICY   _IOW('f', 21, struct fscrypt_policy)
-
-/* Parameters for passing an encryption key into the kernel keyring */
-#define FS_KEY_DESC_PREFIX             "fscrypt:"
-#define FS_KEY_DESC_PREFIX_SIZE                8
-
-/* Structure that userspace passes to the kernel keyring */
-#define FS_MAX_KEY_SIZE                        64
-
-struct fscrypt_key {
-       __u32 mode;
-       __u8 raw[FS_MAX_KEY_SIZE];
-       __u32 size;
-};
-
 /*
  * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
  *
@@ -306,6 +258,7 @@ struct fscrypt_key {
 #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
 #define FS_HUGE_FILE_FL                        0x00040000 /* Reserved for ext4 */
 #define FS_EXTENT_FL                   0x00080000 /* Extents */
+#define FS_VERITY_FL                   0x00100000 /* Verity protected inode */
 #define FS_EA_INODE_FL                 0x00200000 /* Inode used for large EA */
 #define FS_EOFBLOCKS_FL                        0x00400000 /* Reserved for ext4 */
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
new file mode 100644 (file)
index 0000000..39ccfe9
--- /dev/null
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * fscrypt user API
+ *
+ * These ioctls can be used on filesystems that support fscrypt.  See the
+ * "User API" section of Documentation/filesystems/fscrypt.rst.
+ */
+#ifndef _UAPI_LINUX_FSCRYPT_H
+#define _UAPI_LINUX_FSCRYPT_H
+
+#include <linux/types.h>
+
+/* Encryption policy flags */
+#define FSCRYPT_POLICY_FLAGS_PAD_4             0x00
+#define FSCRYPT_POLICY_FLAGS_PAD_8             0x01
+#define FSCRYPT_POLICY_FLAGS_PAD_16            0x02
+#define FSCRYPT_POLICY_FLAGS_PAD_32            0x03
+#define FSCRYPT_POLICY_FLAGS_PAD_MASK          0x03
+#define FSCRYPT_POLICY_FLAG_DIRECT_KEY         0x04
+#define FSCRYPT_POLICY_FLAGS_VALID             0x07
+
+/* Encryption algorithms */
+#define FSCRYPT_MODE_AES_256_XTS               1
+#define FSCRYPT_MODE_AES_256_CTS               4
+#define FSCRYPT_MODE_AES_128_CBC               5
+#define FSCRYPT_MODE_AES_128_CTS               6
+#define FSCRYPT_MODE_ADIANTUM                  9
+#define __FSCRYPT_MODE_MAX                     9
+
+/*
+ * Legacy policy version; ad-hoc KDF and no key verification.
+ * For new encrypted directories, use fscrypt_policy_v2 instead.
+ *
+ * Careful: the .version field for this is actually 0, not 1.
+ */
+#define FSCRYPT_POLICY_V1              0
+#define FSCRYPT_KEY_DESCRIPTOR_SIZE    8
+struct fscrypt_policy_v1 {
+       __u8 version;
+       __u8 contents_encryption_mode;
+       __u8 filenames_encryption_mode;
+       __u8 flags;
+       __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+};
+#define fscrypt_policy fscrypt_policy_v1
+
+/*
+ * Process-subscribed "logon" key description prefix and payload format.
+ * Deprecated; prefer FS_IOC_ADD_ENCRYPTION_KEY instead.
+ */
+#define FSCRYPT_KEY_DESC_PREFIX                "fscrypt:"
+#define FSCRYPT_KEY_DESC_PREFIX_SIZE   8
+#define FSCRYPT_MAX_KEY_SIZE           64
+struct fscrypt_key {
+       __u32 mode;
+       __u8 raw[FSCRYPT_MAX_KEY_SIZE];
+       __u32 size;
+};
+
+/*
+ * New policy version with HKDF and key verification (recommended).
+ */
+#define FSCRYPT_POLICY_V2              2
+#define FSCRYPT_KEY_IDENTIFIER_SIZE    16
+struct fscrypt_policy_v2 {
+       __u8 version;
+       __u8 contents_encryption_mode;
+       __u8 filenames_encryption_mode;
+       __u8 flags;
+       __u8 __reserved[4];
+       __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
+};
+
+/* Struct passed to FS_IOC_GET_ENCRYPTION_POLICY_EX */
+struct fscrypt_get_policy_ex_arg {
+       __u64 policy_size; /* input/output */
+       union {
+               __u8 version;
+               struct fscrypt_policy_v1 v1;
+               struct fscrypt_policy_v2 v2;
+       } policy; /* output */
+};
+
+/*
+ * v1 policy keys are specified by an arbitrary 8-byte key "descriptor",
+ * matching fscrypt_policy_v1::master_key_descriptor.
+ */
+#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR       1
+
+/*
+ * v2 policy keys are specified by a 16-byte key "identifier" which the kernel
+ * calculates as a cryptographic hash of the key itself,
+ * matching fscrypt_policy_v2::master_key_identifier.
+ */
+#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER       2
+
+/*
+ * Specifies a key, either for v1 or v2 policies.  This doesn't contain the
+ * actual key itself; this is just the "name" of the key.
+ */
+struct fscrypt_key_specifier {
+       __u32 type;     /* one of FSCRYPT_KEY_SPEC_TYPE_* */
+       __u32 __reserved;
+       union {
+               __u8 __reserved[32]; /* reserve some extra space */
+               __u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+               __u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
+       } u;
+};
+
+/* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */
+struct fscrypt_add_key_arg {
+       struct fscrypt_key_specifier key_spec;
+       __u32 raw_size;
+       __u32 __reserved[9];
+       __u8 raw[];
+};
+
+/* Struct passed to FS_IOC_REMOVE_ENCRYPTION_KEY */
+struct fscrypt_remove_key_arg {
+       struct fscrypt_key_specifier key_spec;
+#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY     0x00000001
+#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS    0x00000002
+       __u32 removal_status_flags;     /* output */
+       __u32 __reserved[5];
+};
+
+/* Struct passed to FS_IOC_GET_ENCRYPTION_KEY_STATUS */
+struct fscrypt_get_key_status_arg {
+       /* input */
+       struct fscrypt_key_specifier key_spec;
+       __u32 __reserved[6];
+
+       /* output */
+#define FSCRYPT_KEY_STATUS_ABSENT              1
+#define FSCRYPT_KEY_STATUS_PRESENT             2
+#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED        3
+       __u32 status;
+#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF   0x00000001
+       __u32 status_flags;
+       __u32 user_count;
+       __u32 __out_reserved[13];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY           _IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT           _IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY           _IOW('f', 21, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_POLICY_EX                _IOWR('f', 22, __u8[9]) /* size + version */
+#define FS_IOC_ADD_ENCRYPTION_KEY              _IOWR('f', 23, struct fscrypt_add_key_arg)
+#define FS_IOC_REMOVE_ENCRYPTION_KEY           _IOWR('f', 24, struct fscrypt_remove_key_arg)
+#define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg)
+#define FS_IOC_GET_ENCRYPTION_KEY_STATUS       _IOWR('f', 26, struct fscrypt_get_key_status_arg)
+
+/**********************************************************************/
+
+/* old names; don't add anything new here! */
+#ifndef __KERNEL__
+#define FS_KEY_DESCRIPTOR_SIZE         FSCRYPT_KEY_DESCRIPTOR_SIZE
+#define FS_POLICY_FLAGS_PAD_4          FSCRYPT_POLICY_FLAGS_PAD_4
+#define FS_POLICY_FLAGS_PAD_8          FSCRYPT_POLICY_FLAGS_PAD_8
+#define FS_POLICY_FLAGS_PAD_16         FSCRYPT_POLICY_FLAGS_PAD_16
+#define FS_POLICY_FLAGS_PAD_32         FSCRYPT_POLICY_FLAGS_PAD_32
+#define FS_POLICY_FLAGS_PAD_MASK       FSCRYPT_POLICY_FLAGS_PAD_MASK
+#define FS_POLICY_FLAG_DIRECT_KEY      FSCRYPT_POLICY_FLAG_DIRECT_KEY
+#define FS_POLICY_FLAGS_VALID          FSCRYPT_POLICY_FLAGS_VALID
+#define FS_ENCRYPTION_MODE_INVALID     0       /* never used */
+#define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS
+#define FS_ENCRYPTION_MODE_AES_256_GCM 2       /* never used */
+#define FS_ENCRYPTION_MODE_AES_256_CBC 3       /* never used */
+#define FS_ENCRYPTION_MODE_AES_256_CTS FSCRYPT_MODE_AES_256_CTS
+#define FS_ENCRYPTION_MODE_AES_128_CBC FSCRYPT_MODE_AES_128_CBC
+#define FS_ENCRYPTION_MODE_AES_128_CTS FSCRYPT_MODE_AES_128_CTS
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS    7       /* removed */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS    8       /* removed */
+#define FS_ENCRYPTION_MODE_ADIANTUM    FSCRYPT_MODE_ADIANTUM
+#define FS_KEY_DESC_PREFIX             FSCRYPT_KEY_DESC_PREFIX
+#define FS_KEY_DESC_PREFIX_SIZE                FSCRYPT_KEY_DESC_PREFIX_SIZE
+#define FS_MAX_KEY_SIZE                        FSCRYPT_MAX_KEY_SIZE
+#endif /* !__KERNEL__ */
+
+#endif /* _UAPI_LINUX_FSCRYPT_H */
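
A hedged userspace sketch of the new v2 key-management flow using FS_IOC_ADD_ENCRYPTION_KEY (mnt_fd is assumed to be an open fd on the target filesystem; error handling is trimmed):

    #include <linux/fscrypt.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int add_fscrypt_key(int mnt_fd, const __u8 *raw, __u32 raw_size)
    {
            struct fscrypt_add_key_arg *arg;
            int ret;

            arg = calloc(1, sizeof(*arg) + raw_size);
            if (!arg)
                    return -1;
            arg->key_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
            arg->raw_size = raw_size;
            memcpy(arg->raw, raw, raw_size);

            /* On success the kernel fills key_spec.u.identifier with the
             * computed 16-byte key identifier. */
            ret = ioctl(mnt_fd, FS_IOC_ADD_ENCRYPTION_KEY, arg);
            free(arg);
            return ret;
    }
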
index 5e3f12d..52641d8 100644 (file)
@@ -243,6 +243,8 @@ struct kvm_hyperv_exit {
 #define KVM_INTERNAL_ERROR_SIMUL_EX    2
 /* Encounter unexpected vm-exit due to delivery event. */
 #define KVM_INTERNAL_ERROR_DELIVERY_EV 3
+/* Encounter unexpected vm-exit reason */
+#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON      4
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -996,6 +998,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
 #define KVM_CAP_PMU_EVENT_FILTER 173
+#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
+#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1142,6 +1146,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_S390           0x5000000000000000ULL
 #define KVM_REG_ARM64          0x6000000000000000ULL
 #define KVM_REG_MIPS           0x7000000000000000ULL
+#define KVM_REG_RISCV          0x8000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT     52
 #define KVM_REG_SIZE_MASK      0x00f0000000000000ULL
index 094bb03..7da1b37 100644 (file)
@@ -181,7 +181,7 @@ struct prctl_mm_map {
 #define PR_GET_THP_DISABLE     42
 
 /*
- * Tell the kernel to start/stop helping userspace manage bounds tables.
+ * No longer implemented, but left here to ensure the numbers stay reserved:
  */
 #define PR_MPX_ENABLE_MANAGEMENT  43
 #define PR_MPX_DISABLE_MANAGEMENT 44
@@ -229,4 +229,9 @@ struct prctl_mm_map {
 # define PR_PAC_APDBKEY                        (1UL << 3)
 # define PR_PAC_APGAKEY                        (1UL << 4)
 
+/* Tagged user address controls for arm64 */
+#define PR_SET_TAGGED_ADDR_CTRL                55
+#define PR_GET_TAGGED_ADDR_CTRL                56
+# define PR_TAGGED_ADDR_ENABLE         (1UL << 0)
+
 #endif /* _LINUX_PRCTL_H */
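
A minimal sketch of the new arm64 tagged-address control from userspace (the constants are repeated locally so the snippet builds against older headers; the call fails with EINVAL where the ABI is unsupported):

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_TAGGED_ADDR_CTRL
    #define PR_SET_TAGGED_ADDR_CTRL 55
    #define PR_GET_TAGGED_ADDR_CTRL 56
    #define PR_TAGGED_ADDR_ENABLE   (1UL << 0)
    #endif

    int main(void)
    {
            /* The unused prctl arguments must be zero. */
            if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0))
                    perror("PR_SET_TAGGED_ADDR_CTRL");
            return 0;
    }
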
index b3105ac..99335e1 100644 (file)
 #define CLONE_NEWNET           0x40000000      /* New network namespace */
 #define CLONE_IO               0x80000000      /* Clone io context */
 
-/*
- * Arguments for the clone3 syscall
+#ifndef __ASSEMBLY__
+/**
+ * struct clone_args - arguments for the clone3 syscall
+ * @flags:       Flags for the new process as listed above.
+ *               All flags are valid except for CSIGNAL and
+ *               CLONE_DETACHED.
+ * @pidfd:       If CLONE_PIDFD is set, a pidfd will be
+ *               returned in this argument.
+ * @child_tid:   If CLONE_CHILD_SETTID is set, the TID of the
+ *               child process will be returned in the child's
+ *               memory.
+ * @parent_tid:  If CLONE_PARENT_SETTID is set, the TID of
+ *               the child process will be returned in the
+ *               parent's memory.
+ * @exit_signal: The exit_signal the parent process will be
+ *               sent when the child exits.
+ * @stack:       Specify the location of the stack for the
+ *               child process.
+ * @stack_size:  The size of the stack for the child process.
+ * @tls:         If CLONE_SETTLS is set, the tls descriptor
+ *               is set to tls.
+ *
+ * The structure is versioned by size and thus extensible.
+ * New struct members must go at the end of the struct and
+ * must be properly 64bit aligned.
  */
 struct clone_args {
        __aligned_u64 flags;
@@ -46,6 +69,9 @@ struct clone_args {
        __aligned_u64 stack_size;
        __aligned_u64 tls;
 };
+#endif
+
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
 
 /*
  * Scheduling policies
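
Since struct clone_args is versioned by size, callers pass sizeof(args) as the second syscall argument and the kernel rejects sizes it does not recognize. A hedged raw-syscall sketch (the __NR_clone3 fallback assumes the arch-independent syscall number 435):

    #define _GNU_SOURCE
    #include <linux/sched.h>    /* struct clone_args */
    #include <signal.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef __NR_clone3
    #define __NR_clone3 435
    #endif

    int main(void)
    {
            struct clone_args args;
            long pid;

            memset(&args, 0, sizeof(args));
            args.exit_signal = SIGCHLD;     /* plain fork()-like child */

            pid = syscall(__NR_clone3, &args, sizeof(args));
            if (pid == 0)
                    _exit(0);               /* child */
            return pid < 0;                 /* parent: nonzero on failure */
    }
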
index 78efe87..cf525cd 100644 (file)
@@ -158,6 +158,7 @@ struct usbdevfs_hub_portinfo {
 #define USBDEVFS_CAP_MMAP                      0x20
 #define USBDEVFS_CAP_DROP_PRIVILEGES           0x40
 #define USBDEVFS_CAP_CONNINFO_EX               0x80
+#define USBDEVFS_CAP_SUSPEND                   0x100
 
 /* USBDEVFS_DISCONNECT_CLAIM flags & struct */
 
@@ -223,5 +224,8 @@ struct usbdevfs_streams {
  * extending size of the data returned.
  */
 #define USBDEVFS_CONNINFO_EX(len)  _IOC(_IOC_READ, 'U', 32, len)
+#define USBDEVFS_FORBID_SUSPEND    _IO('U', 33)
+#define USBDEVFS_ALLOW_SUSPEND     _IO('U', 34)
+#define USBDEVFS_WAIT_FOR_RESUME   _IO('U', 35)
 
 #endif /* _UAPI_LINUX_USBDEVICE_FS_H */
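
A hedged sketch of the new suspend ioctls on a usbfs device node (an fd opened from /dev/bus/usb/BBB/DDD); the wait call blocks until the device has been suspended and resumed:

    #include <linux/usbdevice_fs.h>
    #include <sys/ioctl.h>

    static int hold_device_awake(int fd)
    {
            if (ioctl(fd, USBDEVFS_FORBID_SUSPEND) < 0)  /* pin the device awake */
                    return -1;

            /* ... latency-sensitive I/O here ... */

            if (ioctl(fd, USBDEVFS_ALLOW_SUSPEND) < 0)   /* allow suspend again */
                    return -1;
            return ioctl(fd, USBDEVFS_WAIT_FOR_RESUME);  /* block until resume */
    }
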
index c6f94cf..56ce629 100644 (file)
@@ -8,7 +8,11 @@ LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
 
 MAKEFLAGS += --no-print-directory
 
-ifeq ($(srctree),)
+# This will work when bpf is built in the tools env, where srctree
+# isn't set, and when invoked from the selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in-srctree
+# builds.
+ifndef building_out_of_srctree
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -110,6 +114,9 @@ override CFLAGS += $(INCLUDES)
 override CFLAGS += -fvisibility=hidden
 override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 
+# flags specific for shared library
+SHLIB_FLAGS := -DSHARED
+
 ifeq ($(VERBOSE),1)
   Q =
 else
@@ -126,14 +133,17 @@ all:
 export srctree OUTPUT CC LD CFLAGS V
 include $(srctree)/tools/build/Makefile.include
 
-BPF_IN         := $(OUTPUT)libbpf-in.o
+SHARED_OBJDIR  := $(OUTPUT)sharedobjs/
+STATIC_OBJDIR  := $(OUTPUT)staticobjs/
+BPF_IN_SHARED  := $(SHARED_OBJDIR)libbpf-in.o
+BPF_IN_STATIC  := $(STATIC_OBJDIR)libbpf-in.o
 VERSION_SCRIPT := libbpf.map
 
 LIB_TARGET     := $(addprefix $(OUTPUT),$(LIB_TARGET))
 LIB_FILE       := $(addprefix $(OUTPUT),$(LIB_FILE))
 PC_FILE                := $(addprefix $(OUTPUT),$(PC_FILE))
 
-GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
+GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
                           cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
                           awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \
                           sort -u | wc -l)
@@ -155,7 +165,7 @@ all: fixdep
 
 all_cmd: $(CMD_TARGETS) check
 
-$(BPF_IN): force elfdep bpfdep
+$(BPF_IN_SHARED): force elfdep bpfdep
        @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
        (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
        echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@@ -171,17 +181,20 @@ $(BPF_IN): force elfdep bpfdep
        @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \
        (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \
        echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
-       $(Q)$(MAKE) $(build)=libbpf
+       $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
+
+$(BPF_IN_STATIC): force elfdep bpfdep
+       $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
 
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
 
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN)
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
        $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
                                    -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
        @ln -sf $(@F) $(OUTPUT)libbpf.so
        @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
 
-$(OUTPUT)libbpf.a: $(BPF_IN)
+$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
        $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
 $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
@@ -197,7 +210,7 @@ check: check_abi
 
 check_abi: $(OUTPUT)libbpf.so
        @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then  \
-               echo "Warning: Num of global symbols in $(BPF_IN)"       \
+               echo "Warning: Num of global symbols in $(BPF_IN_SHARED)"        \
                     "($(GLOBAL_SYM_COUNT)) does NOT match with num of"  \
                     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
                     "Please make sure all LIBBPF_API symbols are"       \
@@ -255,9 +268,9 @@ config-clean:
        $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
 clean:
-       $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
+       $(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \
                *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
-               *.pc LIBBPF-CFLAGS
+               *.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR)
        $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
 
 
index 7159677..ede55fe 100644 (file)
@@ -48,6 +48,8 @@ struct btf_dump_type_aux_state {
        __u8 fwd_emitted: 1;
        /* whether unique non-duplicate name was already assigned */
        __u8 name_resolved: 1;
+       /* whether type is referenced from any other type */
+       __u8 referenced: 1;
 };
 
 struct btf_dump {
@@ -173,6 +175,7 @@ void btf_dump__free(struct btf_dump *d)
        free(d);
 }
 
+static int btf_dump_mark_referenced(struct btf_dump *d);
 static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
 static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
 
@@ -213,6 +216,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
                /* VOID is special */
                d->type_states[0].order_state = ORDERED;
                d->type_states[0].emit_state = EMITTED;
+
+               /* eagerly determine referenced types for anon enums */
+               err = btf_dump_mark_referenced(d);
+               if (err)
+                       return err;
        }
 
        d->emit_queue_cnt = 0;
@@ -226,6 +234,79 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
        return 0;
 }
 
+/*
+ * Mark all types that are referenced from any other type. This is used to
+ * determine top-level anonymous enums that need to be emitted as
+ * independent type declarations.
+ * Anonymous enums come in two flavors: either embedded in a struct's field
+ * definition, in which case they have to be declared inline as part of the
+ * field type declaration; or as a top-level anonymous enum, typically used
+ * for declaring global constants. It's impossible to distinguish between
+ * the two without knowing whether a given enum type was referenced from
+ * another type: a top-level anonymous enum won't be referenced by anything,
+ * while an embedded one will.
+ */
+static int btf_dump_mark_referenced(struct btf_dump *d)
+{
+       int i, j, n = btf__get_nr_types(d->btf);
+       const struct btf_type *t;
+       __u16 vlen;
+
+       for (i = 1; i <= n; i++) {
+               t = btf__type_by_id(d->btf, i);
+               vlen = btf_vlen(t);
+
+               switch (btf_kind(t)) {
+               case BTF_KIND_INT:
+               case BTF_KIND_ENUM:
+               case BTF_KIND_FWD:
+                       break;
+
+               case BTF_KIND_VOLATILE:
+               case BTF_KIND_CONST:
+               case BTF_KIND_RESTRICT:
+               case BTF_KIND_PTR:
+               case BTF_KIND_TYPEDEF:
+               case BTF_KIND_FUNC:
+               case BTF_KIND_VAR:
+                       d->type_states[t->type].referenced = 1;
+                       break;
+
+               case BTF_KIND_ARRAY: {
+                       const struct btf_array *a = btf_array(t);
+
+                       d->type_states[a->index_type].referenced = 1;
+                       d->type_states[a->type].referenced = 1;
+                       break;
+               }
+               case BTF_KIND_STRUCT:
+               case BTF_KIND_UNION: {
+                       const struct btf_member *m = btf_members(t);
+
+                       for (j = 0; j < vlen; j++, m++)
+                               d->type_states[m->type].referenced = 1;
+                       break;
+               }
+               case BTF_KIND_FUNC_PROTO: {
+                       const struct btf_param *p = btf_params(t);
+
+                       for (j = 0; j < vlen; j++, p++)
+                               d->type_states[p->type].referenced = 1;
+                       break;
+               }
+               case BTF_KIND_DATASEC: {
+                       const struct btf_var_secinfo *v = btf_var_secinfos(t);
+
+                       for (j = 0; j < vlen; j++, v++)
+                               d->type_states[v->type].referenced = 1;
+                       break;
+               }
+               default:
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
 static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
 {
        __u32 *new_queue;
@@ -395,7 +476,12 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
        }
        case BTF_KIND_ENUM:
        case BTF_KIND_FWD:
-               if (t->name_off != 0) {
+               /*
+                * Non-anonymous or non-referenced enums are top-level
+                * declarations and should be emitted. The same logic can be
+                * applied to FWDs; it won't hurt anyway.
+                */
+               if (t->name_off != 0 || !tstate->referenced) {
                        err = btf_dump_add_emit_queue_id(d, id);
                        if (err)
                                return err;
@@ -536,11 +622,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
        t = btf__type_by_id(d->btf, id);
        kind = btf_kind(t);
 
-       if (top_level_def && t->name_off == 0) {
-               pr_warning("unexpected nameless definition, id:[%u]\n", id);
-               return;
-       }
-
        if (tstate->emit_state == EMITTING) {
                if (tstate->fwd_emitted)
                        return;
@@ -1167,6 +1248,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                                return;
                        }
 
+                       next_id = decls->ids[decls->cnt - 1];
                        next_t = btf__type_by_id(d->btf, next_id);
                        multidim = btf_is_array(next_t);
                        /* we need space if we have named non-pointer */
index 2e83a34..98216a6 100644 (file)
        (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
 #endif
 
+/* Symbol versioning differs between static and shared libraries.
+ * Properly versioned symbols are needed for the shared library, but
+ * only the symbol of the new version is needed for the static library.
+ */
+#ifdef SHARED
+# define COMPAT_VERSION(internal_name, api_name, version) \
+       asm(".symver " #internal_name "," #api_name "@" #version);
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+       asm(".symver " #internal_name "," #api_name "@@" #version);
+#else
+# define COMPAT_VERSION(internal_name, api_name, version)
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+       extern typeof(internal_name) api_name \
+       __attribute__((alias(#internal_name)));
+#endif
+
 extern void libbpf_print(enum libbpf_print_level level,
                         const char *format, ...)
        __attribute__((format(printf, 2, 3)));
index 842c4fd..a902838 100644 (file)
@@ -65,7 +65,6 @@ struct xsk_socket {
        int xsks_map_fd;
        __u32 queue_id;
        char ifname[IFNAMSIZ];
-       bool zc;
 };
 
 struct xsk_nl_info {
@@ -262,8 +261,8 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
        return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
                                        &config);
 }
-asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2");
-asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4");
+COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
+DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
 
 static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 {
@@ -491,7 +490,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
        void *rx_map = NULL, *tx_map = NULL;
        struct sockaddr_xdp sxdp = {};
        struct xdp_mmap_offsets off;
-       struct xdp_options opts;
        struct xsk_socket *xsk;
        socklen_t optlen;
        int err;
@@ -611,15 +609,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 
        xsk->prog_fd = -1;
 
-       optlen = sizeof(opts);
-       err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen);
-       if (err) {
-               err = -errno;
-               goto out_mmap_tx;
-       }
-
-       xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY;
-
        if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
                err = xsk_setup_xdp_prog(xsk);
                if (err)
index 804f145..2548ff8 100644 (file)
@@ -83,14 +83,10 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
 
 static __always_inline void
 __rb_insert(struct rb_node *node, struct rb_root *root,
-           bool newleft, struct rb_node **leftmost,
            void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
        struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
 
-       if (newleft)
-               *leftmost = node;
-
        while (true) {
                /*
                 * Loop invariant: node is red.
@@ -436,34 +432,17 @@ static const struct rb_augment_callbacks dummy_callbacks = {
 
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
 {
-       __rb_insert(node, root, false, NULL, dummy_rotate);
+       __rb_insert(node, root, dummy_rotate);
 }
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
 {
        struct rb_node *rebalance;
-       rebalance = __rb_erase_augmented(node, root,
-                                        NULL, &dummy_callbacks);
+       rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
        if (rebalance)
                ____rb_erase_color(rebalance, root, dummy_rotate);
 }
 
-void rb_insert_color_cached(struct rb_node *node,
-                           struct rb_root_cached *root, bool leftmost)
-{
-       __rb_insert(node, &root->rb_root, leftmost,
-                   &root->rb_leftmost, dummy_rotate);
-}
-
-void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
-{
-       struct rb_node *rebalance;
-       rebalance = __rb_erase_augmented(node, &root->rb_root,
-                                        &root->rb_leftmost, &dummy_callbacks);
-       if (rebalance)
-               ____rb_erase_color(rebalance, &root->rb_root, dummy_rotate);
-}
-
 /*
  * Augmented rbtree manipulation functions.
  *
@@ -472,10 +451,9 @@ void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
  */
 
 void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
-                          bool newleft, struct rb_node **leftmost,
        void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
-       __rb_insert(node, root, newleft, leftmost, augment_rotate);
+       __rb_insert(node, root, augment_rotate);
 }
 
 /*
@@ -580,15 +558,6 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
        __rb_change_child(victim, new, parent, root);
 }
 
-void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
-                           struct rb_root_cached *root)
-{
-       rb_replace_node(victim, new, &root->rb_root);
-
-       if (root->rb_leftmost == victim)
-               root->rb_leftmost = new;
-}
-
 static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
 {
        for (;;) {
index ed61fb3..5b2cd5e 100644 (file)
@@ -20,7 +20,13 @@ MAKEFLAGS += --no-print-directory
 LIBFILE = $(OUTPUT)libsubcmd.a
 
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
+
+ifeq ($(DEBUG),0)
+  ifeq ($(feature-fortify-source), 1)
+    CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
+  endif
+endif
 
 ifeq ($(CC_NO_CLANG), 0)
   CFLAGS += -O3
index ba54bfc..f9a5d79 100644 (file)
@@ -6,14 +6,3 @@ libtraceevent-y += parse-utils.o
 libtraceevent-y += kbuffer-parse.o
 libtraceevent-y += tep_strerror.o
 libtraceevent-y += event-parse-api.o
-
-plugin_jbd2-y         += plugin_jbd2.o
-plugin_hrtimer-y      += plugin_hrtimer.o
-plugin_kmem-y         += plugin_kmem.o
-plugin_kvm-y          += plugin_kvm.o
-plugin_mac80211-y     += plugin_mac80211.o
-plugin_sched_switch-y += plugin_sched_switch.o
-plugin_function-y     += plugin_function.o
-plugin_xen-y          += plugin_xen.o
-plugin_scsi-y         += plugin_scsi.o
-plugin_cfg80211-y     += plugin_cfg80211.o
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
new file mode 100644 (file)
index 0000000..2c6a618
--- /dev/null
@@ -0,0 +1,130 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_print_event - Writes event information into a trace sequence.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+*#include <trace-seq.h>*
+
+void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._)
+--
+
+DESCRIPTION
+-----------
+
+The _tep_print_event()_ function parses the event information of the given
+_record_ and writes it into the trace sequence _s_, according to the format
+string _fmt_. The desired information is specified after the format string.
+The _fmt_ is a printf-like format string; the following arguments are supported:
+[verse]
+--
+       TEP_PRINT_PID, "%d"  - PID of the event.
+       TEP_PRINT_CPU, "%d"  - Event CPU.
+       TEP_PRINT_COMM, "%s" - Event command string.
+       TEP_PRINT_NAME, "%s" - Event name.
+       TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more
+                       fields - interrupt state, scheduling state,
+                       current context, and preemption count.
+                       Field 1 is the interrupt enabled state:
+                               d : Interrupts are disabled
+                               . : Interrupts are enabled
+                               X : The architecture does not support this
+                                   information
+                       Field 2 is the "need resched" state.
+                               N : The task is set to call the scheduler when
+                                   possible, as another higher priority task
+                                   may need to be scheduled in.
+                               . : The task is not set to call the scheduler.
+                       Field 3 is the context state.
+                               . : Normal context
+                               s : Soft interrupt context
+                               h : Hard interrupt context
+                               H : Hard interrupt context which triggered
+                                   during soft interrupt context.
+                               z : NMI context
+                               Z : NMI context which triggered during hard
+                                   interrupt context
+                       Field 4 is the preemption count.
+                               . : The preempt count is zero.
+                       On preemptible kernels (where the task can be scheduled
+                       out in arbitrary locations while in kernel context), the
+                       preempt count, when non zero, will prevent the kernel
+                       from scheduling out the current task. The preempt count
+                       number is displayed when it is not zero.
+                       Depending on the kernel, it may show other fields
+                       (lock depth, or migration disabled, which are unique to
+                       specialized kernels).
+       TEP_PRINT_TIME, "%d" - Event timestamp. A divisor and precision can be
+                       specified as part of this format string:
+                       "%precision.divisord". Example:
+                       "%3.1000d" - divide the timestamp by 1000 and print 3
+                       digits after the dot. Thus, the timestamp
+                       "123456000" will be printed as "123.456"
+       TEP_PRINT_INFO, "%s" - event information.
+       TEP_PRINT_INFO_RAW, "%s" - event information, in raw format.
+
+--
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct trace_seq seq;
+trace_seq_init(&seq);
+struct tep_handle *tep = tep_alloc();
+...
+void print_my_event(struct tep_record *record)
+{
+       trace_seq_reset(&seq);
+       tep_print_event(tep, &seq, record, "%16s-%-5d [%03d] %s %6.1000d %s %s",
+                       TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU,
+                       TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME,
+                       TEP_PRINT_INFO);
+}
+...
+--
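+
+In this example, the _%6.1000d_ time specifier divides the record timestamp
+by 1000 and prints 6 digits after the dot, following the
+"%precision.divisord" format described above.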
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+       Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+       Header file to include in order to have access to the trace sequence related APIs.
+       Trace sequences are used to allow a function to call several other functions
+       to create a string of data to use.
+*-ltraceevent*
+       Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
index 38bfea3..f6aca0d 100644 (file)
@@ -59,12 +59,12 @@ parser context.
 
 The _tep_register_function()_ function registers a function name mapped to an
 address and (optional) module. This mapping is used in case the function tracer
-or events have "%pF" or "%pS" parameter in its format string. It is common to
-pass in the kallsyms function names with their corresponding addresses with this
+or events have "%pS" parameter in its format string. It is common to pass in
+the kallsyms function names with their corresponding addresses with this
 function. The _tep_ argument is the trace event parser context. The _name_ is
-the name of the function, the string is copied internally. The _addr_ is
-the start address of the function. The _mod_ is the kernel module
-the function may be in (NULL for none).
+the name of the function, the string is copied internally. The _addr_ is the
+start address of the function. The _mod_ is the kernel module the function may
+be in (NULL for none).
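+
+For example, a minimal sketch of registering one kallsyms entry (the address
+used here is hypothetical):
+[source,c]
+--
+tep_register_function(tep, "schedule", 0xffffffff810c4ed0ULL, NULL);
+--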
 
 The _tep_register_print_string()_ function  registers a string by the address
 it was stored in the kernel. Some strings internal to the kernel with static
index 8d56831..45b2017 100644 (file)
@@ -3,7 +3,7 @@ libtraceevent(3)
 
 NAME
 ----
-tep_alloc, tep_free,tep_ref, tep_unref,tep_ref_get - Create, destroy, manage
+tep_alloc, tep_free, tep_ref, tep_unref, tep_get_ref - Create, destroy, manage
 references of trace event parser context.
 
 SYNOPSIS
@@ -16,7 +16,7 @@ struct tep_handle pass:[*]*tep_alloc*(void);
 void *tep_free*(struct tep_handle pass:[*]_tep_);
 void *tep_ref*(struct tep_handle pass:[*]_tep_);
 void *tep_unref*(struct tep_handle pass:[*]_tep_);
-int *tep_ref_get*(struct tep_handle pass:[*]_tep_);
+int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
 --
 
 DESCRIPTION
@@ -57,9 +57,9 @@ EXAMPLE
 ...
 struct tep_handle *tep = tep_alloc();
 ...
-int ref = tep_ref_get(tep);
+int ref = tep_get_ref(tep);
 tep_ref(tep);
-if ( (ref+1) != tep_ref_get(tep)) {
+if ( (ref+1) != tep_get_ref(tep)) {
        /* Something wrong happened, the counter is not incremented by 1 */
 }
 tep_unref(tep);
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
new file mode 100644 (file)
index 0000000..596032a
--- /dev/null
@@ -0,0 +1,99 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
+void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+--
+
+DESCRIPTION
+-----------
+The _tep_load_plugins()_ function loads all plugins located in the plugin
+directories. The _tep_ argument is the trace event parser context.
+The plugin directories are:
+[verse]
+--
+       - The system's plugin directory, defined at library compile time. It
+         depends on the library installation prefix and usually is
+         _(install_prefix)/lib/traceevent/plugins_
+       - The directory defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
+       - The user's plugin directory, located at _~/.local/lib/traceevent/plugins_
+--
+Loading of plugins can be controlled by flags on the _tep_ handle, using the
+_tep_set_flag()_ API:
+[verse]
+--
+       _TEP_DISABLE_SYS_PLUGINS_       - do not load plugins located in
+                                       the system's plugin directory.
+       _TEP_DISABLE_PLUGINS_           - do not load any plugins.
+--
+If loading all plugins is not desired, the _tep_set_flag()_ API must be
+called before _tep_load_plugins()_.
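+
+For example, a minimal sketch that skips the system plugins, assuming only
+the _TEP_DISABLE_SYS_PLUGINS_ flag described above:
+[source,c]
+--
+struct tep_handle *tep = tep_alloc();
+
+/* Must be called before tep_load_plugins() to take effect. */
+tep_set_flag(tep, TEP_DISABLE_SYS_PLUGINS);
+struct tep_plugin_list *plugins = tep_load_plugins(tep);
+--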
+
+The _tep_unload_plugins()_ function unloads the plugins previously loaded by
+_tep_load_plugins()_. The _tep_ argument is the trace event parser context. The
+_plugin_list_ is the list of loaded plugins returned by
+the _tep_load_plugins()_ function.
+
+RETURN VALUE
+------------
+The _tep_load_plugins()_ function returns a list of successfully loaded
+plugins, or NULL if no plugins were loaded.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+struct tep_plugin_list *plugins = tep_load_plugins(tep);
+if (plugins == NULL) {
+       /* no plugins are loaded */
+}
+...
+tep_unload_plugins(plugins, tep);
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+       Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+       Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
index fbd977b..d530a7c 100644 (file)
@@ -16,7 +16,7 @@ Management of tep handler data structure and access of its members:
        void *tep_free*(struct tep_handle pass:[*]_tep_);
        void *tep_ref*(struct tep_handle pass:[*]_tep_);
        void *tep_unref*(struct tep_handle pass:[*]_tep_);
-       int *tep_ref_get*(struct tep_handle pass:[*]_tep_);
+       int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
        void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
        void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
        bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_);
@@ -26,15 +26,12 @@ Management of tep handler data structure and access of its members:
        void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
        int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
        void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
-       bool *tep_is_latency_format*(struct tep_handle pass:[*]_tep_);
-       void *tep_set_latency_format*(struct tep_handle pass:[*]_tep_, int _lat_);
        int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
        int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
        bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
        int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
 
 Register / unregister APIs:
-       int *tep_register_trace_clock*(struct tep_handle pass:[*]_tep_, const char pass:[*]_trace_clock_);
        int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
        int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
        int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
@@ -57,14 +54,7 @@ Event related APIs:
        int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
        struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
        struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
-
-Event printing:
-       void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, bool _use_trace_clock_);
-       void *tep_print_event_data*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_);
-       void *tep_event_info*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_);
-       void *tep_print_event_task*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_);
-       void *tep_print_event_time*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]record, bool _use_trace_clock_);
-       void *tep_set_print_raw*(struct tep_handle pass:[*]_tep_, int _print_raw_);
+       void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._);
 
 Event finding:
        struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
@@ -116,7 +106,6 @@ Filter management:
        int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
 
 Parsing various data from the records:
-       void *tep_data_latency_format*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_);
        int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
        int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
        int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
index a39cdd0..5315f37 100644 (file)
@@ -58,30 +58,6 @@ export man_dir man_dir_SQ INSTALL
 export DESTDIR DESTDIR_SQ
 export EVENT_PARSE_VERSION
 
-set_plugin_dir := 1
-
-# Set plugin_dir to preffered global plugin location
-# If we install under $HOME directory we go under
-# $(HOME)/.local/lib/traceevent/plugins
-#
-# We dont set PLUGIN_DIR in case we install under $HOME
-# directory, because by default the code looks under:
-# $(HOME)/.local/lib/traceevent/plugins by default.
-#
-ifeq ($(plugin_dir),)
-ifeq ($(prefix),$(HOME))
-override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
-set_plugin_dir := 0
-else
-override plugin_dir = $(libdir)/traceevent/plugins
-endif
-endif
-
-ifeq ($(set_plugin_dir),1)
-PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
-PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
-endif
-
 include ../../scripts/Makefile.include
 
 # copy a bit from Linux kbuild
@@ -105,7 +81,6 @@ export prefix libdir src obj
 # Shell quotes
 libdir_SQ = $(subst ','\'',$(libdir))
 libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 
 CONFIG_INCLUDES = 
 CONFIG_LIBS    =
@@ -151,29 +126,14 @@ MAKEOVERRIDES=
 export srctree OUTPUT CC LD CFLAGS V
 build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
-PLUGINS  = plugin_jbd2.so
-PLUGINS += plugin_hrtimer.so
-PLUGINS += plugin_kmem.so
-PLUGINS += plugin_kvm.so
-PLUGINS += plugin_mac80211.so
-PLUGINS += plugin_sched_switch.so
-PLUGINS += plugin_function.so
-PLUGINS += plugin_xen.so
-PLUGINS += plugin_scsi.so
-PLUGINS += plugin_cfg80211.so
-
-PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
-PLUGINS_IN := $(PLUGINS:.so=-in.o)
-
 TE_IN      := $(OUTPUT)libtraceevent-in.o
 LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
-DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
 
-CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE)
+CMD_TARGETS = $(LIB_TARGET)
 
 TARGETS = $(CMD_TARGETS)
 
-all: all_cmd
+all: all_cmd plugins
 
 all_cmd: $(CMD_TARGETS)
 
@@ -188,17 +148,6 @@ $(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN)
 $(OUTPUT)libtraceevent.a: $(TE_IN)
        $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
-$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
-       $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
-
-plugins: $(PLUGINS)
-
-__plugin_obj = $(notdir $@)
-  plugin_obj = $(__plugin_obj:-in.o=)
-
-$(PLUGINS_IN): force
-       $(Q)$(MAKE) $(build)=$(plugin_obj)
-
 $(OUTPUT)%.so: $(OUTPUT)%-in.o
        $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
 
@@ -258,25 +207,6 @@ define do_install
        $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
 endef
 
-define do_install_plugins
-       for plugin in $1; do                            \
-         $(call do_install,$$plugin,$(plugin_dir_SQ)); \
-       done
-endef
-
-define do_generate_dynamic_list_file
-       symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
-       xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
-       if [ "$$symbol_type" = "U W" ];then                             \
-               (echo '{';                                              \
-               $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
-               echo '};';                                              \
-               ) > $2;                                                 \
-       else                                                            \
-               (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
-       fi
-endef
-
 PKG_CONFIG_FILE = libtraceevent.pc
 define do_install_pkgconfig_file
        if [ -n "${pkgconfig_dir}" ]; then                                      \
@@ -296,10 +226,6 @@ install_lib: all_cmd install_plugins install_headers install_pkgconfig
                $(call do_install_mkdir,$(libdir_SQ)); \
                cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ)
 
-install_plugins: $(PLUGINS)
-       $(call QUIET_INSTALL, trace_plugins) \
-               $(call do_install_plugins, $(PLUGINS))
-
 install_pkgconfig:
        $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \
                $(call do_install_pkgconfig_file,$(prefix))
@@ -313,7 +239,7 @@ install_headers:
 
 install: install_lib
 
-clean:
+clean: clean_plugins
        $(call QUIET_CLEAN, libtraceevent) \
                $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
                $(RM) TRACEEVENT-CFLAGS tags TAGS; \
@@ -351,7 +277,19 @@ help:
        @echo '  doc-install         - install the man pages'
        @echo '  doc-uninstall       - uninstall the man pages'
        @echo''
-PHONY += force plugins
+
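+# The plugins now live in their own subdirectory; build, install and clean
+# them by recursing into it with the descend helper from Makefile.include.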
+PHONY += plugins
+plugins:
+       $(call descend,plugins)
+
+PHONY += install_plugins
+install_plugins:
+       $(call descend,plugins,install)
+
+PHONY += clean_plugins
+clean_plugins:
+       $(call descend,plugins,clean)
+
 force:
 
 # Declare the contents of the .PHONY variable as phony.  We keep that
index bb22238..d948475 100644 (file)
@@ -4367,10 +4367,20 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s
                                        switch (*ptr) {
                                        case 's':
                                        case 'S':
-                                       case 'f':
-                                       case 'F':
                                        case 'x':
                                                break;
+                                       case 'f':
+                                       case 'F':
+                                               /*
+                                                * Pre-5.5 kernels use %pf and
+                                                * %pF for printing symbols
+                                                * while kernels since 5.5 use
+                                                * %pfw for fwnodes. So check
+                                                * %p[fF] isn't followed by 'w'.
+                                                */
+                                               if (ptr[1] != 'w')
+                                                       break;
+                                               /* fall through */
                                        default:
                                                /*
                                                 * Older kernels do not process
@@ -4487,12 +4497,12 @@ get_bprint_format(void *data, int size __maybe_unused,
 
        printk = find_printk(tep, addr);
        if (!printk) {
-               if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0)
+               if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0)
                        return NULL;
                return format;
        }
 
-       if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0)
+       if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0)
                return NULL;
 
        return format;
@@ -5517,8 +5527,10 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
        if (divstr && isdigit(*(divstr + 1)))
                div = atoi(divstr + 1);
        time = record->ts;
-       if (div)
+       if (div) {
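+               /* Add half the divisor so the integer division rounds to nearest. */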
+               time += div / 2;
                time /= div;
+       }
        pr = prec;
        while (pr--)
                p10 *= 10;
index d438ee4..b77837f 100644 (file)
@@ -441,6 +441,8 @@ int tep_register_print_string(struct tep_handle *tep, const char *fmt,
                              unsigned long long addr);
 bool tep_is_pid_registered(struct tep_handle *tep, int pid);
 
+struct tep_event *tep_get_event(struct tep_handle *tep, int index);
+
 #define TEP_PRINT_INFO         "INFO"
 #define TEP_PRINT_INFO_RAW     "INFO_RAW"
 #define TEP_PRINT_COMM         "COMM"
diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c
deleted file mode 100644 (file)
index 3d43b56..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include <endian.h>
-#include "event-parse.h"
-
-/*
- * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
- * Fedora6.
- */
-#ifndef le16toh
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define le16toh(x) (x)
-# else
-#  define le16toh(x) __bswap_16 (x)
-# endif
-#endif
-
-
-static unsigned long long
-process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
-{
-       uint16_t *val = (uint16_t *) (unsigned long) args[0];
-       return val ? (long long) le16toh(*val) : 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_print_function(tep,
-                                   process___le16_to_cpup,
-                                   TEP_FUNC_ARG_INT,
-                                   "__le16_to_cpup",
-                                   TEP_FUNC_ARG_PTR,
-                                   TEP_FUNC_ARG_VOID);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_print_function(tep, process___le16_to_cpup,
-                                     "__le16_to_cpup");
-}
diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c
deleted file mode 100644 (file)
index 7770fcb..0000000
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "event-utils.h"
-#include "trace-seq.h"
-
-static struct func_stack {
-       int size;
-       char **stack;
-} *fstack;
-
-static int cpus = -1;
-
-#define STK_BLK 10
-
-struct tep_plugin_option plugin_options[] =
-{
-       {
-               .name = "parent",
-               .plugin_alias = "ftrace",
-               .description =
-               "Print parent of functions for function events",
-       },
-       {
-               .name = "indent",
-               .plugin_alias = "ftrace",
-               .description =
-               "Try to show function call indents, based on parents",
-               .set = 1,
-       },
-       {
-               .name = NULL,
-       }
-};
-
-static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
-static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
-
-static void add_child(struct func_stack *stack, const char *child, int pos)
-{
-       int i;
-
-       if (!child)
-               return;
-
-       if (pos < stack->size)
-               free(stack->stack[pos]);
-       else {
-               char **ptr;
-
-               ptr = realloc(stack->stack, sizeof(char *) *
-                             (stack->size + STK_BLK));
-               if (!ptr) {
-                       warning("could not allocate plugin memory\n");
-                       return;
-               }
-
-               stack->stack = ptr;
-
-               for (i = stack->size; i < stack->size + STK_BLK; i++)
-                       stack->stack[i] = NULL;
-               stack->size += STK_BLK;
-       }
-
-       stack->stack[pos] = strdup(child);
-}
-
-static int add_and_get_index(const char *parent, const char *child, int cpu)
-{
-       int i;
-
-       if (cpu < 0)
-               return 0;
-
-       if (cpu > cpus) {
-               struct func_stack *ptr;
-
-               ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1));
-               if (!ptr) {
-                       warning("could not allocate plugin memory\n");
-                       return 0;
-               }
-
-               fstack = ptr;
-
-               /* Account for holes in the cpu count */
-               for (i = cpus + 1; i <= cpu; i++)
-                       memset(&fstack[i], 0, sizeof(fstack[i]));
-               cpus = cpu;
-       }
-
-       for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) {
-               if (strcmp(parent, fstack[cpu].stack[i]) == 0) {
-                       add_child(&fstack[cpu], child, i+1);
-                       return i;
-               }
-       }
-
-       /* Not found */
-       add_child(&fstack[cpu], parent, 0);
-       add_child(&fstack[cpu], child, 1);
-       return 0;
-}
-
-static int function_handler(struct trace_seq *s, struct tep_record *record,
-                           struct tep_event *event, void *context)
-{
-       struct tep_handle *tep = event->tep;
-       unsigned long long function;
-       unsigned long long pfunction;
-       const char *func;
-       const char *parent;
-       int index = 0;
-
-       if (tep_get_field_val(s, event, "ip", record, &function, 1))
-               return trace_seq_putc(s, '!');
-
-       func = tep_find_function(tep, function);
-
-       if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
-               return trace_seq_putc(s, '!');
-
-       parent = tep_find_function(tep, pfunction);
-
-       if (parent && ftrace_indent->set)
-               index = add_and_get_index(parent, func, record->cpu);
-
-       trace_seq_printf(s, "%*s", index*3, "");
-
-       if (func)
-               trace_seq_printf(s, "%s", func);
-       else
-               trace_seq_printf(s, "0x%llx", function);
-
-       if (ftrace_parent->set) {
-               trace_seq_printf(s, " <-- ");
-               if (parent)
-                       trace_seq_printf(s, "%s", parent);
-               else
-                       trace_seq_printf(s, "0x%llx", pfunction);
-       }
-
-       return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_event_handler(tep, -1, "ftrace", "function",
-                                  function_handler, NULL);
-
-       tep_plugin_add_options("ftrace", plugin_options);
-
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       int i, x;
-
-       tep_unregister_event_handler(tep, -1, "ftrace", "function",
-                                    function_handler, NULL);
-
-       for (i = 0; i <= cpus; i++) {
-               for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
-                       free(fstack[i].stack[x]);
-               free(fstack[i].stack);
-       }
-
-       tep_plugin_remove_options(plugin_options);
-
-       free(fstack);
-       fstack = NULL;
-       cpus = -1;
-}
diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c
deleted file mode 100644 (file)
index bb434e0..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static int timer_expire_handler(struct trace_seq *s,
-                               struct tep_record *record,
-                               struct tep_event *event, void *context)
-{
-       trace_seq_printf(s, "hrtimer=");
-
-       if (tep_print_num_field(s, "0x%llx", event, "timer",
-                               record, 0) == -1)
-               tep_print_num_field(s, "0x%llx", event, "hrtimer",
-                                   record, 1);
-
-       trace_seq_printf(s, " now=");
-
-       tep_print_num_field(s, "%llu", event, "now", record, 1);
-
-       tep_print_func_field(s, " function=%s", event, "function",
-                               record, 0);
-       return 0;
-}
-
-static int timer_start_handler(struct trace_seq *s,
-                              struct tep_record *record,
-                              struct tep_event *event, void *context)
-{
-       trace_seq_printf(s, "hrtimer=");
-
-       if (tep_print_num_field(s, "0x%llx", event, "timer",
-                               record, 0) == -1)
-               tep_print_num_field(s, "0x%llx", event, "hrtimer",
-                                   record, 1);
-
-       tep_print_func_field(s, " function=%s", event, "function",
-                            record, 0);
-
-       trace_seq_printf(s, " expires=");
-       tep_print_num_field(s, "%llu", event, "expires", record, 1);
-
-       trace_seq_printf(s, " softexpires=");
-       tep_print_num_field(s, "%llu", event, "softexpires", record, 1);
-       return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_event_handler(tep, -1,
-                                  "timer", "hrtimer_expire_entry",
-                                  timer_expire_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "timer", "hrtimer_start",
-                                  timer_start_handler, NULL);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_event_handler(tep, -1,
-                                    "timer", "hrtimer_expire_entry",
-                                    timer_expire_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start",
-                                    timer_start_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c
deleted file mode 100644 (file)
index 04fc125..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define MINORBITS      20
-#define MINORMASK      ((1U << MINORBITS) - 1)
-
-#define MAJOR(dev)     ((unsigned int) ((dev) >> MINORBITS))
-#define MINOR(dev)     ((unsigned int) ((dev) & MINORMASK))
-
-static unsigned long long
-process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args)
-{
-       unsigned int dev = args[0];
-
-       trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev));
-       return 0;
-}
-
-static unsigned long long
-process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
-{
-       unsigned long long jiffies = args[0];
-
-       trace_seq_printf(s, "%lld", jiffies);
-       return jiffies;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_print_function(tep,
-                                   process_jbd2_dev_to_name,
-                                   TEP_FUNC_ARG_STRING,
-                                   "jbd2_dev_to_name",
-                                   TEP_FUNC_ARG_INT,
-                                   TEP_FUNC_ARG_VOID);
-
-       tep_register_print_function(tep,
-                                   process_jiffies_to_msecs,
-                                   TEP_FUNC_ARG_LONG,
-                                   "jiffies_to_msecs",
-                                   TEP_FUNC_ARG_LONG,
-                                   TEP_FUNC_ARG_VOID);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_print_function(tep, process_jbd2_dev_to_name,
-                                     "jbd2_dev_to_name");
-
-       tep_unregister_print_function(tep, process_jiffies_to_msecs,
-                                     "jiffies_to_msecs");
-}
diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c
deleted file mode 100644 (file)
index edaec5d..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static int call_site_handler(struct trace_seq *s, struct tep_record *record,
-                            struct tep_event *event, void *context)
-{
-       struct tep_format_field *field;
-       unsigned long long val, addr;
-       void *data = record->data;
-       const char *func;
-
-       field = tep_find_field(event, "call_site");
-       if (!field)
-               return 1;
-
-       if (tep_read_number_field(field, data, &val))
-               return 1;
-
-       func = tep_find_function(event->tep, val);
-       if (!func)
-               return 1;
-
-       addr = tep_find_function_address(event->tep, val);
-
-       trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
-       return 1;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_event_handler(tep, -1, "kmem", "kfree",
-                                  call_site_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kmem", "kmalloc",
-                                  call_site_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kmem", "kmalloc_node",
-                                  call_site_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
-                                  call_site_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kmem",
-                                  "kmem_cache_alloc_node",
-                                  call_site_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free",
-                                  call_site_handler, NULL);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_event_handler(tep, -1, "kmem", "kfree",
-                                    call_site_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kmem", "kmalloc",
-                                    call_site_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node",
-                                    call_site_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
-                                    call_site_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kmem",
-                                    "kmem_cache_alloc_node",
-                                    call_site_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_free",
-                                    call_site_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
deleted file mode 100644 (file)
index c8e6230..0000000
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#ifdef HAVE_UDIS86
-
-#include <udis86.h>
-
-static ud_t ud;
-
-static void init_disassembler(void)
-{
-       ud_init(&ud);
-       ud_set_syntax(&ud, UD_SYN_ATT);
-}
-
-static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
-                              int cr0_pe, int eflags_vm,
-                              int cs_d, int cs_l)
-{
-       int mode;
-
-       if (!cr0_pe)
-               mode = 16;
-       else if (eflags_vm)
-               mode = 16;
-       else if (cs_l)
-               mode = 64;
-       else if (cs_d)
-               mode = 32;
-       else
-               mode = 16;
-
-       ud_set_pc(&ud, rip);
-       ud_set_mode(&ud, mode);
-       ud_set_input_buffer(&ud, insn, len);
-       ud_disassemble(&ud);
-       return ud_insn_asm(&ud);
-}
-
-#else
-
-static void init_disassembler(void)
-{
-}
-
-static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
-                              int cr0_pe, int eflags_vm,
-                              int cs_d, int cs_l)
-{
-       static char out[15*3+1];
-       int i;
-
-       for (i = 0; i < len; ++i)
-               sprintf(out + i * 3, "%02x ", insn[i]);
-       out[len*3-1] = '\0';
-       return out;
-}
-
-#endif
-
-
-#define VMX_EXIT_REASONS                       \
-       _ER(EXCEPTION_NMI,       0)             \
-       _ER(EXTERNAL_INTERRUPT,  1)             \
-       _ER(TRIPLE_FAULT,        2)             \
-       _ER(PENDING_INTERRUPT,   7)             \
-       _ER(NMI_WINDOW,          8)             \
-       _ER(TASK_SWITCH,         9)             \
-       _ER(CPUID,               10)            \
-       _ER(HLT,                 12)            \
-       _ER(INVD,                13)            \
-       _ER(INVLPG,              14)            \
-       _ER(RDPMC,               15)            \
-       _ER(RDTSC,               16)            \
-       _ER(VMCALL,              18)            \
-       _ER(VMCLEAR,             19)            \
-       _ER(VMLAUNCH,            20)            \
-       _ER(VMPTRLD,             21)            \
-       _ER(VMPTRST,             22)            \
-       _ER(VMREAD,              23)            \
-       _ER(VMRESUME,            24)            \
-       _ER(VMWRITE,             25)            \
-       _ER(VMOFF,               26)            \
-       _ER(VMON,                27)            \
-       _ER(CR_ACCESS,           28)            \
-       _ER(DR_ACCESS,           29)            \
-       _ER(IO_INSTRUCTION,      30)            \
-       _ER(MSR_READ,            31)            \
-       _ER(MSR_WRITE,           32)            \
-       _ER(MWAIT_INSTRUCTION,   36)            \
-       _ER(MONITOR_INSTRUCTION, 39)            \
-       _ER(PAUSE_INSTRUCTION,   40)            \
-       _ER(MCE_DURING_VMENTRY,  41)            \
-       _ER(TPR_BELOW_THRESHOLD, 43)            \
-       _ER(APIC_ACCESS,         44)            \
-       _ER(EOI_INDUCED,         45)            \
-       _ER(EPT_VIOLATION,       48)            \
-       _ER(EPT_MISCONFIG,       49)            \
-       _ER(INVEPT,              50)            \
-       _ER(PREEMPTION_TIMER,    52)            \
-       _ER(WBINVD,              54)            \
-       _ER(XSETBV,              55)            \
-       _ER(APIC_WRITE,          56)            \
-       _ER(INVPCID,             58)            \
-       _ER(PML_FULL,            62)            \
-       _ER(XSAVES,              63)            \
-       _ER(XRSTORS,             64)
-
-#define SVM_EXIT_REASONS \
-       _ER(EXIT_READ_CR0,      0x000)          \
-       _ER(EXIT_READ_CR3,      0x003)          \
-       _ER(EXIT_READ_CR4,      0x004)          \
-       _ER(EXIT_READ_CR8,      0x008)          \
-       _ER(EXIT_WRITE_CR0,     0x010)          \
-       _ER(EXIT_WRITE_CR3,     0x013)          \
-       _ER(EXIT_WRITE_CR4,     0x014)          \
-       _ER(EXIT_WRITE_CR8,     0x018)          \
-       _ER(EXIT_READ_DR0,      0x020)          \
-       _ER(EXIT_READ_DR1,      0x021)          \
-       _ER(EXIT_READ_DR2,      0x022)          \
-       _ER(EXIT_READ_DR3,      0x023)          \
-       _ER(EXIT_READ_DR4,      0x024)          \
-       _ER(EXIT_READ_DR5,      0x025)          \
-       _ER(EXIT_READ_DR6,      0x026)          \
-       _ER(EXIT_READ_DR7,      0x027)          \
-       _ER(EXIT_WRITE_DR0,     0x030)          \
-       _ER(EXIT_WRITE_DR1,     0x031)          \
-       _ER(EXIT_WRITE_DR2,     0x032)          \
-       _ER(EXIT_WRITE_DR3,     0x033)          \
-       _ER(EXIT_WRITE_DR4,     0x034)          \
-       _ER(EXIT_WRITE_DR5,     0x035)          \
-       _ER(EXIT_WRITE_DR6,     0x036)          \
-       _ER(EXIT_WRITE_DR7,     0x037)          \
-       _ER(EXIT_EXCP_BASE,     0x040)          \
-       _ER(EXIT_INTR,          0x060)          \
-       _ER(EXIT_NMI,           0x061)          \
-       _ER(EXIT_SMI,           0x062)          \
-       _ER(EXIT_INIT,          0x063)          \
-       _ER(EXIT_VINTR,         0x064)          \
-       _ER(EXIT_CR0_SEL_WRITE, 0x065)          \
-       _ER(EXIT_IDTR_READ,     0x066)          \
-       _ER(EXIT_GDTR_READ,     0x067)          \
-       _ER(EXIT_LDTR_READ,     0x068)          \
-       _ER(EXIT_TR_READ,       0x069)          \
-       _ER(EXIT_IDTR_WRITE,    0x06a)          \
-       _ER(EXIT_GDTR_WRITE,    0x06b)          \
-       _ER(EXIT_LDTR_WRITE,    0x06c)          \
-       _ER(EXIT_TR_WRITE,      0x06d)          \
-       _ER(EXIT_RDTSC,         0x06e)          \
-       _ER(EXIT_RDPMC,         0x06f)          \
-       _ER(EXIT_PUSHF,         0x070)          \
-       _ER(EXIT_POPF,          0x071)          \
-       _ER(EXIT_CPUID,         0x072)          \
-       _ER(EXIT_RSM,           0x073)          \
-       _ER(EXIT_IRET,          0x074)          \
-       _ER(EXIT_SWINT,         0x075)          \
-       _ER(EXIT_INVD,          0x076)          \
-       _ER(EXIT_PAUSE,         0x077)          \
-       _ER(EXIT_HLT,           0x078)          \
-       _ER(EXIT_INVLPG,        0x079)          \
-       _ER(EXIT_INVLPGA,       0x07a)          \
-       _ER(EXIT_IOIO,          0x07b)          \
-       _ER(EXIT_MSR,           0x07c)          \
-       _ER(EXIT_TASK_SWITCH,   0x07d)          \
-       _ER(EXIT_FERR_FREEZE,   0x07e)          \
-       _ER(EXIT_SHUTDOWN,      0x07f)          \
-       _ER(EXIT_VMRUN,         0x080)          \
-       _ER(EXIT_VMMCALL,       0x081)          \
-       _ER(EXIT_VMLOAD,        0x082)          \
-       _ER(EXIT_VMSAVE,        0x083)          \
-       _ER(EXIT_STGI,          0x084)          \
-       _ER(EXIT_CLGI,          0x085)          \
-       _ER(EXIT_SKINIT,        0x086)          \
-       _ER(EXIT_RDTSCP,        0x087)          \
-       _ER(EXIT_ICEBP,         0x088)          \
-       _ER(EXIT_WBINVD,        0x089)          \
-       _ER(EXIT_MONITOR,       0x08a)          \
-       _ER(EXIT_MWAIT,         0x08b)          \
-       _ER(EXIT_MWAIT_COND,    0x08c)          \
-       _ER(EXIT_NPF,           0x400)          \
-       _ER(EXIT_ERR,           -1)
-
-#define _ER(reason, val)       { #reason, val },
-struct str_values {
-       const char      *str;
-       int             val;
-};
-
-static struct str_values vmx_exit_reasons[] = {
-       VMX_EXIT_REASONS
-       { NULL, -1}
-};
-
-static struct str_values svm_exit_reasons[] = {
-       SVM_EXIT_REASONS
-       { NULL, -1}
-};
-
-static struct isa_exit_reasons {
-       unsigned isa;
-       struct str_values *strings;
-} isa_exit_reasons[] = {
-       { .isa = 1, .strings = vmx_exit_reasons },
-       { .isa = 2, .strings = svm_exit_reasons },
-       { }
-};
-
-static const char *find_exit_reason(unsigned isa, int val)
-{
-       struct str_values *strings = NULL;
-       int i;
-
-       for (i = 0; isa_exit_reasons[i].strings; ++i)
-               if (isa_exit_reasons[i].isa == isa) {
-                       strings = isa_exit_reasons[i].strings;
-                       break;
-               }
-       if (!strings)
-               return "UNKNOWN-ISA";
-       for (i = 0; strings[i].val >= 0; i++)
-               if (strings[i].val == val)
-                       break;
-
-       return strings[i].str;
-}
-
-static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
-                            struct tep_event *event, const char *field)
-{
-       unsigned long long isa;
-       unsigned long long val;
-       const char *reason;
-
-       if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
-               return -1;
-
-       if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
-               isa = 1;
-
-       reason = find_exit_reason(isa, val);
-       if (reason)
-               trace_seq_printf(s, "reason %s", reason);
-       else
-               trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
-       return 0;
-}
-
-static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
-                           struct tep_event *event, void *context)
-{
-       unsigned long long info1 = 0, info2 = 0;
-
-       if (print_exit_reason(s, record, event, "exit_reason") < 0)
-               return -1;
-
-       tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
-
-       if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
-           && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
-               trace_seq_printf(s, " info %llx %llx", info1, info2);
-
-       return 0;
-}
-
-#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
-#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
-#define KVM_EMUL_INSN_F_CS_D   (1 << 2)
-#define KVM_EMUL_INSN_F_CS_L   (1 << 3)
-
-static int kvm_emulate_insn_handler(struct trace_seq *s,
-                                   struct tep_record *record,
-                                   struct tep_event *event, void *context)
-{
-       unsigned long long rip, csbase, len, flags, failed;
-       int llen;
-       uint8_t *insn;
-       const char *disasm;
-
-       if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
-               return -1;
-
-       if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
-               return -1;
-
-       if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
-               return -1;
-
-       if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
-               return -1;
-
-       if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
-               return -1;
-
-       insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
-       if (!insn)
-               return -1;
-
-       disasm = disassemble(insn, len, rip,
-                            flags & KVM_EMUL_INSN_F_CR0_PE,
-                            flags & KVM_EMUL_INSN_F_EFL_VM,
-                            flags & KVM_EMUL_INSN_F_CS_D,
-                            flags & KVM_EMUL_INSN_F_CS_L);
-
-       trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm,
-                        failed ? " FAIL" : "");
-       return 0;
-}
-
-
-static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
-                                           struct tep_event *event, void *context)
-{
-       if (print_exit_reason(s, record, event, "exit_code") < 0)
-               return -1;
-
-       tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
-       tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
-       tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
-       tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
-
-       return 0;
-}
-
-static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
-                                    struct tep_event *event, void *context)
-{
-       tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
-
-       return kvm_nested_vmexit_inject_handler(s, record, event, context);
-}
-
-union kvm_mmu_page_role {
-       unsigned word;
-       struct {
-               unsigned level:4;
-               unsigned cr4_pae:1;
-               unsigned quadrant:2;
-               unsigned direct:1;
-               unsigned access:3;
-               unsigned invalid:1;
-               unsigned nxe:1;
-               unsigned cr0_wp:1;
-               unsigned smep_and_not_wp:1;
-               unsigned smap_and_not_wp:1;
-               unsigned pad_for_nice_hex_output:8;
-               unsigned smm:8;
-       };
-};
-
-static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
-                             struct tep_event *event, void *context)
-{
-       unsigned long long val;
-       static const char *access_str[] = {
-               "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
-       };
-       union kvm_mmu_page_role role;
-
-       if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
-               return -1;
-
-       role.word = (int)val;
-
-       /*
-        * We can only use the structure if file is of the same
-        * endianness.
-        */
-       if (tep_is_file_bigendian(event->tep) ==
-           tep_is_local_bigendian(event->tep)) {
-
-               trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
-                                role.level,
-                                role.quadrant,
-                                role.direct ? " direct" : "",
-                                access_str[role.access],
-                                role.invalid ? " invalid" : "",
-                                role.cr4_pae ? "" : "!",
-                                role.nxe ? "" : "!",
-                                role.cr0_wp ? "" : "!",
-                                role.smep_and_not_wp ? " smep" : "",
-                                role.smap_and_not_wp ? " smap" : "",
-                                role.smm ? " smm" : "");
-       } else
-               trace_seq_printf(s, "WORD: %08x", role.word);
-
-       tep_print_num_field(s, " root %u ",  event,
-                           "root_count", record, 1);
-
-       if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
-               return -1;
-
-       trace_seq_printf(s, "%s%c",  val ? "unsync" : "sync", 0);
-       return 0;
-}
-
-static int kvm_mmu_get_page_handler(struct trace_seq *s,
-                                   struct tep_record *record,
-                                   struct tep_event *event, void *context)
-{
-       unsigned long long val;
-
-       if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
-               return -1;
-
-       trace_seq_printf(s, "%s ", val ? "new" : "existing");
-
-       if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
-               return -1;
-
-       trace_seq_printf(s, "sp gfn %llx ", val);
-       return kvm_mmu_print_role(s, record, event, context);
-}
-
-#define PT_WRITABLE_SHIFT 1
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-
-static unsigned long long
-process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
-{
-       unsigned long pte = args[0];
-       return pte & PT_WRITABLE_MASK;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       init_disassembler();
-
-       tep_register_event_handler(tep, -1, "kvm", "kvm_exit",
-                                  kvm_exit_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
-                                  kvm_emulate_insn_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
-                                  kvm_nested_vmexit_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
-                                  kvm_nested_vmexit_inject_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
-                                  kvm_mmu_get_page_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
-                                  kvm_mmu_print_role, NULL);
-
-       tep_register_event_handler(tep, -1,
-                                  "kvmmmu", "kvm_mmu_unsync_page",
-                                  kvm_mmu_print_role, NULL);
-
-       tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
-                                  kvm_mmu_print_role, NULL);
-
-       tep_register_event_handler(tep, -1, "kvmmmu",
-                       "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
-                       NULL);
-
-       tep_register_print_function(tep,
-                                   process_is_writable_pte,
-                                   TEP_FUNC_ARG_INT,
-                                   "is_writable_pte",
-                                   TEP_FUNC_ARG_LONG,
-                                   TEP_FUNC_ARG_VOID);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit",
-                                    kvm_exit_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
-                                    kvm_emulate_insn_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
-                                    kvm_nested_vmexit_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
-                                    kvm_nested_vmexit_inject_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
-                                    kvm_mmu_get_page_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
-                                    kvm_mmu_print_role, NULL);
-
-       tep_unregister_event_handler(tep, -1,
-                                    "kvmmmu", "kvm_mmu_unsync_page",
-                                    kvm_mmu_print_role, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
-                                    kvm_mmu_print_role, NULL);
-
-       tep_unregister_event_handler(tep, -1, "kvmmmu",
-                       "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
-                       NULL);
-
-       tep_unregister_print_function(tep, process_is_writable_pte,
-                                     "is_writable_pte");
-}
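
The plugins deleted in this series all share the loader/unloader shape visible above, and the same files are re-created under tools/lib/traceevent/plugins/ below. A minimal sketch of that shape, assuming only the libtraceevent calls already used in this file; "my_subsys", "my_event" and the handler body are placeholders, not names from this tree:

    #include "event-parse.h"
    #include "trace-seq.h"

    static int my_handler(struct trace_seq *s, struct tep_record *record,
                          struct tep_event *event, void *context)
    {
            unsigned long long val;

            /* tep_get_field_val() returns non-zero when the field is missing */
            if (tep_get_field_val(s, event, "my_field", record, &val, 1))
                    return -1;

            trace_seq_printf(s, "my_field=%llu", val);
            return 0;
    }

    int TEP_PLUGIN_LOADER(struct tep_handle *tep)
    {
            tep_register_event_handler(tep, -1, "my_subsys", "my_event",
                                       my_handler, NULL);
            return 0;
    }

    void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
    {
            tep_unregister_event_handler(tep, -1, "my_subsys", "my_event",
                                         my_handler, NULL);
    }
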
diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c
deleted file mode 100644 (file)
index 884303c..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define INDENT 65
-
-static void print_string(struct trace_seq *s, struct tep_event *event,
-                        const char *name, const void *data)
-{
-       struct tep_format_field *f = tep_find_field(event, name);
-       int offset;
-       int length;
-
-       if (!f) {
-               trace_seq_printf(s, "NOTFOUND:%s", name);
-               return;
-       }
-
-       offset = f->offset;
-       length = f->size;
-
-       if (!strncmp(f->type, "__data_loc", 10)) {
-               unsigned long long v;
-               if (tep_read_number_field(f, data, &v)) {
-                       trace_seq_printf(s, "invalid_data_loc");
-                       return;
-               }
-               offset = v & 0xffff;
-               length = v >> 16;
-       }
-
-       trace_seq_printf(s, "%.*s", length, (char *)data + offset);
-}
-
-#define SF(fn) tep_print_num_field(s, fn ":%d", event, fn, record, 0)
-#define SFX(fn)        tep_print_num_field(s, fn ":%#x", event, fn, record, 0)
-#define SP()   trace_seq_putc(s, ' ')
-
-static int drv_bss_info_changed(struct trace_seq *s,
-                               struct tep_record *record,
-                               struct tep_event *event, void *context)
-{
-       void *data = record->data;
-
-       print_string(s, event, "wiphy_name", data);
-       trace_seq_printf(s, " vif:");
-       print_string(s, event, "vif_name", data);
-       tep_print_num_field(s, "(%d)", event, "vif_type", record, 1);
-
-       trace_seq_printf(s, "\n%*s", INDENT, "");
-       SF("assoc"); SP();
-       SF("aid"); SP();
-       SF("cts"); SP();
-       SF("shortpre"); SP();
-       SF("shortslot"); SP();
-       SF("dtimper"); SP();
-       trace_seq_printf(s, "\n%*s", INDENT, "");
-       SF("bcnint"); SP();
-       SFX("assoc_cap"); SP();
-       SFX("basic_rates"); SP();
-       SF("enable_beacon");
-       trace_seq_printf(s, "\n%*s", INDENT, "");
-       SF("ht_operation_mode");
-
-       return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_event_handler(tep, -1, "mac80211",
-                                  "drv_bss_info_changed",
-                                  drv_bss_info_changed, NULL);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_event_handler(tep, -1, "mac80211",
-                                    "drv_bss_info_changed",
-                                    drv_bss_info_changed, NULL);
-}
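
print_string() above handles dynamically sized event fields: a __data_loc word packs the payload offset into its low 16 bits and the payload length into the bits above. The same decoding as a standalone worked example, using a made-up value rather than a recorded field:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long v = 0x000a0040ULL;   /* hypothetical __data_loc word */
            unsigned int offset = v & 0xffff;       /* low 16 bits: offset into record */
            unsigned int length = v >> 16;          /* remaining bits: payload length */

            printf("offset=%u length=%u\n", offset, length);  /* offset=64 length=10 */
            return 0;
    }
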
diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
deleted file mode 100644 (file)
index 957389a..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static void write_state(struct trace_seq *s, int val)
-{
-       const char states[] = "SDTtZXxW";
-       int found = 0;
-       int i;
-
-       for (i = 0; i < (sizeof(states) - 1); i++) {
-               if (!(val & (1 << i)))
-                       continue;
-
-               if (found)
-                       trace_seq_putc(s, '|');
-
-               found = 1;
-               trace_seq_putc(s, states[i]);
-       }
-
-       if (!found)
-               trace_seq_putc(s, 'R');
-}
-
-static void write_and_save_comm(struct tep_format_field *field,
-                               struct tep_record *record,
-                               struct trace_seq *s, int pid)
-{
-       const char *comm;
-       int len;
-
-       comm = (char *)(record->data + field->offset);
-       len = s->len;
-       trace_seq_printf(s, "%.*s",
-                        field->size, comm);
-
-       /* make sure the comm has a \0 at the end. */
-       trace_seq_terminate(s);
-       comm = &s->buffer[len];
-
-       /* Help out the comm to ids. This will handle dups */
-       tep_register_comm(field->event->tep, comm, pid);
-}
-
-static int sched_wakeup_handler(struct trace_seq *s,
-                               struct tep_record *record,
-                               struct tep_event *event, void *context)
-{
-       struct tep_format_field *field;
-       unsigned long long val;
-
-       if (tep_get_field_val(s, event, "pid", record, &val, 1))
-               return trace_seq_putc(s, '!');
-
-       field = tep_find_any_field(event, "comm");
-       if (field) {
-               write_and_save_comm(field, record, s, val);
-               trace_seq_putc(s, ':');
-       }
-       trace_seq_printf(s, "%lld", val);
-
-       if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0)
-               trace_seq_printf(s, " [%lld]", val);
-
-       if (tep_get_field_val(s, event, "success", record, &val, 1) == 0)
-               trace_seq_printf(s, " success=%lld", val);
-
-       if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
-               trace_seq_printf(s, " CPU:%03llu", val);
-
-       return 0;
-}
-
-static int sched_switch_handler(struct trace_seq *s,
-                               struct tep_record *record,
-                               struct tep_event *event, void *context)
-{
-       struct tep_format_field *field;
-       unsigned long long val;
-
-       if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
-               return trace_seq_putc(s, '!');
-
-       field = tep_find_any_field(event, "prev_comm");
-       if (field) {
-               write_and_save_comm(field, record, s, val);
-               trace_seq_putc(s, ':');
-       }
-       trace_seq_printf(s, "%lld ", val);
-
-       if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
-               trace_seq_printf(s, "[%d] ", (int) val);
-
-       if (tep_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
-               write_state(s, val);
-
-       trace_seq_puts(s, " ==> ");
-
-       if (tep_get_field_val(s, event, "next_pid", record, &val, 1))
-               return trace_seq_putc(s, '!');
-
-       field = tep_find_any_field(event, "next_comm");
-       if (field) {
-               write_and_save_comm(field, record, s, val);
-               trace_seq_putc(s, ':');
-       }
-       trace_seq_printf(s, "%lld", val);
-
-       if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
-               trace_seq_printf(s, " [%d]", (int) val);
-
-       return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_event_handler(tep, -1, "sched", "sched_switch",
-                                  sched_switch_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "sched", "sched_wakeup",
-                                  sched_wakeup_handler, NULL);
-
-       tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new",
-                                  sched_wakeup_handler, NULL);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_event_handler(tep, -1, "sched", "sched_switch",
-                                    sched_switch_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup",
-                                    sched_wakeup_handler, NULL);
-
-       tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new",
-                                    sched_wakeup_handler, NULL);
-}
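
write_state() above expands the prev_state bitmask into one-letter task states separated by '|', printing 'R' (running) for a zero mask. The same logic as a standalone program, fed a hypothetical mask:

    #include <stdio.h>

    static void write_state(int val)
    {
            const char states[] = "SDTtZXxW";
            int found = 0;
            int i;

            for (i = 0; i < (int)(sizeof(states) - 1); i++) {
                    if (!(val & (1 << i)))
                            continue;
                    if (found)
                            putchar('|');
                    found = 1;
                    putchar(states[i]);
            }
            if (!found)
                    putchar('R');
    }

    int main(void)
    {
            write_state(0x5);       /* bits 0 and 2 set: prints "S|T" */
            putchar('\n');
            return 0;
    }
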
diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c
deleted file mode 100644 (file)
index 5d0387a..0000000
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include "event-parse.h"
-#include "trace-seq.h"
-
-typedef unsigned long sector_t;
-typedef uint64_t u64;
-typedef unsigned int u32;
-
-/*
- *      SCSI opcodes
- */
-#define TEST_UNIT_READY                        0x00
-#define REZERO_UNIT                    0x01
-#define REQUEST_SENSE                  0x03
-#define FORMAT_UNIT                    0x04
-#define READ_BLOCK_LIMITS              0x05
-#define REASSIGN_BLOCKS                        0x07
-#define INITIALIZE_ELEMENT_STATUS      0x07
-#define READ_6                         0x08
-#define WRITE_6                                0x0a
-#define SEEK_6                         0x0b
-#define READ_REVERSE                   0x0f
-#define WRITE_FILEMARKS                        0x10
-#define SPACE                          0x11
-#define INQUIRY                                0x12
-#define RECOVER_BUFFERED_DATA          0x14
-#define MODE_SELECT                    0x15
-#define RESERVE                                0x16
-#define RELEASE                                0x17
-#define COPY                           0x18
-#define ERASE                          0x19
-#define MODE_SENSE                     0x1a
-#define START_STOP                     0x1b
-#define RECEIVE_DIAGNOSTIC             0x1c
-#define SEND_DIAGNOSTIC                        0x1d
-#define ALLOW_MEDIUM_REMOVAL           0x1e
-
-#define READ_FORMAT_CAPACITIES         0x23
-#define SET_WINDOW                     0x24
-#define READ_CAPACITY                  0x25
-#define READ_10                                0x28
-#define WRITE_10                       0x2a
-#define SEEK_10                                0x2b
-#define POSITION_TO_ELEMENT            0x2b
-#define WRITE_VERIFY                   0x2e
-#define VERIFY                         0x2f
-#define SEARCH_HIGH                    0x30
-#define SEARCH_EQUAL                   0x31
-#define SEARCH_LOW                     0x32
-#define SET_LIMITS                     0x33
-#define PRE_FETCH                      0x34
-#define READ_POSITION                  0x34
-#define SYNCHRONIZE_CACHE              0x35
-#define LOCK_UNLOCK_CACHE              0x36
-#define READ_DEFECT_DATA               0x37
-#define MEDIUM_SCAN                    0x38
-#define COMPARE                                0x39
-#define COPY_VERIFY                    0x3a
-#define WRITE_BUFFER                   0x3b
-#define READ_BUFFER                    0x3c
-#define UPDATE_BLOCK                   0x3d
-#define READ_LONG                      0x3e
-#define WRITE_LONG                     0x3f
-#define CHANGE_DEFINITION              0x40
-#define WRITE_SAME                     0x41
-#define UNMAP                          0x42
-#define READ_TOC                       0x43
-#define READ_HEADER                    0x44
-#define GET_EVENT_STATUS_NOTIFICATION  0x4a
-#define LOG_SELECT                     0x4c
-#define LOG_SENSE                      0x4d
-#define XDWRITEREAD_10                 0x53
-#define MODE_SELECT_10                 0x55
-#define RESERVE_10                     0x56
-#define RELEASE_10                     0x57
-#define MODE_SENSE_10                  0x5a
-#define PERSISTENT_RESERVE_IN          0x5e
-#define PERSISTENT_RESERVE_OUT         0x5f
-#define VARIABLE_LENGTH_CMD            0x7f
-#define REPORT_LUNS                    0xa0
-#define SECURITY_PROTOCOL_IN           0xa2
-#define MAINTENANCE_IN                 0xa3
-#define MAINTENANCE_OUT                        0xa4
-#define MOVE_MEDIUM                    0xa5
-#define EXCHANGE_MEDIUM                        0xa6
-#define READ_12                                0xa8
-#define SERVICE_ACTION_OUT_12          0xa9
-#define WRITE_12                       0xaa
-#define SERVICE_ACTION_IN_12           0xab
-#define WRITE_VERIFY_12                        0xae
-#define VERIFY_12                      0xaf
-#define SEARCH_HIGH_12                 0xb0
-#define SEARCH_EQUAL_12                        0xb1
-#define SEARCH_LOW_12                  0xb2
-#define SECURITY_PROTOCOL_OUT          0xb5
-#define READ_ELEMENT_STATUS            0xb8
-#define SEND_VOLUME_TAG                        0xb6
-#define WRITE_LONG_2                   0xea
-#define EXTENDED_COPY                  0x83
-#define RECEIVE_COPY_RESULTS           0x84
-#define ACCESS_CONTROL_IN              0x86
-#define ACCESS_CONTROL_OUT             0x87
-#define READ_16                                0x88
-#define WRITE_16                       0x8a
-#define READ_ATTRIBUTE                 0x8c
-#define WRITE_ATTRIBUTE                        0x8d
-#define VERIFY_16                      0x8f
-#define SYNCHRONIZE_CACHE_16           0x91
-#define WRITE_SAME_16                  0x93
-#define SERVICE_ACTION_BIDIRECTIONAL   0x9d
-#define SERVICE_ACTION_IN_16           0x9e
-#define SERVICE_ACTION_OUT_16          0x9f
-/* values for service action in */
-#define        SAI_READ_CAPACITY_16            0x10
-#define SAI_GET_LBA_STATUS             0x12
-/* values for VARIABLE_LENGTH_CMD service action codes
- * see spc4r17 Section D.3.5, table D.7 and D.8 */
-#define VLC_SA_RECEIVE_CREDENTIAL      0x1800
-/* values for maintenance in */
-#define MI_REPORT_IDENTIFYING_INFORMATION              0x05
-#define MI_REPORT_TARGET_PGS                           0x0a
-#define MI_REPORT_ALIASES                              0x0b
-#define MI_REPORT_SUPPORTED_OPERATION_CODES            0x0c
-#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS  0x0d
-#define MI_REPORT_PRIORITY                             0x0e
-#define MI_REPORT_TIMESTAMP                            0x0f
-#define MI_MANAGEMENT_PROTOCOL_IN                      0x10
-/* value for MI_REPORT_TARGET_PGS ext header */
-#define MI_EXT_HDR_PARAM_FMT           0x20
-/* values for maintenance out */
-#define MO_SET_IDENTIFYING_INFORMATION 0x06
-#define MO_SET_TARGET_PGS              0x0a
-#define MO_CHANGE_ALIASES              0x0b
-#define MO_SET_PRIORITY                        0x0e
-#define MO_SET_TIMESTAMP               0x0f
-#define MO_MANAGEMENT_PROTOCOL_OUT     0x10
-/* values for variable length command */
-#define XDREAD_32                      0x03
-#define XDWRITE_32                     0x04
-#define XPWRITE_32                     0x06
-#define XDWRITEREAD_32                 0x07
-#define READ_32                                0x09
-#define VERIFY_32                      0x0a
-#define WRITE_32                       0x0b
-#define WRITE_SAME_32                  0x0d
-
-#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f)
-#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9])
-
-static const char *
-scsi_trace_misc(struct trace_seq *, unsigned char *, int);
-
-static const char *
-scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len;
-       sector_t lba = 0, txlen = 0;
-
-       lba |= ((cdb[1] & 0x1F) << 16);
-       lba |=  (cdb[2] << 8);
-       lba |=   cdb[3];
-       txlen = cdb[4];
-
-       trace_seq_printf(p, "lba=%llu txlen=%llu",
-                        (unsigned long long)lba, (unsigned long long)txlen);
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-static const char *
-scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len;
-       sector_t lba = 0, txlen = 0;
-
-       lba |= (cdb[2] << 24);
-       lba |= (cdb[3] << 16);
-       lba |= (cdb[4] << 8);
-       lba |=  cdb[5];
-       txlen |= (cdb[7] << 8);
-       txlen |=  cdb[8];
-
-       trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
-                        (unsigned long long)lba, (unsigned long long)txlen,
-                        cdb[1] >> 5);
-
-       if (cdb[0] == WRITE_SAME)
-               trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
-
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-static const char *
-scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len;
-       sector_t lba = 0, txlen = 0;
-
-       lba |= (cdb[2] << 24);
-       lba |= (cdb[3] << 16);
-       lba |= (cdb[4] << 8);
-       lba |=  cdb[5];
-       txlen |= (cdb[6] << 24);
-       txlen |= (cdb[7] << 16);
-       txlen |= (cdb[8] << 8);
-       txlen |=  cdb[9];
-
-       trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
-                        (unsigned long long)lba, (unsigned long long)txlen,
-                        cdb[1] >> 5);
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-static const char *
-scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len;
-       sector_t lba = 0, txlen = 0;
-
-       lba |= ((u64)cdb[2] << 56);
-       lba |= ((u64)cdb[3] << 48);
-       lba |= ((u64)cdb[4] << 40);
-       lba |= ((u64)cdb[5] << 32);
-       lba |= (cdb[6] << 24);
-       lba |= (cdb[7] << 16);
-       lba |= (cdb[8] << 8);
-       lba |=  cdb[9];
-       txlen |= (cdb[10] << 24);
-       txlen |= (cdb[11] << 16);
-       txlen |= (cdb[12] << 8);
-       txlen |=  cdb[13];
-
-       trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
-                        (unsigned long long)lba, (unsigned long long)txlen,
-                        cdb[1] >> 5);
-
-       if (cdb[0] == WRITE_SAME_16)
-               trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
-
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-static const char *
-scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len, *cmd;
-       sector_t lba = 0, txlen = 0;
-       u32 ei_lbrt = 0;
-
-       switch (SERVICE_ACTION32(cdb)) {
-       case READ_32:
-               cmd = "READ";
-               break;
-       case VERIFY_32:
-               cmd = "VERIFY";
-               break;
-       case WRITE_32:
-               cmd = "WRITE";
-               break;
-       case WRITE_SAME_32:
-               cmd = "WRITE_SAME";
-               break;
-       default:
-               trace_seq_printf(p, "UNKNOWN");
-               goto out;
-       }
-
-       lba |= ((u64)cdb[12] << 56);
-       lba |= ((u64)cdb[13] << 48);
-       lba |= ((u64)cdb[14] << 40);
-       lba |= ((u64)cdb[15] << 32);
-       lba |= (cdb[16] << 24);
-       lba |= (cdb[17] << 16);
-       lba |= (cdb[18] << 8);
-       lba |=  cdb[19];
-       ei_lbrt |= (cdb[20] << 24);
-       ei_lbrt |= (cdb[21] << 16);
-       ei_lbrt |= (cdb[22] << 8);
-       ei_lbrt |=  cdb[23];
-       txlen |= (cdb[28] << 24);
-       txlen |= (cdb[29] << 16);
-       txlen |= (cdb[30] << 8);
-       txlen |=  cdb[31];
-
-       trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u",
-                        cmd, (unsigned long long)lba,
-                        (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt);
-
-       if (SERVICE_ACTION32(cdb) == WRITE_SAME_32)
-               trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1);
-
-out:
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-static const char *
-scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len;
-       unsigned int regions = cdb[7] << 8 | cdb[8];
-
-       trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-static const char *
-scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len, *cmd;
-       sector_t lba = 0;
-       u32 alloc_len = 0;
-
-       switch (SERVICE_ACTION16(cdb)) {
-       case SAI_READ_CAPACITY_16:
-               cmd = "READ_CAPACITY_16";
-               break;
-       case SAI_GET_LBA_STATUS:
-               cmd = "GET_LBA_STATUS";
-               break;
-       default:
-               trace_seq_printf(p, "UNKNOWN");
-               goto out;
-       }
-
-       lba |= ((u64)cdb[2] << 56);
-       lba |= ((u64)cdb[3] << 48);
-       lba |= ((u64)cdb[4] << 40);
-       lba |= ((u64)cdb[5] << 32);
-       lba |= (cdb[6] << 24);
-       lba |= (cdb[7] << 16);
-       lba |= (cdb[8] << 8);
-       lba |=  cdb[9];
-       alloc_len |= (cdb[10] << 24);
-       alloc_len |= (cdb[11] << 16);
-       alloc_len |= (cdb[12] << 8);
-       alloc_len |=  cdb[13];
-
-       trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd,
-                        (unsigned long long)lba, alloc_len);
-
-out:
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-static const char *
-scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       switch (SERVICE_ACTION32(cdb)) {
-       case READ_32:
-       case VERIFY_32:
-       case WRITE_32:
-       case WRITE_SAME_32:
-               return scsi_trace_rw32(p, cdb, len);
-       default:
-               return scsi_trace_misc(p, cdb, len);
-       }
-}
-
-static const char *
-scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       const char *ret = p->buffer + p->len;
-
-       trace_seq_printf(p, "-");
-       trace_seq_putc(p, 0);
-       return ret;
-}
-
-const char *
-scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
-{
-       switch (cdb[0]) {
-       case READ_6:
-       case WRITE_6:
-               return scsi_trace_rw6(p, cdb, len);
-       case READ_10:
-       case VERIFY:
-       case WRITE_10:
-       case WRITE_SAME:
-               return scsi_trace_rw10(p, cdb, len);
-       case READ_12:
-       case VERIFY_12:
-       case WRITE_12:
-               return scsi_trace_rw12(p, cdb, len);
-       case READ_16:
-       case VERIFY_16:
-       case WRITE_16:
-       case WRITE_SAME_16:
-               return scsi_trace_rw16(p, cdb, len);
-       case UNMAP:
-               return scsi_trace_unmap(p, cdb, len);
-       case SERVICE_ACTION_IN_16:
-               return scsi_trace_service_action_in(p, cdb, len);
-       case VARIABLE_LENGTH_CMD:
-               return scsi_trace_varlen(p, cdb, len);
-       default:
-               return scsi_trace_misc(p, cdb, len);
-       }
-}
-
-unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
-                                               unsigned long long *args)
-{
-       scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]);
-       return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_print_function(tep,
-                                   process_scsi_trace_parse_cdb,
-                                   TEP_FUNC_ARG_STRING,
-                                   "scsi_trace_parse_cdb",
-                                   TEP_FUNC_ARG_PTR,
-                                   TEP_FUNC_ARG_PTR,
-                                   TEP_FUNC_ARG_INT,
-                                   TEP_FUNC_ARG_VOID);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_print_function(tep, process_scsi_trace_parse_cdb,
-                                     "scsi_trace_parse_cdb");
-}
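
The rw6/rw10/rw12/rw16 decoders above all assemble big-endian CDB fields byte by byte; scsi_trace_rw10(), for instance, builds a 32-bit LBA from cdb[2..5] and a 16-bit transfer length from cdb[7..8]. The same arithmetic in isolation, on a sample READ_10 CDB (the bytes are illustrative, not captured from a device):

    #include <stdio.h>

    int main(void)
    {
            /* READ_10, LBA 0x12345678, 8 blocks */
            unsigned char cdb[10] = { 0x28, 0x00, 0x12, 0x34, 0x56, 0x78,
                                      0x00, 0x00, 0x08, 0x00 };
            unsigned long lba = 0, txlen = 0;

            lba |= (unsigned long)cdb[2] << 24;
            lba |= (unsigned long)cdb[3] << 16;
            lba |= (unsigned long)cdb[4] << 8;
            lba |= cdb[5];
            txlen |= cdb[7] << 8;
            txlen |= cdb[8];

            printf("lba=%lu txlen=%lu\n", lba, txlen);  /* lba=305419896 txlen=8 */
            return 0;
    }
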
diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c
deleted file mode 100644 (file)
index 993b208..0000000
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define __HYPERVISOR_set_trap_table                    0
-#define __HYPERVISOR_mmu_update                                1
-#define __HYPERVISOR_set_gdt                           2
-#define __HYPERVISOR_stack_switch                      3
-#define __HYPERVISOR_set_callbacks                     4
-#define __HYPERVISOR_fpu_taskswitch                    5
-#define __HYPERVISOR_sched_op_compat                   6
-#define __HYPERVISOR_dom0_op                           7
-#define __HYPERVISOR_set_debugreg                      8
-#define __HYPERVISOR_get_debugreg                      9
-#define __HYPERVISOR_update_descriptor                 10
-#define __HYPERVISOR_memory_op                         12
-#define __HYPERVISOR_multicall                         13
-#define __HYPERVISOR_update_va_mapping                 14
-#define __HYPERVISOR_set_timer_op                      15
-#define __HYPERVISOR_event_channel_op_compat           16
-#define __HYPERVISOR_xen_version                       17
-#define __HYPERVISOR_console_io                                18
-#define __HYPERVISOR_physdev_op_compat                 19
-#define __HYPERVISOR_grant_table_op                    20
-#define __HYPERVISOR_vm_assist                         21
-#define __HYPERVISOR_update_va_mapping_otherdomain     22
-#define __HYPERVISOR_iret                              23 /* x86 only */
-#define __HYPERVISOR_vcpu_op                           24
-#define __HYPERVISOR_set_segment_base                  25 /* x86/64 only */
-#define __HYPERVISOR_mmuext_op                         26
-#define __HYPERVISOR_acm_op                            27
-#define __HYPERVISOR_nmi_op                            28
-#define __HYPERVISOR_sched_op                          29
-#define __HYPERVISOR_callback_op                       30
-#define __HYPERVISOR_xenoprof_op                       31
-#define __HYPERVISOR_event_channel_op                  32
-#define __HYPERVISOR_physdev_op                                33
-#define __HYPERVISOR_hvm_op                            34
-#define __HYPERVISOR_tmem_op                           38
-
-/* Architecture-specific hypercall definitions. */
-#define __HYPERVISOR_arch_0                            48
-#define __HYPERVISOR_arch_1                            49
-#define __HYPERVISOR_arch_2                            50
-#define __HYPERVISOR_arch_3                            51
-#define __HYPERVISOR_arch_4                            52
-#define __HYPERVISOR_arch_5                            53
-#define __HYPERVISOR_arch_6                            54
-#define __HYPERVISOR_arch_7                            55
-
-#define N(x)   [__HYPERVISOR_##x] = "("#x")"
-static const char *xen_hypercall_names[] = {
-       N(set_trap_table),
-       N(mmu_update),
-       N(set_gdt),
-       N(stack_switch),
-       N(set_callbacks),
-       N(fpu_taskswitch),
-       N(sched_op_compat),
-       N(dom0_op),
-       N(set_debugreg),
-       N(get_debugreg),
-       N(update_descriptor),
-       N(memory_op),
-       N(multicall),
-       N(update_va_mapping),
-       N(set_timer_op),
-       N(event_channel_op_compat),
-       N(xen_version),
-       N(console_io),
-       N(physdev_op_compat),
-       N(grant_table_op),
-       N(vm_assist),
-       N(update_va_mapping_otherdomain),
-       N(iret),
-       N(vcpu_op),
-       N(set_segment_base),
-       N(mmuext_op),
-       N(acm_op),
-       N(nmi_op),
-       N(sched_op),
-       N(callback_op),
-       N(xenoprof_op),
-       N(event_channel_op),
-       N(physdev_op),
-       N(hvm_op),
-
-/* Architecture-specific hypercall definitions. */
-       N(arch_0),
-       N(arch_1),
-       N(arch_2),
-       N(arch_3),
-       N(arch_4),
-       N(arch_5),
-       N(arch_6),
-       N(arch_7),
-};
-#undef N
-
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
-static const char *xen_hypercall_name(unsigned op)
-{
-       if (op < ARRAY_SIZE(xen_hypercall_names) &&
-           xen_hypercall_names[op] != NULL)
-               return xen_hypercall_names[op];
-
-       return "";
-}
-
-unsigned long long process_xen_hypercall_name(struct trace_seq *s,
-                                             unsigned long long *args)
-{
-       unsigned int op = args[0];
-
-       trace_seq_printf(s, "%s", xen_hypercall_name(op));
-       return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-       tep_register_print_function(tep,
-                                   process_xen_hypercall_name,
-                                   TEP_FUNC_ARG_STRING,
-                                   "xen_hypercall_name",
-                                   TEP_FUNC_ARG_INT,
-                                   TEP_FUNC_ARG_VOID);
-       return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-       tep_unregister_print_function(tep, process_xen_hypercall_name,
-                                     "xen_hypercall_name");
-}
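
The N() macro above builds a sparse lookup table with C99 designated array initializers, leaving NULL holes for unassigned hypercall numbers, and xen_hypercall_name() guards both the array bound and the holes. The same pattern in isolation, with a trimmed-down table:

    #include <stdio.h>

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    static const char *names[] = {
            [0]  = "(set_trap_table)",
            [1]  = "(mmu_update)",
            [29] = "(sched_op)",
    };

    static const char *name(unsigned int op)
    {
            if (op < ARRAY_SIZE(names) && names[op] != NULL)
                    return names[op];
            return "";
    }

    int main(void)
    {
            printf("op 29 -> %s, op 3 -> \"%s\"\n", name(29), name(3));
            return 0;
    }
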
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
new file mode 100644 (file)
index 0000000..210d269
--- /dev/null
@@ -0,0 +1,10 @@
+plugin_jbd2-y         += plugin_jbd2.o
+plugin_hrtimer-y      += plugin_hrtimer.o
+plugin_kmem-y         += plugin_kmem.o
+plugin_kvm-y          += plugin_kvm.o
+plugin_mac80211-y     += plugin_mac80211.o
+plugin_sched_switch-y += plugin_sched_switch.o
+plugin_function-y     += plugin_function.o
+plugin_xen-y          += plugin_xen.o
+plugin_scsi-y         += plugin_scsi.o
+plugin_cfg80211-y     += plugin_cfg80211.o
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
new file mode 100644 (file)
index 0000000..f440989
--- /dev/null
@@ -0,0 +1,222 @@
+# SPDX-License-Identifier: GPL-2.0
+
+#MAKEFLAGS += --no-print-directory
+
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
+$(call allow-override,PKG_CONFIG,pkg-config)
+
+EXT = -std=gnu99
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it were the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+
+set_plugin_dir := 1
+
+# Set plugin_dir to the preferred global plugin location.
+# If we install under the $HOME directory, we go under
+# $(HOME)/.local/lib/traceevent/plugins
+#
+# We don't set PLUGIN_DIR when we install under the $HOME
+# directory, because by default the code already looks under
+# $(HOME)/.local/lib/traceevent/plugins.
+#
+ifeq ($(plugin_dir),)
+ifeq ($(prefix),$(HOME))
+override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
+set_plugin_dir := 0
+else
+override plugin_dir = $(libdir)/traceevent/plugins
+endif
+endif
+
+ifeq ($(set_plugin_dir),1)
+PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
+PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
+endif
+
+include ../../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+export prefix libdir src obj
+
+# Shell quotes
+plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
+
+CONFIG_INCLUDES =
+CONFIG_LIBS    =
+CONFIG_FLAGS   =
+
+OBJ            = $@
+N              =
+
+INCLUDES = -I. -I.. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+# Append required CFLAGS
+override CFLAGS += -fPIC
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Disable command line variable (CFLAGS) overrides from the top
+# level Makefile (perf); otherwise this build Makefile would get
+# the same command line setup.
+MAKEOVERRIDES=
+
+export srctree OUTPUT CC LD CFLAGS V
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
+
+PLUGINS  = plugin_jbd2.so
+PLUGINS += plugin_hrtimer.so
+PLUGINS += plugin_kmem.so
+PLUGINS += plugin_kvm.so
+PLUGINS += plugin_mac80211.so
+PLUGINS += plugin_sched_switch.so
+PLUGINS += plugin_function.so
+PLUGINS += plugin_xen.so
+PLUGINS += plugin_scsi.so
+PLUGINS += plugin_cfg80211.so
+
+PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
+PLUGINS_IN := $(PLUGINS:.so=-in.o)
+
+plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE)
+
+__plugin_obj = $(notdir $@)
+  plugin_obj = $(__plugin_obj:-in.o=)
+
+$(PLUGINS_IN): force
+       $(Q)$(MAKE) $(build)=$(plugin_obj)
+
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+       $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
+$(OUTPUT)%.so: $(OUTPUT)%-in.o
+       $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
+
+define update_dir
+  (echo $1 > $@.tmp;                           \
+   if [ -r $@ ] && cmp -s $@ $@.tmp; then      \
+     rm -f $@.tmp;                             \
+   else                                                \
+     echo '  UPDATE                 $@';       \
+     mv -f $@.tmp $@;                          \
+   fi);
+endef
+
+tags:  force
+       $(RM) tags
+       find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+       --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
+
+TAGS:  force
+       $(RM) TAGS
+       find . -name '*.[ch]' | xargs etags \
+       --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
+
+define do_install_mkdir
+       if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+       fi
+endef
+
+define do_install
+       $(call do_install_mkdir,$2);                    \
+       $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+define do_install_plugins
+       for plugin in $1; do                            \
+         $(call do_install,$$plugin,$(plugin_dir_SQ)); \
+       done
+endef
+
+define do_generate_dynamic_list_file
+       symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
+       xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
+       if [ "$$symbol_type" = "U W" ];then                             \
+               (echo '{';                                              \
+               $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
+               echo '};';                                              \
+               ) > $2;                                                 \
+       else                                                            \
+               (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
+       fi
+endef
+
+install: $(PLUGINS)
+       $(call QUIET_INSTALL, trace_plugins) \
+       $(call do_install_plugins, $(PLUGINS))
+
+clean:
+       $(call QUIET_CLEAN, trace_plugins) \
+               $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
+               $(RM) $(OUTPUT)libtraceevent-dynamic-list; \
+               $(RM) TRACEEVENT-CFLAGS tags TAGS;
+
+PHONY += force plugins
+force:
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
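
Each plugin this Makefile produces is a standalone .so that libtraceevent loads at runtime from PLUGIN_DIR or $(HOME)/.local/lib/traceevent/plugins. A sketch of a consumer, assuming the tep_load_plugins()/tep_unload_plugins() pair from libtraceevent's event-plugin.c:

    #include "event-parse.h"

    int main(void)
    {
            struct tep_handle *tep = tep_alloc();
            struct tep_plugin_list *plugins;

            if (!tep)
                    return 1;

            /* scans the compiled-in and per-user plugin directories */
            plugins = tep_load_plugins(tep);

            /* ... parse trace data; the plugin handlers are now registered ... */

            tep_unload_plugins(plugins, tep);
            tep_free(tep);
            return 0;
    }
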
diff --git a/tools/lib/traceevent/plugins/plugin_cfg80211.c b/tools/lib/traceevent/plugins/plugin_cfg80211.c
new file mode 100644 (file)
index 0000000..3d43b56
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <endian.h>
+#include "event-parse.h"
+
+/*
+ * From glibc endian.h, for older systems where it is not present, e.g.
+ * RHEL5, Fedora 6.
+ */
+#ifndef le16toh
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define le16toh(x) (x)
+# else
+#  define le16toh(x) __bswap_16 (x)
+# endif
+#endif
+
+
+static unsigned long long
+process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
+{
+       uint16_t *val = (uint16_t *) (unsigned long) args[0];
+       return val ? (long long) le16toh(*val) : 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_print_function(tep,
+                                   process___le16_to_cpup,
+                                   TEP_FUNC_ARG_INT,
+                                   "__le16_to_cpup",
+                                   TEP_FUNC_ARG_PTR,
+                                   TEP_FUNC_ARG_VOID);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_print_function(tep, process___le16_to_cpup,
+                                     "__le16_to_cpup");
+}
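
process___le16_to_cpup() above dereferences the traced pointer and converts the little-endian value to host order, with the le16toh() fallback covering older glibc where endian.h lacks it. The conversion on its own, with sample bytes (memcpy sidesteps the unaligned cast the plugin can afford on its target platforms):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <endian.h>

    int main(void)
    {
            unsigned char raw[2] = { 0x34, 0x12 };  /* little-endian 0x1234 */
            uint16_t v;

            memcpy(&v, raw, sizeof(v));
            printf("0x%x\n", le16toh(v));           /* 0x1234 on any host */
            return 0;
    }
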
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
new file mode 100644 (file)
index 0000000..7770fcb
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "event-utils.h"
+#include "trace-seq.h"
+
+static struct func_stack {
+       int size;
+       char **stack;
+} *fstack;
+
+static int cpus = -1;
+
+#define STK_BLK 10
+
+struct tep_plugin_option plugin_options[] =
+{
+       {
+               .name = "parent",
+               .plugin_alias = "ftrace",
+               .description =
+               "Print parent of functions for function events",
+       },
+       {
+               .name = "indent",
+               .plugin_alias = "ftrace",
+               .description =
+               "Try to show function call indents, based on parents",
+               .set = 1,
+       },
+       {
+               .name = NULL,
+       }
+};
+
+static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
+static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
+
+static void add_child(struct func_stack *stack, const char *child, int pos)
+{
+       int i;
+
+       if (!child)
+               return;
+
+       if (pos < stack->size)
+               free(stack->stack[pos]);
+       else {
+               char **ptr;
+
+               ptr = realloc(stack->stack, sizeof(char *) *
+                             (stack->size + STK_BLK));
+               if (!ptr) {
+                       warning("could not allocate plugin memory\n");
+                       return;
+               }
+
+               stack->stack = ptr;
+
+               for (i = stack->size; i < stack->size + STK_BLK; i++)
+                       stack->stack[i] = NULL;
+               stack->size += STK_BLK;
+       }
+
+       stack->stack[pos] = strdup(child);
+}
+
+static int add_and_get_index(const char *parent, const char *child, int cpu)
+{
+       int i;
+
+       if (cpu < 0)
+               return 0;
+
+       if (cpu > cpus) {
+               struct func_stack *ptr;
+
+               ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1));
+               if (!ptr) {
+                       warning("could not allocate plugin memory\n");
+                       return 0;
+               }
+
+               fstack = ptr;
+
+               /* Account for holes in the cpu count */
+               for (i = cpus + 1; i <= cpu; i++)
+                       memset(&fstack[i], 0, sizeof(fstack[i]));
+               cpus = cpu;
+       }
+
+       for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) {
+               if (strcmp(parent, fstack[cpu].stack[i]) == 0) {
+                       add_child(&fstack[cpu], child, i+1);
+                       return i;
+               }
+       }
+
+       /* Not found */
+       add_child(&fstack[cpu], parent, 0);
+       add_child(&fstack[cpu], child, 1);
+       return 0;
+}
+
+static int function_handler(struct trace_seq *s, struct tep_record *record,
+                           struct tep_event *event, void *context)
+{
+       struct tep_handle *tep = event->tep;
+       unsigned long long function;
+       unsigned long long pfunction;
+       const char *func;
+       const char *parent;
+       int index = 0;
+
+       if (tep_get_field_val(s, event, "ip", record, &function, 1))
+               return trace_seq_putc(s, '!');
+
+       func = tep_find_function(tep, function);
+
+       if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
+               return trace_seq_putc(s, '!');
+
+       parent = tep_find_function(tep, pfunction);
+
+       if (parent && ftrace_indent->set)
+               index = add_and_get_index(parent, func, record->cpu);
+
+       trace_seq_printf(s, "%*s", index*3, "");
+
+       if (func)
+               trace_seq_printf(s, "%s", func);
+       else
+               trace_seq_printf(s, "0x%llx", function);
+
+       if (ftrace_parent->set) {
+               trace_seq_printf(s, " <-- ");
+               if (parent)
+                       trace_seq_printf(s, "%s", parent);
+               else
+                       trace_seq_printf(s, "0x%llx", pfunction);
+       }
+
+       return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_event_handler(tep, -1, "ftrace", "function",
+                                  function_handler, NULL);
+
+       tep_plugin_add_options("ftrace", plugin_options);
+
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       int i, x;
+
+       tep_unregister_event_handler(tep, -1, "ftrace", "function",
+                                    function_handler, NULL);
+
+       for (i = 0; i <= cpus; i++) {
+               for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
+                       free(fstack[i].stack[x]);
+               free(fstack[i].stack);
+       }
+
+       tep_plugin_remove_options(plugin_options);
+
+       free(fstack);
+       fstack = NULL;
+       cpus = -1;
+}
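
The indent heuristic above keeps a per-CPU stack of function names: a function whose parent already sits at depth i is printed at indent i, and an unknown parent restarts the stack. A self-contained sketch of that lookup for a single CPU (the fixed-size stack and the function names are hypothetical):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define DEPTH 16

    static char *stack[DEPTH];

    static int add_and_get_index(const char *parent, const char *child)
    {
            int i;

            for (i = 0; i < DEPTH - 1 && stack[i]; i++) {
                    if (strcmp(parent, stack[i]) == 0) {
                            free(stack[i + 1]);
                            stack[i + 1] = strdup(child);
                            return i;
                    }
            }

            /* parent not found: restart the stack */
            free(stack[0]);
            free(stack[1]);
            stack[0] = strdup(parent);
            stack[1] = strdup(child);
            return 0;
    }

    int main(void)
    {
            printf("%d\n", add_and_get_index("do_sys_open", "getname"));       /* 0 */
            printf("%d\n", add_and_get_index("getname", "kmem_cache_alloc"));  /* 1 */
            return 0;
    }
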
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
new file mode 100644 (file)
index 0000000..bb434e0
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+static int timer_expire_handler(struct trace_seq *s,
+                               struct tep_record *record,
+                               struct tep_event *event, void *context)
+{
+       trace_seq_printf(s, "hrtimer=");
+
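+       /*
+        * The field holding the hrtimer pointer is named "timer" on some
+        * kernel versions and "hrtimer" on others; probe one name quietly
+        * (err = 0) and only report an error on the fallback (err = 1).
+        */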
+       if (tep_print_num_field(s, "0x%llx", event, "timer",
+                               record, 0) == -1)
+               tep_print_num_field(s, "0x%llx", event, "hrtimer",
+                                   record, 1);
+
+       trace_seq_printf(s, " now=");
+
+       tep_print_num_field(s, "%llu", event, "now", record, 1);
+
+       tep_print_func_field(s, " function=%s", event, "function",
+                               record, 0);
+       return 0;
+}
+
+static int timer_start_handler(struct trace_seq *s,
+                              struct tep_record *record,
+                              struct tep_event *event, void *context)
+{
+       trace_seq_printf(s, "hrtimer=");
+
+       if (tep_print_num_field(s, "0x%llx", event, "timer",
+                               record, 0) == -1)
+               tep_print_num_field(s, "0x%llx", event, "hrtimer",
+                                   record, 1);
+
+       tep_print_func_field(s, " function=%s", event, "function",
+                            record, 0);
+
+       trace_seq_printf(s, " expires=");
+       tep_print_num_field(s, "%llu", event, "expires", record, 1);
+
+       trace_seq_printf(s, " softexpires=");
+       tep_print_num_field(s, "%llu", event, "softexpires", record, 1);
+       return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_event_handler(tep, -1,
+                                  "timer", "hrtimer_expire_entry",
+                                  timer_expire_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "timer", "hrtimer_start",
+                                  timer_start_handler, NULL);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_event_handler(tep, -1,
+                                    "timer", "hrtimer_expire_entry",
+                                    timer_expire_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start",
+                                    timer_start_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
new file mode 100644 (file)
index 0000000..04fc125
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#define MINORBITS      20
+#define MINORMASK      ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev)     ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev)     ((unsigned int) ((dev) & MINORMASK))
+
+static unsigned long long
+process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args)
+{
+       unsigned int dev = args[0];
+
+       trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev));
+       return 0;
+}
+
+static unsigned long long
+process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
+{
+       unsigned long long jiffies = args[0];
+
+       trace_seq_printf(s, "%lld", jiffies);
+       return jiffies;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_print_function(tep,
+                                   process_jbd2_dev_to_name,
+                                   TEP_FUNC_ARG_STRING,
+                                   "jbd2_dev_to_name",
+                                   TEP_FUNC_ARG_INT,
+                                   TEP_FUNC_ARG_VOID);
+
+       tep_register_print_function(tep,
+                                   process_jiffies_to_msecs,
+                                   TEP_FUNC_ARG_LONG,
+                                   "jiffies_to_msecs",
+                                   TEP_FUNC_ARG_LONG,
+                                   TEP_FUNC_ARG_VOID);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_print_function(tep, process_jbd2_dev_to_name,
+                                     "jbd2_dev_to_name");
+
+       tep_unregister_print_function(tep, process_jiffies_to_msecs,
+                                     "jiffies_to_msecs");
+}
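
MAJOR()/MINOR() above split the kernel's dev_t encoding, which reserves the low 20 bits for the minor number. A worked example with a constructed device number:

    #include <stdio.h>

    #define MINORBITS   20
    #define MINORMASK   ((1U << MINORBITS) - 1)
    #define MAJOR(dev)  ((unsigned int)((dev) >> MINORBITS))
    #define MINOR(dev)  ((unsigned int)((dev) & MINORMASK))

    int main(void)
    {
            unsigned int dev = (8U << MINORBITS) | 1;       /* 8:1, e.g. sda1 */

            printf("%u:%u\n", MAJOR(dev), MINOR(dev));      /* prints "8:1" */
            return 0;
    }
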
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
new file mode 100644 (file)
index 0000000..edaec5d
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+static int call_site_handler(struct trace_seq *s, struct tep_record *record,
+                            struct tep_event *event, void *context)
+{
+       struct tep_format_field *field;
+       unsigned long long val, addr;
+       void *data = record->data;
+       const char *func;
+
+       field = tep_find_field(event, "call_site");
+       if (!field)
+               return 1;
+
+       if (tep_read_number_field(field, data, &val))
+               return 1;
+
+       func = tep_find_function(event->tep, val);
+       if (!func)
+               return 1;
+
+       addr = tep_find_function_address(event->tep, val);
+
+       trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
+       return 1;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_event_handler(tep, -1, "kmem", "kfree",
+                                  call_site_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kmem", "kmalloc",
+                                  call_site_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kmem", "kmalloc_node",
+                                  call_site_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
+                                  call_site_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kmem",
+                                  "kmem_cache_alloc_node",
+                                  call_site_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free",
+                                  call_site_handler, NULL);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_event_handler(tep, -1, "kmem", "kfree",
+                                    call_site_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kmem", "kmalloc",
+                                    call_site_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node",
+                                    call_site_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
+                                    call_site_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kmem",
+                                    "kmem_cache_alloc_node",
+                                    call_site_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_free",
+                                    call_site_handler, NULL);
+}
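
call_site_handler() above turns a recorded return address into "(func+0xoff)" by subtracting the symbol's start address resolved via tep_find_function_address(). The offset arithmetic alone, with made-up addresses:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long val  = 0xffffffff811234abULL; /* sample call_site */
            unsigned long long addr = 0xffffffff81123400ULL; /* sample symbol start */

            printf("(%s+0x%x)\n", "example_caller", (int)(val - addr)); /* +0xab */
            return 0;
    }
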
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
new file mode 100644 (file)
index 0000000..c8e6230
--- /dev/null
@@ -0,0 +1,523 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#ifdef HAVE_UDIS86
+
+#include <udis86.h>
+
+static ud_t ud;
+
+static void init_disassembler(void)
+{
+       ud_init(&ud);
+       ud_set_syntax(&ud, UD_SYN_ATT);
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+                              int cr0_pe, int eflags_vm,
+                              int cs_d, int cs_l)
+{
+       int mode;
+
+       if (!cr0_pe)
+               mode = 16;
+       else if (eflags_vm)
+               mode = 16;
+       else if (cs_l)
+               mode = 64;
+       else if (cs_d)
+               mode = 32;
+       else
+               mode = 16;
+
+       ud_set_pc(&ud, rip);
+       ud_set_mode(&ud, mode);
+       ud_set_input_buffer(&ud, insn, len);
+       ud_disassemble(&ud);
+       return ud_insn_asm(&ud);
+}
+
+#else
+
+static void init_disassembler(void)
+{
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+                              int cr0_pe, int eflags_vm,
+                              int cs_d, int cs_l)
+{
+       static char out[15*3+1];
+       int i;
+
+       /* clamp to the 15-byte buffer; also guards the len == 0 case */
+       if (len < 1)
+               return "";
+       if (len > 15)
+               len = 15;
+
+       for (i = 0; i < len; ++i)
+               sprintf(out + i * 3, "%02x ", insn[i]);
+       out[len*3-1] = '\0';
+       return out;
+}
+
+#endif
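
Without udis86 the fallback simply hex-dumps the instruction bytes, three characters per byte, overwriting the final space with a terminator. A standalone sketch of the same formatting, with hypothetical bytes:

/* Sketch of the hex-dump fallback output */
#include <stdio.h>

int main(void)
{
	unsigned char insn[] = { 0x0f, 0x01, 0xd9 };	/* hypothetical bytes */
	char out[15 * 3 + 1];
	int i, len = sizeof(insn);

	for (i = 0; i < len; ++i)
		sprintf(out + i * 3, "%02x ", insn[i]);
	out[len * 3 - 1] = '\0';

	printf("%s\n", out);	/* prints "0f 01 d9" */
	return 0;
}
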
+
+
+#define VMX_EXIT_REASONS                       \
+       _ER(EXCEPTION_NMI,       0)             \
+       _ER(EXTERNAL_INTERRUPT,  1)             \
+       _ER(TRIPLE_FAULT,        2)             \
+       _ER(PENDING_INTERRUPT,   7)             \
+       _ER(NMI_WINDOW,          8)             \
+       _ER(TASK_SWITCH,         9)             \
+       _ER(CPUID,               10)            \
+       _ER(HLT,                 12)            \
+       _ER(INVD,                13)            \
+       _ER(INVLPG,              14)            \
+       _ER(RDPMC,               15)            \
+       _ER(RDTSC,               16)            \
+       _ER(VMCALL,              18)            \
+       _ER(VMCLEAR,             19)            \
+       _ER(VMLAUNCH,            20)            \
+       _ER(VMPTRLD,             21)            \
+       _ER(VMPTRST,             22)            \
+       _ER(VMREAD,              23)            \
+       _ER(VMRESUME,            24)            \
+       _ER(VMWRITE,             25)            \
+       _ER(VMOFF,               26)            \
+       _ER(VMON,                27)            \
+       _ER(CR_ACCESS,           28)            \
+       _ER(DR_ACCESS,           29)            \
+       _ER(IO_INSTRUCTION,      30)            \
+       _ER(MSR_READ,            31)            \
+       _ER(MSR_WRITE,           32)            \
+       _ER(MWAIT_INSTRUCTION,   36)            \
+       _ER(MONITOR_INSTRUCTION, 39)            \
+       _ER(PAUSE_INSTRUCTION,   40)            \
+       _ER(MCE_DURING_VMENTRY,  41)            \
+       _ER(TPR_BELOW_THRESHOLD, 43)            \
+       _ER(APIC_ACCESS,         44)            \
+       _ER(EOI_INDUCED,         45)            \
+       _ER(EPT_VIOLATION,       48)            \
+       _ER(EPT_MISCONFIG,       49)            \
+       _ER(INVEPT,              50)            \
+       _ER(PREEMPTION_TIMER,    52)            \
+       _ER(WBINVD,              54)            \
+       _ER(XSETBV,              55)            \
+       _ER(APIC_WRITE,          56)            \
+       _ER(INVPCID,             58)            \
+       _ER(PML_FULL,            62)            \
+       _ER(XSAVES,              63)            \
+       _ER(XRSTORS,             64)
+
+#define SVM_EXIT_REASONS \
+       _ER(EXIT_READ_CR0,      0x000)          \
+       _ER(EXIT_READ_CR3,      0x003)          \
+       _ER(EXIT_READ_CR4,      0x004)          \
+       _ER(EXIT_READ_CR8,      0x008)          \
+       _ER(EXIT_WRITE_CR0,     0x010)          \
+       _ER(EXIT_WRITE_CR3,     0x013)          \
+       _ER(EXIT_WRITE_CR4,     0x014)          \
+       _ER(EXIT_WRITE_CR8,     0x018)          \
+       _ER(EXIT_READ_DR0,      0x020)          \
+       _ER(EXIT_READ_DR1,      0x021)          \
+       _ER(EXIT_READ_DR2,      0x022)          \
+       _ER(EXIT_READ_DR3,      0x023)          \
+       _ER(EXIT_READ_DR4,      0x024)          \
+       _ER(EXIT_READ_DR5,      0x025)          \
+       _ER(EXIT_READ_DR6,      0x026)          \
+       _ER(EXIT_READ_DR7,      0x027)          \
+       _ER(EXIT_WRITE_DR0,     0x030)          \
+       _ER(EXIT_WRITE_DR1,     0x031)          \
+       _ER(EXIT_WRITE_DR2,     0x032)          \
+       _ER(EXIT_WRITE_DR3,     0x033)          \
+       _ER(EXIT_WRITE_DR4,     0x034)          \
+       _ER(EXIT_WRITE_DR5,     0x035)          \
+       _ER(EXIT_WRITE_DR6,     0x036)          \
+       _ER(EXIT_WRITE_DR7,     0x037)          \
+       _ER(EXIT_EXCP_BASE,     0x040)          \
+       _ER(EXIT_INTR,          0x060)          \
+       _ER(EXIT_NMI,           0x061)          \
+       _ER(EXIT_SMI,           0x062)          \
+       _ER(EXIT_INIT,          0x063)          \
+       _ER(EXIT_VINTR,         0x064)          \
+       _ER(EXIT_CR0_SEL_WRITE, 0x065)          \
+       _ER(EXIT_IDTR_READ,     0x066)          \
+       _ER(EXIT_GDTR_READ,     0x067)          \
+       _ER(EXIT_LDTR_READ,     0x068)          \
+       _ER(EXIT_TR_READ,       0x069)          \
+       _ER(EXIT_IDTR_WRITE,    0x06a)          \
+       _ER(EXIT_GDTR_WRITE,    0x06b)          \
+       _ER(EXIT_LDTR_WRITE,    0x06c)          \
+       _ER(EXIT_TR_WRITE,      0x06d)          \
+       _ER(EXIT_RDTSC,         0x06e)          \
+       _ER(EXIT_RDPMC,         0x06f)          \
+       _ER(EXIT_PUSHF,         0x070)          \
+       _ER(EXIT_POPF,          0x071)          \
+       _ER(EXIT_CPUID,         0x072)          \
+       _ER(EXIT_RSM,           0x073)          \
+       _ER(EXIT_IRET,          0x074)          \
+       _ER(EXIT_SWINT,         0x075)          \
+       _ER(EXIT_INVD,          0x076)          \
+       _ER(EXIT_PAUSE,         0x077)          \
+       _ER(EXIT_HLT,           0x078)          \
+       _ER(EXIT_INVLPG,        0x079)          \
+       _ER(EXIT_INVLPGA,       0x07a)          \
+       _ER(EXIT_IOIO,          0x07b)          \
+       _ER(EXIT_MSR,           0x07c)          \
+       _ER(EXIT_TASK_SWITCH,   0x07d)          \
+       _ER(EXIT_FERR_FREEZE,   0x07e)          \
+       _ER(EXIT_SHUTDOWN,      0x07f)          \
+       _ER(EXIT_VMRUN,         0x080)          \
+       _ER(EXIT_VMMCALL,       0x081)          \
+       _ER(EXIT_VMLOAD,        0x082)          \
+       _ER(EXIT_VMSAVE,        0x083)          \
+       _ER(EXIT_STGI,          0x084)          \
+       _ER(EXIT_CLGI,          0x085)          \
+       _ER(EXIT_SKINIT,        0x086)          \
+       _ER(EXIT_RDTSCP,        0x087)          \
+       _ER(EXIT_ICEBP,         0x088)          \
+       _ER(EXIT_WBINVD,        0x089)          \
+       _ER(EXIT_MONITOR,       0x08a)          \
+       _ER(EXIT_MWAIT,         0x08b)          \
+       _ER(EXIT_MWAIT_COND,    0x08c)          \
+       _ER(EXIT_NPF,           0x400)          \
+       _ER(EXIT_ERR,           -1)
+
+#define _ER(reason, val)       { #reason, val },
+struct str_values {
+       const char      *str;
+       int             val;
+};
+
+static struct str_values vmx_exit_reasons[] = {
+       VMX_EXIT_REASONS
+       { NULL, -1}
+};
+
+static struct str_values svm_exit_reasons[] = {
+       SVM_EXIT_REASONS
+       { NULL, -1}
+};
+
+static struct isa_exit_reasons {
+       unsigned isa;
+       struct str_values *strings;
+} isa_exit_reasons[] = {
+       { .isa = 1, .strings = vmx_exit_reasons },
+       { .isa = 2, .strings = svm_exit_reasons },
+       { }
+};
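
Both reason tables above are generated with an X-macro: _ER is defined to expand each entry into a { name, value } initializer, so each reason list is written once and reused wherever a table is needed. A self-contained sketch of the same pattern, using illustrative names that are not part of the plugin:

/* Illustrative X-macro sketch of the table-building trick used above */
#include <stdio.h>

#define COLORS		\
	_C(RED,   0)	\
	_C(GREEN, 1)	\
	_C(BLUE,  2)

#define _C(name, val) { #name, val },
static struct { const char *str; int val; } colors[] = {
	COLORS
	{ NULL, -1 }	/* sentinel, same convention as the exit-reason tables */
};
#undef _C

int main(void)
{
	int i;

	for (i = 0; colors[i].str; i++)
		printf("%s = %d\n", colors[i].str, colors[i].val);
	return 0;
}
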
+
+static const char *find_exit_reason(unsigned isa, int val)
+{
+       struct str_values *strings = NULL;
+       int i;
+
+       for (i = 0; isa_exit_reasons[i].strings; ++i)
+               if (isa_exit_reasons[i].isa == isa) {
+                       strings = isa_exit_reasons[i].strings;
+                       break;
+               }
+       if (!strings)
+               return "UNKNOWN-ISA";
+       for (i = 0; strings[i].val >= 0; i++)
+               if (strings[i].val == val)
+                       break;
+
+       return strings[i].str;
+}
+
+static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
+                            struct tep_event *event, const char *field)
+{
+       unsigned long long isa;
+       unsigned long long val;
+       const char *reason;
+
+       if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
+               return -1;
+
+       if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
+               isa = 1;
+
+       reason = find_exit_reason(isa, val);
+       if (reason)
+               trace_seq_printf(s, "reason %s", reason);
+       else
+               trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
+       return 0;
+}
+
+static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
+                           struct tep_event *event, void *context)
+{
+       unsigned long long info1 = 0, info2 = 0;
+
+       if (print_exit_reason(s, record, event, "exit_reason") < 0)
+               return -1;
+
+       tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
+
+       if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
+           && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
+               trace_seq_printf(s, " info %llx %llx", info1, info2);
+
+       return 0;
+}
+
+#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
+#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
+#define KVM_EMUL_INSN_F_CS_D   (1 << 2)
+#define KVM_EMUL_INSN_F_CS_L   (1 << 3)
+
+static int kvm_emulate_insn_handler(struct trace_seq *s,
+                                   struct tep_record *record,
+                                   struct tep_event *event, void *context)
+{
+       unsigned long long rip, csbase, len, flags, failed;
+       int llen;
+       uint8_t *insn;
+       const char *disasm;
+
+       if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+               return -1;
+
+       if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
+               return -1;
+
+       if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
+               return -1;
+
+       if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
+               return -1;
+
+       if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
+               return -1;
+
+       insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
+       if (!insn)
+               return -1;
+
+       disasm = disassemble(insn, len, rip,
+                            flags & KVM_EMUL_INSN_F_CR0_PE,
+                            flags & KVM_EMUL_INSN_F_EFL_VM,
+                            flags & KVM_EMUL_INSN_F_CS_D,
+                            flags & KVM_EMUL_INSN_F_CS_L);
+
+       trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm,
+                        failed ? " FAIL" : "");
+       return 0;
+}
+
+
+static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
+                                           struct tep_event *event, void *context)
+{
+       if (print_exit_reason(s, record, event, "exit_code") < 0)
+               return -1;
+
+       tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
+       tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
+       tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
+       tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
+
+       return 0;
+}
+
+static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
+                                    struct tep_event *event, void *context)
+{
+       tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
+
+       return kvm_nested_vmexit_inject_handler(s, record, event, context);
+}
+
+union kvm_mmu_page_role {
+       unsigned word;
+       struct {
+               unsigned level:4;
+               unsigned cr4_pae:1;
+               unsigned quadrant:2;
+               unsigned direct:1;
+               unsigned access:3;
+               unsigned invalid:1;
+               unsigned nxe:1;
+               unsigned cr0_wp:1;
+               unsigned smep_and_not_wp:1;
+               unsigned smap_and_not_wp:1;
+               unsigned pad_for_nice_hex_output:8;
+               unsigned smm:8;
+       };
+};
+
+static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
+                             struct tep_event *event, void *context)
+{
+       unsigned long long val;
+       static const char *access_str[] = {
+               "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
+       };
+       union kvm_mmu_page_role role;
+
+       if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
+               return -1;
+
+       role.word = (int)val;
+
+       /*
+        * We can only use the bitfield structure if the trace file
+        * and the local host are of the same endianness.
+        */
+       if (tep_is_file_bigendian(event->tep) ==
+           tep_is_local_bigendian(event->tep)) {
+
+               trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
+                                role.level,
+                                role.quadrant,
+                                role.direct ? " direct" : "",
+                                access_str[role.access],
+                                role.invalid ? " invalid" : "",
+                                role.cr4_pae ? "" : "!",
+                                role.nxe ? "" : "!",
+                                role.cr0_wp ? "" : "!",
+                                role.smep_and_not_wp ? " smep" : "",
+                                role.smap_and_not_wp ? " smap" : "",
+                                role.smm ? " smm" : "");
+       } else
+               trace_seq_printf(s, "WORD: %08x", role.word);
+
+       tep_print_num_field(s, " root %u ",  event,
+                           "root_count", record, 1);
+
+       if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
+               return -1;
+
+       trace_seq_printf(s, "%s%c",  val ? "unsync" : "sync", 0);
+       return 0;
+}
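
The role word is decoded through the bitfield union only when the trace file and the host agree on endianness, since C bitfield layout follows the host's byte order; otherwise the raw word is printed. A standalone sketch of that guard, with the file's endianness hard-coded as an assumption:

/* Sketch: why the role bitfield is only decoded on matching endianness */
#include <stdio.h>
#include <stdint.h>

static int local_is_bigendian(void)
{
	/* same spirit as tep_is_local_bigendian() */
	uint32_t probe = 1;
	return *(uint8_t *)&probe == 0;
}

int main(void)
{
	/* A bitfield decodes a word relative to HOST byte order, so a
	 * word recorded on a machine of the other endianness must be
	 * shown raw (or byte-swapped) instead of decoded. */
	int file_is_bigendian = 0;	/* would come from the trace file */

	if (file_is_bigendian == local_is_bigendian())
		printf("decode via bitfield overlay\n");
	else
		printf("print raw word\n");
	return 0;
}
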
+
+static int kvm_mmu_get_page_handler(struct trace_seq *s,
+                                   struct tep_record *record,
+                                   struct tep_event *event, void *context)
+{
+       unsigned long long val;
+
+       if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
+               return -1;
+
+       trace_seq_printf(s, "%s ", val ? "new" : "existing");
+
+       if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
+               return -1;
+
+       trace_seq_printf(s, "sp gfn %llx ", val);
+       return kvm_mmu_print_role(s, record, event, context);
+}
+
+#define PT_WRITABLE_SHIFT 1
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+
+static unsigned long long
+process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
+{
+       unsigned long pte = args[0];
+       return pte & PT_WRITABLE_MASK;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       init_disassembler();
+
+       tep_register_event_handler(tep, -1, "kvm", "kvm_exit",
+                                  kvm_exit_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
+                                  kvm_emulate_insn_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
+                                  kvm_nested_vmexit_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
+                                  kvm_nested_vmexit_inject_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
+                                  kvm_mmu_get_page_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
+                                  kvm_mmu_print_role, NULL);
+
+       tep_register_event_handler(tep, -1,
+                                  "kvmmmu", "kvm_mmu_unsync_page",
+                                  kvm_mmu_print_role, NULL);
+
+       tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
+                                  kvm_mmu_print_role, NULL);
+
+       tep_register_event_handler(tep, -1, "kvmmmu",
+                       "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+                       NULL);
+
+       tep_register_print_function(tep,
+                                   process_is_writable_pte,
+                                   TEP_FUNC_ARG_INT,
+                                   "is_writable_pte",
+                                   TEP_FUNC_ARG_LONG,
+                                   TEP_FUNC_ARG_VOID);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit",
+                                    kvm_exit_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
+                                    kvm_emulate_insn_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
+                                    kvm_nested_vmexit_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
+                                    kvm_nested_vmexit_inject_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
+                                    kvm_mmu_get_page_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
+                                    kvm_mmu_print_role, NULL);
+
+       tep_unregister_event_handler(tep, -1,
+                                    "kvmmmu", "kvm_mmu_unsync_page",
+                                    kvm_mmu_print_role, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
+                                    kvm_mmu_print_role, NULL);
+
+       tep_unregister_event_handler(tep, -1, "kvmmmu",
+                       "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+                       NULL);
+
+       tep_unregister_print_function(tep, process_is_writable_pte,
+                                     "is_writable_pte");
+}
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
new file mode 100644 (file)
index 0000000..884303c
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#define INDENT 65
+
+static void print_string(struct trace_seq *s, struct tep_event *event,
+                        const char *name, const void *data)
+{
+       struct tep_format_field *f = tep_find_field(event, name);
+       int offset;
+       int length;
+
+       if (!f) {
+               trace_seq_printf(s, "NOTFOUND:%s", name);
+               return;
+       }
+
+       offset = f->offset;
+       length = f->size;
+
+       if (!strncmp(f->type, "__data_loc", 10)) {
+               unsigned long long v;
+               if (tep_read_number_field(f, data, &v)) {
+                       trace_seq_printf(s, "invalid_data_loc");
+                       return;
+               }
+               offset = v & 0xffff;
+               length = v >> 16;
+       }
+
+       trace_seq_printf(s, "%.*s", length, (char *)data + offset);
+}
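
A __data_loc field stores a locator rather than inline bytes: the low 16 bits hold the payload's offset within the record and the high 16 bits its length, which is exactly what the branch above unpacks. A standalone sketch with a hypothetical locator value:

/* Sketch of the __data_loc unpacking done above */
#include <stdio.h>

int main(void)
{
	/* hypothetical locator: length 5 at offset 0x20 */
	unsigned int v = (5u << 16) | 0x20;
	unsigned int offset = v & 0xffff;
	unsigned int length = v >> 16;

	printf("offset=%#x length=%u\n", offset, length);
	return 0;
}
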
+
+#define SF(fn) tep_print_num_field(s, fn ":%d", event, fn, record, 0)
+#define SFX(fn)        tep_print_num_field(s, fn ":%#x", event, fn, record, 0)
+#define SP()   trace_seq_putc(s, ' ')
+
+static int drv_bss_info_changed(struct trace_seq *s,
+                               struct tep_record *record,
+                               struct tep_event *event, void *context)
+{
+       void *data = record->data;
+
+       print_string(s, event, "wiphy_name", data);
+       trace_seq_printf(s, " vif:");
+       print_string(s, event, "vif_name", data);
+       tep_print_num_field(s, "(%d)", event, "vif_type", record, 1);
+
+       trace_seq_printf(s, "\n%*s", INDENT, "");
+       SF("assoc"); SP();
+       SF("aid"); SP();
+       SF("cts"); SP();
+       SF("shortpre"); SP();
+       SF("shortslot"); SP();
+       SF("dtimper"); SP();
+       trace_seq_printf(s, "\n%*s", INDENT, "");
+       SF("bcnint"); SP();
+       SFX("assoc_cap"); SP();
+       SFX("basic_rates"); SP();
+       SF("enable_beacon");
+       trace_seq_printf(s, "\n%*s", INDENT, "");
+       SF("ht_operation_mode");
+
+       return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_event_handler(tep, -1, "mac80211",
+                                  "drv_bss_info_changed",
+                                  drv_bss_info_changed, NULL);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_event_handler(tep, -1, "mac80211",
+                                    "drv_bss_info_changed",
+                                    drv_bss_info_changed, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
new file mode 100644 (file)
index 0000000..957389a
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+static void write_state(struct trace_seq *s, int val)
+{
+       const char states[] = "SDTtZXxW";
+       int found = 0;
+       int i;
+
+       for (i = 0; i < (sizeof(states) - 1); i++) {
+               if (!(val & (1 << i)))
+                       continue;
+
+               if (found)
+                       trace_seq_putc(s, '|');
+
+               found = 1;
+               trace_seq_putc(s, states[i]);
+       }
+
+       if (!found)
+               trace_seq_putc(s, 'R');
+}
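
write_state() treats the state value as a bitmask indexed into "SDTtZXxW" (bit 0 = S, bit 1 = D, and so on), joins multiple set bits with '|', and falls back to 'R' for a runnable task when no bit is set. A standalone sketch of the same decoding:

/* Standalone sketch of the prev_state decoding above */
#include <stdio.h>

static void print_state(int val)
{
	const char states[] = "SDTtZXxW";
	int found = 0, i;

	for (i = 0; i < (int)(sizeof(states) - 1); i++) {
		if (!(val & (1 << i)))
			continue;
		if (found)
			putchar('|');
		found = 1;
		putchar(states[i]);
	}
	if (!found)
		putchar('R');	/* no bit set: task is runnable */
	putchar('\n');
}

int main(void)
{
	print_state(0x0);	/* prints "R" */
	print_state(0x2);	/* prints "D" (uninterruptible sleep) */
	print_state(0x5);	/* prints "S|T" */
	return 0;
}
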
+
+static void write_and_save_comm(struct tep_format_field *field,
+                               struct tep_record *record,
+                               struct trace_seq *s, int pid)
+{
+       const char *comm;
+       int len;
+
+       comm = (char *)(record->data + field->offset);
+       len = s->len;
+       trace_seq_printf(s, "%.*s",
+                        field->size, comm);
+
+       /* make sure the comm has a \0 at the end. */
+       trace_seq_terminate(s);
+       comm = &s->buffer[len];
+
+       /* Register the comm for pid lookups; duplicate registrations are handled */
+       tep_register_comm(field->event->tep, comm, pid);
+}
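
write_and_save_comm() both prints the comm and registers it with the handle, so later records can resolve the pid back to a name. A minimal sketch of that mapping in isolation; the include path is an assumption:

/*
 * Hedged sketch: pid -> comm resolution via tep_register_comm().
 * The include path is an assumption; error handling is minimal.
 */
#include <stdio.h>
#include "event-parse.h"

int main(void)
{
	struct tep_handle *tep = tep_alloc();

	if (!tep)
		return 1;

	/* what write_and_save_comm() does for every comm it sees */
	tep_register_comm(tep, "bash", 1234);

	printf("pid 1234 -> %s\n", tep_data_comm_from_pid(tep, 1234));

	tep_free(tep);
	return 0;
}
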
+
+static int sched_wakeup_handler(struct trace_seq *s,
+                               struct tep_record *record,
+                               struct tep_event *event, void *context)
+{
+       struct tep_format_field *field;
+       unsigned long long val;
+
+       if (tep_get_field_val(s, event, "pid", record, &val, 1))
+               return trace_seq_putc(s, '!');
+
+       field = tep_find_any_field(event, "comm");
+       if (field) {
+               write_and_save_comm(field, record, s, val);
+               trace_seq_putc(s, ':');
+       }
+       trace_seq_printf(s, "%lld", val);
+
+       if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0)
+               trace_seq_printf(s, " [%lld]", val);
+
+       if (tep_get_field_val(s, event, "success", record, &val, 1) == 0)
+               trace_seq_printf(s, " success=%lld", val);
+
+       if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
+               trace_seq_printf(s, " CPU:%03llu", val);
+
+       return 0;
+}
+
+static int sched_switch_handler(struct trace_seq *s,
+                               struct tep_record *record,
+                               struct tep_event *event, void *context)
+{
+       struct tep_format_field *field;
+       unsigned long long val;
+
+       if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
+               return trace_seq_putc(s, '!');
+
+       field = tep_find_any_field(event, "prev_comm");
+       if (field) {
+               write_and_save_comm(field, record, s, val);
+               trace_seq_putc(s, ':');
+       }
+       trace_seq_printf(s, "%lld ", val);
+
+       if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
+               trace_seq_printf(s, "[%d] ", (int) val);
+
+       if (tep_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
+               write_state(s, val);
+
+       trace_seq_puts(s, " ==> ");
+
+       if (tep_get_field_val(s, event, "next_pid", record, &val, 1))
+               return trace_seq_putc(s, '!');
+
+       field = tep_find_any_field(event, "next_comm");
+       if (field) {
+               write_and_save_comm(field, record, s, val);
+               trace_seq_putc(s, ':');
+       }
+       trace_seq_printf(s, "%lld", val);
+
+       if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
+               trace_seq_printf(s, " [%d]", (int) val);
+
+       return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_event_handler(tep, -1, "sched", "sched_switch",
+                                  sched_switch_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "sched", "sched_wakeup",
+                                  sched_wakeup_handler, NULL);
+
+       tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new",
+                                  sched_wakeup_handler, NULL);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_event_handler(tep, -1, "sched", "sched_switch",
+                                    sched_switch_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup",
+                                    sched_wakeup_handler, NULL);
+
+       tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new",
+                                    sched_wakeup_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_scsi.c b/tools/lib/traceevent/plugins/plugin_scsi.c
new file mode 100644 (file)
index 0000000..5d0387a
--- /dev/null
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include "event-parse.h"
+#include "trace-seq.h"
+
+typedef unsigned long sector_t;
+typedef uint64_t u64;
+typedef unsigned int u32;
+
+/*
+ *      SCSI opcodes
+ */
+#define TEST_UNIT_READY                        0x00
+#define REZERO_UNIT                    0x01
+#define REQUEST_SENSE                  0x03
+#define FORMAT_UNIT                    0x04
+#define READ_BLOCK_LIMITS              0x05
+#define REASSIGN_BLOCKS                        0x07
+#define INITIALIZE_ELEMENT_STATUS      0x07
+#define READ_6                         0x08
+#define WRITE_6                                0x0a
+#define SEEK_6                         0x0b
+#define READ_REVERSE                   0x0f
+#define WRITE_FILEMARKS                        0x10
+#define SPACE                          0x11
+#define INQUIRY                                0x12
+#define RECOVER_BUFFERED_DATA          0x14
+#define MODE_SELECT                    0x15
+#define RESERVE                                0x16
+#define RELEASE                                0x17
+#define COPY                           0x18
+#define ERASE                          0x19
+#define MODE_SENSE                     0x1a
+#define START_STOP                     0x1b
+#define RECEIVE_DIAGNOSTIC             0x1c
+#define SEND_DIAGNOSTIC                        0x1d
+#define ALLOW_MEDIUM_REMOVAL           0x1e
+
+#define READ_FORMAT_CAPACITIES         0x23
+#define SET_WINDOW                     0x24
+#define READ_CAPACITY                  0x25
+#define READ_10                                0x28
+#define WRITE_10                       0x2a
+#define SEEK_10                                0x2b
+#define POSITION_TO_ELEMENT            0x2b
+#define WRITE_VERIFY                   0x2e
+#define VERIFY                         0x2f
+#define SEARCH_HIGH                    0x30
+#define SEARCH_EQUAL                   0x31
+#define SEARCH_LOW                     0x32
+#define SET_LIMITS                     0x33
+#define PRE_FETCH                      0x34
+#define READ_POSITION                  0x34
+#define SYNCHRONIZE_CACHE              0x35
+#define LOCK_UNLOCK_CACHE              0x36
+#define READ_DEFECT_DATA               0x37
+#define MEDIUM_SCAN                    0x38
+#define COMPARE                                0x39
+#define COPY_VERIFY                    0x3a
+#define WRITE_BUFFER                   0x3b
+#define READ_BUFFER                    0x3c
+#define UPDATE_BLOCK                   0x3d
+#define READ_LONG                      0x3e
+#define WRITE_LONG                     0x3f
+#define CHANGE_DEFINITION              0x40
+#define WRITE_SAME                     0x41
+#define UNMAP                          0x42
+#define READ_TOC                       0x43
+#define READ_HEADER                    0x44
+#define GET_EVENT_STATUS_NOTIFICATION  0x4a
+#define LOG_SELECT                     0x4c
+#define LOG_SENSE                      0x4d
+#define XDWRITEREAD_10                 0x53
+#define MODE_SELECT_10                 0x55
+#define RESERVE_10                     0x56
+#define RELEASE_10                     0x57
+#define MODE_SENSE_10                  0x5a
+#define PERSISTENT_RESERVE_IN          0x5e
+#define PERSISTENT_RESERVE_OUT         0x5f
+#define VARIABLE_LENGTH_CMD            0x7f
+#define REPORT_LUNS                    0xa0
+#define SECURITY_PROTOCOL_IN           0xa2
+#define MAINTENANCE_IN                 0xa3
+#define MAINTENANCE_OUT                        0xa4
+#define MOVE_MEDIUM                    0xa5
+#define EXCHANGE_MEDIUM                        0xa6
+#define READ_12                                0xa8
+#define SERVICE_ACTION_OUT_12          0xa9
+#define WRITE_12                       0xaa
+#define SERVICE_ACTION_IN_12           0xab
+#define WRITE_VERIFY_12                        0xae
+#define VERIFY_12                      0xaf
+#define SEARCH_HIGH_12                 0xb0
+#define SEARCH_EQUAL_12                        0xb1
+#define SEARCH_LOW_12                  0xb2
+#define SECURITY_PROTOCOL_OUT          0xb5
+#define READ_ELEMENT_STATUS            0xb8
+#define SEND_VOLUME_TAG                        0xb6
+#define WRITE_LONG_2                   0xea
+#define EXTENDED_COPY                  0x83
+#define RECEIVE_COPY_RESULTS           0x84
+#define ACCESS_CONTROL_IN              0x86
+#define ACCESS_CONTROL_OUT             0x87
+#define READ_16                                0x88
+#define WRITE_16                       0x8a
+#define READ_ATTRIBUTE                 0x8c
+#define WRITE_ATTRIBUTE                        0x8d
+#define VERIFY_16                      0x8f
+#define SYNCHRONIZE_CACHE_16           0x91
+#define WRITE_SAME_16                  0x93
+#define SERVICE_ACTION_BIDIRECTIONAL   0x9d
+#define SERVICE_ACTION_IN_16           0x9e
+#define SERVICE_ACTION_OUT_16          0x9f
+/* values for service action in */
+#define        SAI_READ_CAPACITY_16            0x10
+#define SAI_GET_LBA_STATUS             0x12
+/* values for VARIABLE_LENGTH_CMD service action codes
+ * see spc4r17 Section D.3.5, table D.7 and D.8 */
+#define VLC_SA_RECEIVE_CREDENTIAL      0x1800
+/* values for maintenance in */
+#define MI_REPORT_IDENTIFYING_INFORMATION              0x05
+#define MI_REPORT_TARGET_PGS                           0x0a
+#define MI_REPORT_ALIASES                              0x0b
+#define MI_REPORT_SUPPORTED_OPERATION_CODES            0x0c
+#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS  0x0d
+#define MI_REPORT_PRIORITY                             0x0e
+#define MI_REPORT_TIMESTAMP                            0x0f
+#define MI_MANAGEMENT_PROTOCOL_IN                      0x10
+/* value for MI_REPORT_TARGET_PGS ext header */
+#define MI_EXT_HDR_PARAM_FMT           0x20
+/* values for maintenance out */
+#define MO_SET_IDENTIFYING_INFORMATION 0x06
+#define MO_SET_TARGET_PGS              0x0a
+#define MO_CHANGE_ALIASES              0x0b
+#define MO_SET_PRIORITY                        0x0e
+#define MO_SET_TIMESTAMP               0x0f
+#define MO_MANAGEMENT_PROTOCOL_OUT     0x10
+/* values for variable length command */
+#define XDREAD_32                      0x03
+#define XDWRITE_32                     0x04
+#define XPWRITE_32                     0x06
+#define XDWRITEREAD_32                 0x07
+#define READ_32                                0x09
+#define VERIFY_32                      0x0a
+#define WRITE_32                       0x0b
+#define WRITE_SAME_32                  0x0d
+
+#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f)
+#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9])
+
+static const char *
+scsi_trace_misc(struct trace_seq *, unsigned char *, int);
+
+static const char *
+scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len;
+       sector_t lba = 0, txlen = 0;
+
+       lba |= ((cdb[1] & 0x1F) << 16);
+       lba |=  (cdb[2] << 8);
+       lba |=   cdb[3];
+       txlen = cdb[4];
+
+       trace_seq_printf(p, "lba=%llu txlen=%llu",
+                        (unsigned long long)lba, (unsigned long long)txlen);
+       trace_seq_putc(p, 0);
+       return ret;
+}
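
SCSI CDBs carry multi-byte fields big-endian, so each decoder assembles them by shifting successive bytes; a 6-byte READ/WRITE packs a 21-bit LBA into the low 5 bits of byte 1 plus bytes 2 and 3, with an 8-bit transfer length in byte 4. A standalone sketch with hypothetical CDB bytes:

/* Sketch of the big-endian CDB field assembly used by the decoders */
#include <stdio.h>

int main(void)
{
	/* hypothetical READ_6 CDB: opcode, LBA, txlen, control */
	unsigned char cdb[6] = { 0x08, 0x01, 0x23, 0x45, 0x10, 0x00 };
	unsigned long lba = 0, txlen;

	lba |= (unsigned long)(cdb[1] & 0x1F) << 16;
	lba |= (unsigned long)cdb[2] << 8;
	lba |= cdb[3];
	txlen = cdb[4];

	printf("lba=%lu txlen=%lu\n", lba, txlen);	/* lba=74565 txlen=16 */
	return 0;
}
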
+
+static const char *
+scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len;
+       sector_t lba = 0, txlen = 0;
+
+       lba |= (cdb[2] << 24);
+       lba |= (cdb[3] << 16);
+       lba |= (cdb[4] << 8);
+       lba |=  cdb[5];
+       txlen |= (cdb[7] << 8);
+       txlen |=  cdb[8];
+
+       trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+                        (unsigned long long)lba, (unsigned long long)txlen,
+                        cdb[1] >> 5);
+
+       if (cdb[0] == WRITE_SAME)
+               trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+       trace_seq_putc(p, 0);
+       return ret;
+}
+
+static const char *
+scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len;
+       sector_t lba = 0, txlen = 0;
+
+       lba |= (cdb[2] << 24);
+       lba |= (cdb[3] << 16);
+       lba |= (cdb[4] << 8);
+       lba |=  cdb[5];
+       txlen |= (cdb[6] << 24);
+       txlen |= (cdb[7] << 16);
+       txlen |= (cdb[8] << 8);
+       txlen |=  cdb[9];
+
+       trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+                        (unsigned long long)lba, (unsigned long long)txlen,
+                        cdb[1] >> 5);
+       trace_seq_putc(p, 0);
+       return ret;
+}
+
+static const char *
+scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len;
+       sector_t lba = 0, txlen = 0;
+
+       lba |= ((u64)cdb[2] << 56);
+       lba |= ((u64)cdb[3] << 48);
+       lba |= ((u64)cdb[4] << 40);
+       lba |= ((u64)cdb[5] << 32);
+       lba |= (cdb[6] << 24);
+       lba |= (cdb[7] << 16);
+       lba |= (cdb[8] << 8);
+       lba |=  cdb[9];
+       txlen |= (cdb[10] << 24);
+       txlen |= (cdb[11] << 16);
+       txlen |= (cdb[12] << 8);
+       txlen |=  cdb[13];
+
+       trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+                        (unsigned long long)lba, (unsigned long long)txlen,
+                        cdb[1] >> 5);
+
+       if (cdb[0] == WRITE_SAME_16)
+               trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+       trace_seq_putc(p, 0);
+       return ret;
+}
+
+static const char *
+scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len, *cmd;
+       sector_t lba = 0, txlen = 0;
+       u32 ei_lbrt = 0;
+
+       switch (SERVICE_ACTION32(cdb)) {
+       case READ_32:
+               cmd = "READ";
+               break;
+       case VERIFY_32:
+               cmd = "VERIFY";
+               break;
+       case WRITE_32:
+               cmd = "WRITE";
+               break;
+       case WRITE_SAME_32:
+               cmd = "WRITE_SAME";
+               break;
+       default:
+               trace_seq_printf(p, "UNKNOWN");
+               goto out;
+       }
+
+       lba |= ((u64)cdb[12] << 56);
+       lba |= ((u64)cdb[13] << 48);
+       lba |= ((u64)cdb[14] << 40);
+       lba |= ((u64)cdb[15] << 32);
+       lba |= (cdb[16] << 24);
+       lba |= (cdb[17] << 16);
+       lba |= (cdb[18] << 8);
+       lba |=  cdb[19];
+       ei_lbrt |= (cdb[20] << 24);
+       ei_lbrt |= (cdb[21] << 16);
+       ei_lbrt |= (cdb[22] << 8);
+       ei_lbrt |=  cdb[23];
+       txlen |= (cdb[28] << 24);
+       txlen |= (cdb[29] << 16);
+       txlen |= (cdb[30] << 8);
+       txlen |=  cdb[31];
+
+       trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u",
+                        cmd, (unsigned long long)lba,
+                        (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt);
+
+       if (SERVICE_ACTION32(cdb) == WRITE_SAME_32)
+               trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1);
+
+out:
+       trace_seq_putc(p, 0);
+       return ret;
+}
+
+static const char *
+scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len;
+       unsigned int regions = cdb[7] << 8 | cdb[8];
+
+       trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
+       trace_seq_putc(p, 0);
+       return ret;
+}
+
+static const char *
+scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len, *cmd;
+       sector_t lba = 0;
+       u32 alloc_len = 0;
+
+       switch (SERVICE_ACTION16(cdb)) {
+       case SAI_READ_CAPACITY_16:
+               cmd = "READ_CAPACITY_16";
+               break;
+       case SAI_GET_LBA_STATUS:
+               cmd = "GET_LBA_STATUS";
+               break;
+       default:
+               trace_seq_printf(p, "UNKNOWN");
+               goto out;
+       }
+
+       lba |= ((u64)cdb[2] << 56);
+       lba |= ((u64)cdb[3] << 48);
+       lba |= ((u64)cdb[4] << 40);
+       lba |= ((u64)cdb[5] << 32);
+       lba |= (cdb[6] << 24);
+       lba |= (cdb[7] << 16);
+       lba |= (cdb[8] << 8);
+       lba |=  cdb[9];
+       alloc_len |= (cdb[10] << 24);
+       alloc_len |= (cdb[11] << 16);
+       alloc_len |= (cdb[12] << 8);
+       alloc_len |=  cdb[13];
+
+       trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd,
+                        (unsigned long long)lba, alloc_len);
+
+out:
+       trace_seq_putc(p, 0);
+       return ret;
+}
+
+static const char *
+scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       switch (SERVICE_ACTION32(cdb)) {
+       case READ_32:
+       case VERIFY_32:
+       case WRITE_32:
+       case WRITE_SAME_32:
+               return scsi_trace_rw32(p, cdb, len);
+       default:
+               return scsi_trace_misc(p, cdb, len);
+       }
+}
+
+static const char *
+scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       const char *ret = p->buffer + p->len;
+
+       trace_seq_printf(p, "-");
+       trace_seq_putc(p, 0);
+       return ret;
+}
+
+const char *
+scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
+{
+       switch (cdb[0]) {
+       case READ_6:
+       case WRITE_6:
+               return scsi_trace_rw6(p, cdb, len);
+       case READ_10:
+       case VERIFY:
+       case WRITE_10:
+       case WRITE_SAME:
+               return scsi_trace_rw10(p, cdb, len);
+       case READ_12:
+       case VERIFY_12:
+       case WRITE_12:
+               return scsi_trace_rw12(p, cdb, len);
+       case READ_16:
+       case VERIFY_16:
+       case WRITE_16:
+       case WRITE_SAME_16:
+               return scsi_trace_rw16(p, cdb, len);
+       case UNMAP:
+               return scsi_trace_unmap(p, cdb, len);
+       case SERVICE_ACTION_IN_16:
+               return scsi_trace_service_action_in(p, cdb, len);
+       case VARIABLE_LENGTH_CMD:
+               return scsi_trace_varlen(p, cdb, len);
+       default:
+               return scsi_trace_misc(p, cdb, len);
+       }
+}
+
+unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
+                                               unsigned long long *args)
+{
+       scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]);
+       return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_print_function(tep,
+                                   process_scsi_trace_parse_cdb,
+                                   TEP_FUNC_ARG_STRING,
+                                   "scsi_trace_parse_cdb",
+                                   TEP_FUNC_ARG_PTR,
+                                   TEP_FUNC_ARG_PTR,
+                                   TEP_FUNC_ARG_INT,
+                                   TEP_FUNC_ARG_VOID);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_print_function(tep, process_scsi_trace_parse_cdb,
+                                     "scsi_trace_parse_cdb");
+}
diff --git a/tools/lib/traceevent/plugins/plugin_xen.c b/tools/lib/traceevent/plugins/plugin_xen.c
new file mode 100644 (file)
index 0000000..993b208
--- /dev/null
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#define __HYPERVISOR_set_trap_table                    0
+#define __HYPERVISOR_mmu_update                                1
+#define __HYPERVISOR_set_gdt                           2
+#define __HYPERVISOR_stack_switch                      3
+#define __HYPERVISOR_set_callbacks                     4
+#define __HYPERVISOR_fpu_taskswitch                    5
+#define __HYPERVISOR_sched_op_compat                   6
+#define __HYPERVISOR_dom0_op                           7
+#define __HYPERVISOR_set_debugreg                      8
+#define __HYPERVISOR_get_debugreg                      9
+#define __HYPERVISOR_update_descriptor                 10
+#define __HYPERVISOR_memory_op                         12
+#define __HYPERVISOR_multicall                         13
+#define __HYPERVISOR_update_va_mapping                 14
+#define __HYPERVISOR_set_timer_op                      15
+#define __HYPERVISOR_event_channel_op_compat           16
+#define __HYPERVISOR_xen_version                       17
+#define __HYPERVISOR_console_io                                18
+#define __HYPERVISOR_physdev_op_compat                 19
+#define __HYPERVISOR_grant_table_op                    20
+#define __HYPERVISOR_vm_assist                         21
+#define __HYPERVISOR_update_va_mapping_otherdomain     22
+#define __HYPERVISOR_iret                              23 /* x86 only */
+#define __HYPERVISOR_vcpu_op                           24
+#define __HYPERVISOR_set_segment_base                  25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op                         26
+#define __HYPERVISOR_acm_op                            27
+#define __HYPERVISOR_nmi_op                            28
+#define __HYPERVISOR_sched_op                          29
+#define __HYPERVISOR_callback_op                       30
+#define __HYPERVISOR_xenoprof_op                       31
+#define __HYPERVISOR_event_channel_op                  32
+#define __HYPERVISOR_physdev_op                                33
+#define __HYPERVISOR_hvm_op                            34
+#define __HYPERVISOR_tmem_op                           38
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0                            48
+#define __HYPERVISOR_arch_1                            49
+#define __HYPERVISOR_arch_2                            50
+#define __HYPERVISOR_arch_3                            51
+#define __HYPERVISOR_arch_4                            52
+#define __HYPERVISOR_arch_5                            53
+#define __HYPERVISOR_arch_6                            54
+#define __HYPERVISOR_arch_7                            55
+
+#define N(x)   [__HYPERVISOR_##x] = "("#x")"
+static const char *xen_hypercall_names[] = {
+       N(set_trap_table),
+       N(mmu_update),
+       N(set_gdt),
+       N(stack_switch),
+       N(set_callbacks),
+       N(fpu_taskswitch),
+       N(sched_op_compat),
+       N(dom0_op),
+       N(set_debugreg),
+       N(get_debugreg),
+       N(update_descriptor),
+       N(memory_op),
+       N(multicall),
+       N(update_va_mapping),
+       N(set_timer_op),
+       N(event_channel_op_compat),
+       N(xen_version),
+       N(console_io),
+       N(physdev_op_compat),
+       N(grant_table_op),
+       N(vm_assist),
+       N(update_va_mapping_otherdomain),
+       N(iret),
+       N(vcpu_op),
+       N(set_segment_base),
+       N(mmuext_op),
+       N(acm_op),
+       N(nmi_op),
+       N(sched_op),
+       N(callback_op),
+       N(xenoprof_op),
+       N(event_channel_op),
+       N(physdev_op),
+       N(hvm_op),
+
+/* Architecture-specific hypercall definitions. */
+       N(arch_0),
+       N(arch_1),
+       N(arch_2),
+       N(arch_3),
+       N(arch_4),
+       N(arch_5),
+       N(arch_6),
+       N(arch_7),
+};
+#undef N
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static const char *xen_hypercall_name(unsigned op)
+{
+       if (op < ARRAY_SIZE(xen_hypercall_names) &&
+           xen_hypercall_names[op] != NULL)
+               return xen_hypercall_names[op];
+
+       return "";
+}
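
The name table uses designated initializers keyed by hypercall number, which leaves NULL holes for unassigned numbers; the lookup therefore checks both the array bound and the NULL entry before returning. The same sparse-table pattern in a self-contained sketch:

/* Sketch of the sparse designated-initializer lookup used above */
#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

static const char *names[] = {
	[0] = "(zero)",
	[7] = "(seven)",	/* indices 1..6 stay NULL */
};

static const char *name_of(unsigned op)
{
	if (op < ARRAY_SIZE(names) && names[op] != NULL)
		return names[op];
	return "";
}

int main(void)
{
	printf("\"%s\" \"%s\" \"%s\"\n", name_of(0), name_of(3), name_of(7));
	return 0;
}
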
+
+unsigned long long process_xen_hypercall_name(struct trace_seq *s,
+                                             unsigned long long *args)
+{
+       unsigned int op = args[0];
+
+       trace_seq_printf(s, "%s", xen_hypercall_name(op));
+       return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+       tep_register_print_function(tep,
+                                   process_xen_hypercall_name,
+                                   TEP_FUNC_ARG_STRING,
+                                   "xen_hypercall_name",
+                                   TEP_FUNC_ARG_INT,
+                                   TEP_FUNC_ARG_VOID);
+       return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+       tep_unregister_print_function(tep, process_xen_hypercall_name,
+                                     "xen_hypercall_name");
+}
index 176f2f0..044c9a3 100644 (file)
@@ -138,7 +138,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
                "do_task_dead",
                "__module_put_and_exit",
                "complete_and_exit",
-               "kvm_spurious_fault",
                "__reiserfs_panic",
                "lbug_with_loc",
                "fortify_panic",
index 356b23a..2b62ba1 100644 (file)
@@ -71,6 +71,9 @@ ifdef::backend-docbook[]
 [header]
 template::[header-declarations]
 <refentry>
+ifdef::perf_date[]
+<refentryinfo><date>{perf_date}</date></refentryinfo>
+endif::perf_date[]
 <refmeta>
 <refentrytitle>{mantitle}</refentrytitle>
 <manvolnum>{manvolnum}</manvolnum>
index 4c62b07..52152d1 100644 (file)
@@ -36,8 +36,8 @@ III/ Jitdump file header format
 Each jitdump file starts with a fixed size header containing the following fields in order:
 
 
-* uint32_t magic     : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file
-* uint32_t version   : a 4-byte value representing the format version. It is currently set to 2
+* uint32_t magic     : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is written as 0x4A695444. The reader will detect an endian mismatch when it reads 0x4454694a.
+* uint32_t version   : a 4-byte value representing the format version. It is currently set to 1
 * uint32_t total_size: size in bytes of file header
 * uint32_t elf_mach  : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
 * uint32_t pad1      : padding. Reserved for future use
index a269d78..46f7fba 100644 (file)
@@ -924,7 +924,7 @@ ifndef NO_JVMTI
     JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
   else
     ifneq (,$(wildcard /usr/sbin/alternatives))
-      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g')
     endif
   endif
   ifndef JDIR
index f9807d8..902c792 100644 (file)
@@ -292,7 +292,7 @@ endif
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
-LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list
 
 #
 # The static build has no dynsym table, so this does not work for
@@ -567,7 +567,7 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
 # Create python binding output directory if not already present
 _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
 
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF)
        $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
         CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
          $(PYTHON_WORD) util/setup.py \
@@ -737,7 +737,7 @@ libtraceevent_plugins: FORCE
        $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
 
 $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
-       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
+       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list
 
 $(LIBTRACEEVENT)-clean:
        $(call QUIET_CLEAN, libtraceevent)
index e1d4b48..2ff6ced 100644 (file)
@@ -37,7 +37,7 @@ static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
 
        arm = zalloc(sizeof(*arm));
        if (!arm)
-               return -1;
+               return ENOMEM;
 
 #define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)"
        err = regcomp(&arm->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED);
@@ -59,5 +59,5 @@ out_free_call:
        regfree(&arm->call_insn);
 out_free_arm:
        free(arm);
-       return -1;
+       return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP;
 }
index c32db09..ede040c 100644 (file)
 #include "../../util/event.h"
 #include "../../util/evlist.h"
 #include "../../util/evsel.h"
+#include "../../util/evsel_config.h"
 #include "../../util/pmu.h"
 #include "../../util/cs-etm.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/session.h"
 
 #include <errno.h>
@@ -416,7 +417,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
                if (err)
                        goto out;
 
-               tracking_evsel = perf_evlist__last(evlist);
+               tracking_evsel = evlist__last(evlist);
                perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
                tracking_evsel->core.attr.freq = 0;
@@ -648,7 +649,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
        if (priv_size != cs_etm_info_priv_size(itr, session->evlist))
                return -EINVAL;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
        /* If the cpu_map is empty all online CPUs are involved */
index 43aa93e..037e292 100644 (file)
@@ -95,7 +95,7 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
 
        arm = zalloc(sizeof(*arm));
        if (!arm)
-               return -1;
+               return ENOMEM;
 
        /* bl, blr */
        err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED);
@@ -118,5 +118,5 @@ out_free_call:
        regfree(&arm->call_insn);
 out_free_arm:
        free(arm);
-       return -1;
+       return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP;
 }
index 4b36469..eba6541 100644 (file)
@@ -16,7 +16,7 @@
 #include "../../util/evsel.h"
 #include "../../util/evlist.h"
 #include "../../util/session.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/pmu.h"
 #include "../../util/debug.h"
 #include "../../util/auxtrace.h"
@@ -51,7 +51,7 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
        if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
                return -EINVAL;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
        auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
@@ -129,7 +129,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
        if (err)
                return err;
 
-       tracking_evsel = perf_evlist__last(evlist);
+       tracking_evsel = evlist__last(evlist);
        perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
        tracking_evsel->core.attr.freq = 0;
index b047b88..917b97d 100644 (file)
@@ -11,7 +11,6 @@
 #include <dwarf-regs.h>
 #include <linux/ptrace.h> /* for struct user_pt_regs */
 #include <linux/stringify.h>
-#include "util.h"
 
 struct pt_regs_dwarfnum {
        const char *name;
index e41defa..a32e4b7 100644 (file)
@@ -1,5 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <perf/cpumap.h>
+#include <internal/cpumap.h>
 #include <api/fs/fs.h>
 #include "debug.h"
 #include "header.h"
@@ -29,7 +31,7 @@ char *get_cpuid_str(struct perf_pmu *pmu)
 
        /* read midr from list of cpus mapped to this pmu */
        cpus = perf_cpu_map__get(pmu->cpus);
-       for (cpu = 0; cpu < cpus->nr; cpu++) {
+       for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
                scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
                                sysfs, cpus->map[cpu]);
 
index 002520d..1495a95 100644 (file)
@@ -5,8 +5,8 @@
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"
-#include "../../util/debug.h"
 #endif
+#include "../../util/debug.h"
 
 int LIBUNWIND__ARCH_REG_ID(int regnum)
 {
index 4952890..0c4f4ca 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/ptrace.h>
 #include <linux/kernel.h>
 #include <linux/stringify.h>
-#include "util.h"
 
 struct pt_regs_dwarfnum {
        const char *name;
index 0b24266..3b4cdfc 100644 (file)
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <sys/types.h>
+#include <errno.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <linux/stringify.h>
 #include "header.h"
-#include "util.h"
 
 #define mfspr(rn)       ({unsigned long rval; \
                         asm volatile("mfspr %0," __stringify(rn) \
@@ -31,7 +31,7 @@ get_cpuid(char *buffer, size_t sz)
                buffer[nb-1] = '\0';
                return 0;
        }
-       return -1;
+       return ENOBUFS;
 }
 
 char *
index f0dbf7b..9cc1c4a 100644 (file)
@@ -5,9 +5,11 @@
 #include "util/debug.h"
 #include "util/evsel.h"
 #include "util/evlist.h"
+#include "util/pmu.h"
 
 #include "book3s_hv_exits.h"
 #include "book3s_hcalls.h"
+#include <subcmd/parse-options.h>
 
 #define NR_TPS 4
 
@@ -172,3 +174,46 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
 
        return ret;
 }
+
+/*
+ * On the powerpc architecture, PMU registers are programmable by the
+ * guest kernel, so monitoring a guest from the host may not provide
+ * valid samples with the default 'cycles' event. It is better to use
+ * the 'trace_imc/trace_cycles' event for guest profiling, since it
+ * can track the guest instruction pointer in the trace record.
+ *
+ * Parse the arguments and, if no event is given, add the default one.
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+       const char **tmp;
+       bool event = false;
+       int i, j = *argc;
+
+       const struct option event_options[] = {
+               OPT_BOOLEAN('e', "event", &event, NULL),
+               OPT_END()
+       };
+
+       tmp = calloc(j + 1, sizeof(char *));
+       if (!tmp)
+               return -ENOMEM;
+
+       for (i = 0; i < j; i++)
+               tmp[i] = argv[i];
+
+       parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
+       if (!event) {
+               if (pmu_have_event("trace_imc", "trace_cycles")) {
+                       argv[j++] = strdup("-e");
+                       argv[j++] = strdup("trace_imc/trace_cycles/");
+                       *argc += 2;
+               } else {
+                       free(tmp);
+                       return -EINVAL;
+               }
+       }
+
+       free(tmp);
+       return 0;
+}
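
kvm_add_default_arch_event() parses a scratch copy of argv because parse_options() may permute and consume entries the caller still needs, and PARSE_OPT_KEEP_UNKNOWN makes it tolerate all the record options it does not recognize. A sketch of the same probing trick; has_event_option() is a hypothetical helper, not part of perf:

    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>
    #include <subcmd/parse-options.h>

    /* hypothetical: check for -e/--event without disturbing the caller's argv */
    static bool has_event_option(int argc, const char **argv)
    {
            bool event = false;
            const struct option opts[] = {
                    OPT_BOOLEAN('e', "event", &event, NULL),
                    OPT_END()
            };
            const char **copy = calloc(argc + 1, sizeof(*copy));

            if (!copy)
                    return false;
            memcpy(copy, argv, argc * sizeof(*copy));
            parse_options(argc, copy, opts, NULL, PARSE_OPT_KEEP_UNKNOWN);
            free(copy);
            return event;
    }
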
index fc9c2f5..3018a05 100644 (file)
@@ -13,6 +13,7 @@
 #include "util/callchain.h"
 #include "util/debug.h"
 #include "util/dso.h"
+#include "util/event.h" // struct ip_callchain
 #include "util/map.h"
 #include "util/symbol.h"
 
index 8a4b717..abb7a12 100644 (file)
@@ -4,7 +4,6 @@
  * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
  */
 
-#include "debug.h"
 #include "dso.h"
 #include "symbol.h"
 #include "map.h"
index cb19878..6ac8887 100644 (file)
@@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
 
 #
 # Syscall table generation for perf
index 89bb8f2..a50e70b 100644 (file)
@@ -164,8 +164,10 @@ static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
        if (!arch->initialized) {
                arch->initialized = true;
                arch->associate_instruction_ops = s390__associate_ins_ops;
-               if (cpuid)
-                       err = s390__cpuid_parse(arch, cpuid);
+               if (cpuid) {
+                       if (s390__cpuid_parse(arch, cpuid))
+                               err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING;
+               }
        }
 
        return err;
index b0fb70e..0db5c58 100644 (file)
@@ -1,4 +1,5 @@
 #include <stdbool.h>
+#include <stdlib.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
index 8b0b018..7933f68 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <sys/types.h>
+#include <errno.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <string.h>
@@ -54,7 +55,7 @@ int get_cpuid(char *buffer, size_t sz)
 
        sysinfo = fopen(SYSINFO, "r");
        if (sysinfo == NULL)
-               return -1;
+               return errno;
 
        while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
                if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) {
@@ -89,7 +90,7 @@ int get_cpuid(char *buffer, size_t sz)
 
        /* Missing manufacturer, type or model information should not happen */
        if (!manufacturer[0] || !type[0] || !model[0])
-               return -1;
+               return EINVAL;
 
        /*
         * Scan /proc/service_levels and return the CPU-MF counter facility
@@ -133,14 +134,14 @@ skip_sysinfo:
        else
                nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type,
                                  model);
-       return (nbytes >= sz) ? -1 : 0;
+       return (nbytes >= sz) ? ENOBUFS : 0;
 }
 
 char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
 {
        char *buf = malloc(128);
 
-       if (buf && get_cpuid(buf, 128) < 0)
+       if (buf && get_cpuid(buf, 128))
                zfree(&buf);
        return buf;
 }
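
With get_cpuid() now returning a positive errno (ENOBUFS, EINVAL, or the errno left by fopen()) instead of a bare -1, callers can report why the lookup failed. A sketch of the intended calling convention, assuming perf's pr_err() and tools/lib's str_error_r():

    char buf[128];
    int err = get_cpuid(buf, sizeof(buf));

    if (err)    /* positive errno; buf is free to reuse as scratch now */
            pr_err("failed to read cpuid: %s\n",
                   str_error_r(err, buf, sizeof(buf)));
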
index c8c86a0..724efb2 100644 (file)
@@ -2,7 +2,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <string.h>
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "machine.h"
 #include "api/fs/fs.h"
 #include "debug.h"
index 44f5aba..7eb5621 100644 (file)
@@ -196,8 +196,10 @@ static int x86__annotate_init(struct arch *arch, char *cpuid)
        if (arch->initialized)
                return 0;
 
-       if (cpuid)
-               err = x86__cpuid_parse(arch, cpuid);
+       if (cpuid) {
+               if (x86__cpuid_parse(arch, cpuid))
+                       err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING;
+       }
 
        arch->initialized = true;
        return err;
index 3b5cc33..3ec562a 100644 (file)
@@ -5,7 +5,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "arch-tests.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 
 #include <signal.h>
 #include <sys/mman.h>
@@ -63,9 +63,9 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt
                goto out;
        }
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
        if (!evsel) {
-               pr_debug("perf_evlist__first failed\n");
+               pr_debug("evlist__first failed\n");
                goto out;
        }
 
index eb36359..fa94795 100644 (file)
@@ -15,9 +15,9 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "record.h"
 #include "tsc.h"
+#include "util/mmap.h"
 #include "tests/tests.h"
 
 #include "arch-tests.h"
@@ -66,7 +66,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
        union perf_event *event;
        u64 test_tsc, comm1_tsc, comm2_tsc;
        u64 test_time, comm1_time = 0, comm2_time = 0;
-       struct perf_mmap *md;
+       struct mmap *md;
 
        threads = thread_map__new(-1, getpid(), UINT_MAX);
        CHECK_NOT_NULL__(threads);
@@ -83,7 +83,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
        perf_evlist__config(evlist, &opts, NULL);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        evsel->core.attr.comm = 1;
        evsel->core.attr.disabled = 1;
@@ -91,9 +91,9 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
        CHECK__(evlist__open(evlist));
 
-       CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+       CHECK__(evlist__mmap(evlist, UINT_MAX));
 
-       pc = evlist->mmap[0].base;
+       pc = evlist->mmap[0].core.base;
        ret = perf_read_tsc_conversion(pc, &tc);
        if (ret) {
                if (ret == -EOPNOTSUPP) {
@@ -115,7 +115,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
        evlist__disable(evlist);
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
index 6e67cee..1ea9166 100644 (file)
@@ -13,7 +13,7 @@
 #include "tests/tests.h"
 #include "cloexec.h"
 #include "event.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "arch-tests.h"
 
 static u64 rdpmc(unsigned int counter)
index 9876c7a..3e67915 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "../../../../arch/x86/include/asm/insn.h"
 #include "archinsn.h"
+#include "event.h"
 #include "machine.h"
 #include "thread.h"
 #include "symbol.h"
index a3a0b68..d357c62 100644 (file)
@@ -3,6 +3,8 @@
 #include <linux/string.h>
 #include <linux/zalloc.h>
 
+#include "../../util/event.h"
+#include "../../util/synthetic-events.h"
 #include "../../util/machine.h"
 #include "../../util/tool.h"
 #include "../../util/map.h"
index 662ecf8..aa6deb4 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <sys/types.h>
+#include <errno.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -58,7 +59,7 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
                buffer[nb-1] = '\0';
                return 0;
        }
-       return -1;
+       return ENOBUFS;
 }
 
 int
index d263430..f7f68a5 100644 (file)
@@ -15,6 +15,7 @@
 #include "../../util/event.h"
 #include "../../util/evsel.h"
 #include "../../util/evlist.h"
+#include "../../util/mmap.h"
 #include "../../util/session.h"
 #include "../../util/pmu.h"
 #include "../../util/debug.h"
@@ -22,7 +23,7 @@
 #include "../../util/tsc.h"
 #include "../../util/auxtrace.h"
 #include "../../util/intel-bts.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
@@ -74,10 +75,10 @@ static int intel_bts_info_fill(struct auxtrace_record *itr,
        if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
                return -EINVAL;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
-       pc = session->evlist->mmap[0].base;
+       pc = session->evlist->mmap[0].core.base;
        if (pc) {
                err = perf_read_tsc_conversion(pc, &tc);
                if (err) {
@@ -230,7 +231,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
                if (err)
                        return err;
 
-               tracking_evsel = perf_evlist__last(evlist);
+               tracking_evsel = evlist__last(evlist);
 
                perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
index cb7cf16..d6d2625 100644 (file)
@@ -18,6 +18,7 @@
 #include "../../util/evlist.h"
 #include "../../util/evsel.h"
 #include "../../util/cpumap.h"
+#include "../../util/mmap.h"
 #include <subcmd/parse-options.h>
 #include "../../util/parse-events.h"
 #include "../../util/pmu.h"
@@ -26,7 +27,7 @@
 #include "../../util/record.h"
 #include "../../util/target.h"
 #include "../../util/tsc.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/intel-pt.h"
 
 #define KiB(x) ((x) * 1024)
@@ -351,10 +352,10 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
        filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
        filter_str_len = filter ? strlen(filter) : 0;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
-       pc = session->evlist->mmap[0].base;
+       pc = session->evlist->mmap[0].core.base;
        if (pc) {
                err = perf_read_tsc_conversion(pc, &tc);
                if (err) {
@@ -416,12 +417,12 @@ static int intel_pt_track_switches(struct evlist *evlist)
                return err;
        }
 
-       evsel = perf_evlist__last(evlist);
+       evsel = evlist__last(evlist);
 
        perf_evsel__set_sample_bit(evsel, CPU);
        perf_evsel__set_sample_bit(evsel, TIME);
 
-       evsel->system_wide = true;
+       evsel->core.system_wide = true;
        evsel->no_aux_samples = true;
        evsel->immediate = true;
 
@@ -716,13 +717,13 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                                if (err)
                                        return err;
 
-                               switch_evsel = perf_evlist__last(evlist);
+                               switch_evsel = evlist__last(evlist);
 
                                switch_evsel->core.attr.freq = 0;
                                switch_evsel->core.attr.sample_period = 1;
                                switch_evsel->core.attr.context_switch = 1;
 
-                               switch_evsel->system_wide = true;
+                               switch_evsel->core.system_wide = true;
                                switch_evsel->no_aux_samples = true;
                                switch_evsel->immediate = true;
 
@@ -774,7 +775,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                if (err)
                        return err;
 
-               tracking_evsel = perf_evlist__last(evlist);
+               tracking_evsel = evlist__last(evlist);
 
                perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
index 1e9ec78..e17e080 100644 (file)
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/string.h>
+#include <limits.h>
 #include <stdlib.h>
 
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/machine.h"
 #include "../../util/map.h"
 #include "../../util/symbol.h"
index c5197a1..2f55afb 100644 (file)
@@ -8,6 +8,8 @@
 #include <linux/types.h>
 #include <asm/barrier.h>
 #include "../../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
 #include "../../../util/tsc.h"
 
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
index 05920e3..4735797 100644 (file)
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <errno.h>
+#include "../../util/debug.h"
 #ifndef REMOTE_UNWIND_LIBUNWIND
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"
-#include "../../util/debug.h"
 #endif
 
 #ifdef HAVE_ARCH_X86_64_SUPPORT
index d1caa4a..bb617e5 100644 (file)
 #include <sys/resource.h>
 #include <sys/epoll.h>
 #include <sys/eventfd.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
 #include <subcmd/parse-options.h>
 #include "bench.h"
-#include "cpumap.h"
 
 #include <err.h>
 
index f6b4472..7af6944 100644 (file)
 #include <sys/epoll.h>
 #include <sys/eventfd.h>
 #include <sys/types.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
 #include <subcmd/parse-options.h>
 #include "bench.h"
-#include "cpumap.h"
 
 #include <err.h>
 
index 80e1389..8ba0c33 100644 (file)
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <sys/time.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
 #include <subcmd/parse-options.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 
index c5d6d0a..d0cae81 100644 (file)
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <errno.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
index 75d3418..a00a689 100644 (file)
 #include <linux/kernel.h>
 #include <linux/time64.h>
 #include <errno.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
index 163fe16..a053cf2 100644 (file)
@@ -29,7 +29,8 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe
 #include <linux/time64.h>
 #include <errno.h>
 #include "futex.h"
-#include "cpumap.h"
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
 
 #include <err.h>
 #include <stdlib.h>
index 77dcdc1..df81009 100644 (file)
 #include <linux/kernel.h>
 #include <linux/time64.h>
 #include <errno.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
index 62b8ef4..5797253 100644 (file)
@@ -9,7 +9,6 @@
 /* For the CLR_() macros */
 #include <pthread.h>
 
-#include "../builtin.h"
 #include <subcmd/parse-options.h>
 #include "../util/cloexec.h"
 
index c63eb9a..97e4a4f 100644 (file)
@@ -10,9 +10,7 @@
  *
  */
 
-#include "../util/util.h"
 #include <subcmd/parse-options.h>
-#include "../builtin.h"
 #include "bench.h"
 
 /* Test groups of 20 processes spraying to 20 receivers */
index 35b07f1..3c88d1f 100644 (file)
@@ -9,9 +9,7 @@
  *  http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
  * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  */
-#include "../util/util.h"
 #include <subcmd/parse-options.h>
-#include "../builtin.h"
 #include "bench.h"
 
 #include <unistd.h>
index 4e4d2e7..8db8fc9 100644 (file)
@@ -27,6 +27,7 @@
 #include "util/sort.h"
 #include "util/hist.h"
 #include "util/dso.h"
+#include "util/machine.h"
 #include "util/map.h"
 #include "util/session.h"
 #include "util/tool.h"
@@ -39,6 +40,7 @@
 #include <dlfcn.h>
 #include <errno.h>
 #include <linux/bitmap.h>
+#include <linux/err.h>
 
 struct perf_annotate {
        struct perf_tool tool;
@@ -583,8 +585,8 @@ int cmd_annotate(int argc, const char **argv)
        data.path = input_name;
 
        annotate.session = perf_session__new(&data, false, &annotate.tool);
-       if (annotate.session == NULL)
-               return -1;
+       if (IS_ERR(annotate.session))
+               return PTR_ERR(annotate.session);
 
        annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
                                                      HEADER_BRANCH_STACK);
index 1a69eb5..39efa51 100644 (file)
@@ -28,6 +28,7 @@
 #include "util/util.h"
 #include "util/probe-file.h"
 #include <linux/string.h>
+#include <linux/err.h>
 
 static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 {
@@ -422,8 +423,8 @@ int cmd_buildid_cache(int argc, const char **argv)
                data.force = force;
 
                session = perf_session__new(&data, false, NULL);
-               if (session == NULL)
-                       return -1;
+               if (IS_ERR(session))
+                       return PTR_ERR(session);
        }
 
        if (symbol__init(session ? &session->header.env : NULL) < 0)
index 5a0d8b3..e3ef755 100644 (file)
@@ -18,6 +18,7 @@
 #include "util/symbol.h"
 #include "util/data.h"
 #include <errno.h>
+#include <linux/err.h>
 
 static int sysfs__fprintf_build_id(FILE *fp)
 {
@@ -65,8 +66,8 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
                goto out;
 
        session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        /*
         * We take all buildids when the file contains AUX area tracing data
index b09b12e..e69f449 100644 (file)
@@ -13,6 +13,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/compiler.h>
+#include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/stringify.h>
 #include <linux/zalloc.h>
@@ -20,6 +21,7 @@
 #include <sys/param.h>
 #include "debug.h"
 #include "builtin.h"
+#include <perf/cpumap.h>
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
 #include "map_symbol.h"
@@ -2633,6 +2635,7 @@ static int build_cl_output(char *cl_sort, bool no_source)
        bool add_sym   = false;
        bool add_dso   = false;
        bool add_src   = false;
+       int ret = 0;
 
        if (!buf)
                return -ENOMEM;
@@ -2651,7 +2654,8 @@ static int build_cl_output(char *cl_sort, bool no_source)
                        add_dso = true;
                } else if (strcmp(tok, "offset")) {
                        pr_err("unrecognized sort token: %s\n", tok);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err;
                }
        }
 
@@ -2674,13 +2678,15 @@ static int build_cl_output(char *cl_sort, bool no_source)
                add_sym ? "symbol," : "",
                add_dso ? "dso," : "",
                add_src ? "cl_srcline," : "",
-               "node") < 0)
-               return -ENOMEM;
+               "node") < 0) {
+               ret = -ENOMEM;
+               goto err;
+       }
 
        c2c.show_src = add_src;
-
+err:
        free(buf);
-       return 0;
+       return ret;
 }
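
The hunk above converts build_cl_output()'s early returns into a single exit point so that buf is freed on every path. The same idiom in isolation; parse_list() and its validation step are hypothetical:

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>

    static int parse_list(const char *input)
    {
            char *buf = strdup(input);
            int ret = 0;

            if (!buf)
                    return -ENOMEM;

            if (strchr(buf, ';')) {         /* placeholder validation step */
                    ret = -EINVAL;
                    goto err;
            }
            /* ... further parsing; every failure path jumps to err ... */
    err:
            free(buf);                      /* exactly one owner of the cleanup */
            return ret;
    }
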
 
 static int setup_coalesce(const char *coalesce, bool no_source)
@@ -2780,8 +2786,9 @@ static int perf_c2c__report(int argc, const char **argv)
        }
 
        session = perf_session__new(&data, 0, &c2c.tool);
-       if (session == NULL) {
-               pr_debug("No memory for session\n");
+       if (IS_ERR(session)) {
+               err = PTR_ERR(session);
+               pr_debug("Error creating perf session\n");
                goto out;
        }
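
This is the pattern repeated throughout the rest of this series: perf_session__new() now returns an ERR_PTR()-encoded negative errno instead of NULL, and every caller tests it with IS_ERR() and propagates PTR_ERR(). A minimal fragment of the convention, assuming data, tool and sbuf as in the surrounding callers:

    #include <linux/err.h>

            session = perf_session__new(&data, false, &tool);
            if (IS_ERR(session)) {
                    err = PTR_ERR(session);         /* negative errno */
                    pr_err("failed to create session: %s\n",
                           str_error_r(-err, sbuf, sizeof(sbuf)));
                    return err;
            }
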
 
index 42d8157..2603015 100644 (file)
@@ -9,7 +9,6 @@
 
 #include "util/cache.h"
 #include <subcmd/parse-options.h>
-#include "util/util.h"
 #include "util/debug.h"
 #include "util/config.h"
 #include <linux/string.h>
index 827e480..c37a786 100644 (file)
@@ -23,6 +23,7 @@
 #include "util/time-utils.h"
 #include "util/annotate.h"
 #include "util/map.h"
+#include <linux/err.h>
 #include <linux/zalloc.h>
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
@@ -1153,9 +1154,9 @@ static int check_file_brstack(void)
 
        data__for_each_file(i, d) {
                d->session = perf_session__new(&d->data, false, &pdiff.tool);
-               if (!d->session) {
+               if (IS_ERR(d->session)) {
                        pr_err("Failed to open %s\n", d->data.path);
-                       return -1;
+                       return PTR_ERR(d->session);
                }
 
                has_br_stack = perf_header__has_feat(&d->session->header,
@@ -1185,9 +1186,9 @@ static int __cmd_diff(void)
 
        data__for_each_file(i, d) {
                d->session = perf_session__new(&d->data, false, &pdiff.tool);
-               if (!d->session) {
+               if (IS_ERR(d->session)) {
+                       ret = PTR_ERR(d->session);
                        pr_err("Failed to open %s\n", d->data.path);
-                       ret = -1;
                        goto out_delete;
                }
 
index 238fa38..4405019 100644 (file)
@@ -5,18 +5,18 @@
  */
 #include "builtin.h"
 
-#include "util/util.h"
-
 #include <linux/list.h>
 
 #include "perf.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_fprintf.h"
 #include "util/parse-events.h"
 #include <subcmd/parse-options.h>
 #include "util/session.h"
 #include "util/data.h"
 #include "util/debug.h"
+#include <linux/err.h>
 
 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
 {
@@ -30,8 +30,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
        bool has_tracepoint = false;
 
        session = perf_session__new(&data, 0, NULL);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        evlist__for_each_entry(session->evlist, pos) {
                perf_evsel__fprintf(pos, details, stdout);
index c14f40b..372ecb3 100644 (file)
@@ -21,7 +21,9 @@
 #include "util/auxtrace.h"
 #include "util/jit.h"
 #include "util/symbol.h"
+#include "util/synthetic-events.h"
 #include "util/thread.h"
+#include <linux/err.h>
 
 #include <subcmd/parse-options.h>
 
@@ -834,8 +836,8 @@ int cmd_inject(int argc, const char **argv)
 
        data.path = inject.input_name;
        inject.session = perf_session__new(&data, true, &inject.tool);
-       if (inject.session == NULL)
-               return -1;
+       if (IS_ERR(inject.session))
+               return PTR_ERR(inject.session);
 
        if (zstd_init(&(inject.session->zstd_data), 0) < 0)
                pr_warning("Decompression initialization failed.\n");
index b5682be..9661671 100644 (file)
@@ -14,6 +14,7 @@
 #include "util/tool.h"
 #include "util/callchain.h"
 #include "util/time-utils.h"
+#include <linux/err.h>
 
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
@@ -690,6 +691,7 @@ static char *compact_gfp_flags(char *gfp_flags)
                        new = realloc(new_flags, len + strlen(cpt) + 2);
                        if (new == NULL) {
                                free(new_flags);
+                               free(orig_flags);
                                return NULL;
                        }
 
@@ -1956,8 +1958,8 @@ int cmd_kmem(int argc, const char **argv)
        data.path = input_name;
 
        kmem_session = session = perf_session__new(&data, false, &perf_kmem);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        ret = -1;
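
The compact_gfp_flags() hunk above plugs a leak: when realloc() fails, the original buffer must be freed as well, since realloc() leaves it allocated. The standard way to avoid the trap is to land realloc() in a temporary; flags and new_len below are illustrative names:

    /* grow 'flags' without leaking it when realloc() fails */
    char *tmp = realloc(flags, new_len);

    if (!tmp) {
            free(flags);    /* still owned here; release it */
            return NULL;
    }
    flags = tmp;
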
 
index 0a4fcbe..58a9e09 100644 (file)
@@ -5,6 +5,7 @@
 #include "util/build-id.h"
 #include "util/evsel.h"
 #include "util/evlist.h"
+#include "util/mmap.h"
 #include "util/term.h"
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/debug.h"
 #include "util/tool.h"
 #include "util/stat.h"
+#include "util/synthetic-events.h"
 #include "util/top.h"
 #include "util/data.h"
 #include "util/ordered-events.h"
+#include "util/kvm-stat.h"
 #include "ui/ui.h"
 
 #include <sys/prctl.h>
@@ -31,6 +34,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 
+#include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/time64.h>
@@ -58,7 +62,6 @@ static const char *get_filename_for_perf_kvm(void)
 }
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include "util/kvm-stat.h"
 
 void exit_event_get_key(struct evsel *evsel,
                        struct perf_sample *sample,
@@ -702,14 +705,15 @@ static int process_sample_event(struct perf_tool *tool,
 
 static int cpu_isa_config(struct perf_kvm_stat *kvm)
 {
-       char buf[64], *cpuid;
+       char buf[128], *cpuid;
        int err;
 
        if (kvm->live) {
                err = get_cpuid(buf, sizeof(buf));
                if (err != 0) {
-                       pr_err("Failed to look up CPU type\n");
-                       return err;
+                       pr_err("Failed to look up CPU type: %s\n",
+                              str_error_r(err, buf, sizeof(buf)));
+                       return -err;
                }
                cpuid = buf;
        } else
@@ -748,7 +752,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 {
        struct evlist *evlist = kvm->evlist;
        union perf_event *event;
-       struct perf_mmap *md;
+       struct mmap *md;
        u64 timestamp;
        s64 n = 0;
        int err;
@@ -799,7 +803,7 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
        s64 n, ntotal = 0;
        u64 flush_time = ULLONG_MAX, mmap_time;
 
-       for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
+       for (i = 0; i < kvm->evlist->core.nr_mmaps; i++) {
                n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
                if (n < 0)
                        return -1;
@@ -964,10 +968,10 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
                goto out;
        }
 
-       if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
+       if (evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
                goto out;
 
-       nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin));
+       nr_stdin = evlist__add_pollfd(kvm->evlist, fileno(stdin));
        if (nr_stdin < 0)
                goto out;
 
@@ -978,7 +982,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
        evlist__enable(kvm->evlist);
 
        while (!done) {
-               struct fdarray *fda = &kvm->evlist->pollfd;
+               struct fdarray *fda = &kvm->evlist->core.pollfd;
                int rc;
 
                rc = perf_kvm__mmap_read(kvm);
@@ -1058,7 +1062,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
                goto out;
        }
 
-       if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
+       if (evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
                ui__error("Failed to mmap the events: %s\n",
                          str_error_r(errno, sbuf, sizeof(sbuf)));
                evlist__close(evlist);
@@ -1090,9 +1094,9 @@ static int read_events(struct perf_kvm_stat *kvm)
 
        kvm->tool = eops;
        kvm->session = perf_session__new(&file, false, &kvm->tool);
-       if (!kvm->session) {
+       if (IS_ERR(kvm->session)) {
                pr_err("Initializing perf session failed\n");
-               return -1;
+               return PTR_ERR(kvm->session);
        }
 
        symbol__init(&kvm->session->header.env);
@@ -1445,8 +1449,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
         * perf session
         */
        kvm->session = perf_session__new(&data, false, &kvm->tool);
-       if (kvm->session == NULL) {
-               err = -1;
+       if (IS_ERR(kvm->session)) {
+               err = PTR_ERR(kvm->session);
                goto out;
        }
        kvm->session->evlist = kvm->evlist;
@@ -1513,11 +1517,21 @@ perf_stat:
 }
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+int __weak kvm_add_default_arch_event(int *argc __maybe_unused,
+                                       const char **argv __maybe_unused)
+{
+       return 0;
+}
+
 static int __cmd_record(const char *file_name, int argc, const char **argv)
 {
-       int rec_argc, i = 0, j;
+       int rec_argc, i = 0, j, ret;
        const char **rec_argv;
 
+       ret = kvm_add_default_arch_event(&argc, argv);
+       if (ret)
+               return -EINVAL;
+
        rec_argc = argc + 2;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        rec_argv[i++] = strdup("record");
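
The __weak definition added to builtin-kvm.c pairs with the powerpc implementation earlier in this series: generic code supplies a no-op default, and an architecture that ships a strong definition overrides it at link time. A sketch of the pairing with hypothetical names; the two definitions live in different objects:

    /* generic code: weak default that does nothing */
    int __weak arch_add_default_event(int *argc __maybe_unused,
                                      const char **argv __maybe_unused)
    {
            return 0;
    }

    /* arch/<arch>/util/...: a strong definition, when compiled in,
     * replaces the weak one at link time */
    int arch_add_default_event(int *argc, const char **argv)
    {
            return append_trace_event(argc, argv);  /* hypothetical helper */
    }
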
index e290f6b..08e62ae 100644 (file)
@@ -81,9 +81,9 @@ int cmd_list(int argc, const char **argv)
                                                long_desc_flag, details_flag);
                else if (strcmp(argv[i], "sdt") == 0)
                        print_sdt_events(NULL, NULL, raw_dump);
-               else if (strcmp(argv[i], "metric") == 0)
+               else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0)
                        metricgroup__print(true, false, NULL, raw_dump, details_flag);
-               else if (strcmp(argv[i], "metricgroup") == 0)
+               else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0)
                        metricgroup__print(false, true, NULL, raw_dump, details_flag);
                else if ((sep = strchr(argv[i], ':')) != NULL) {
                        int sep_idx;
index 4c2b7f4..474dfd5 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/hash.h>
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
+#include <linux/err.h>
 
 static struct perf_session *session;
 
@@ -872,9 +873,9 @@ static int __cmd_report(bool display_info)
        };
 
        session = perf_session__new(&data, false, &eops);
-       if (!session) {
+       if (IS_ERR(session)) {
                pr_err("Initializing perf session failed\n");
-               return -1;
+               return PTR_ERR(session);
        }
 
        symbol__init(&session->header.env);
index 27d2bde..a13f581 100644 (file)
@@ -17,6 +17,7 @@
 #include "util/dso.h"
 #include "util/map.h"
 #include "util/symbol.h"
+#include <linux/err.h>
 
 #define MEM_OPERATION_LOAD     0x1
 #define MEM_OPERATION_STORE    0x2
@@ -249,8 +250,8 @@ static int report_raw_events(struct perf_mem *mem)
        struct perf_session *session = perf_session__new(&data, false,
                                                         &mem->tool);
 
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        if (mem->cpu_list) {
                ret = perf_session__cpu_bitmap(session, mem->cpu_list,
index 1447004..2333286 100644 (file)
@@ -20,6 +20,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/debug.h"
+#include "util/mmap.h"
 #include "util/target.h"
 #include "util/session.h"
 #include "util/tool.h"
@@ -38,6 +39,7 @@
 #include "util/trigger.h"
 #include "util/perf-hooks.h"
 #include "util/cpu-set-sched.h"
+#include "util/synthetic-events.h"
 #include "util/time-utils.h"
 #include "util/units.h"
 #include "util/bpf-event.h"
@@ -53,6 +55,7 @@
 #include <signal.h>
 #include <sys/mman.h>
 #include <sys/wait.h>
+#include <linux/err.h>
 #include <linux/string.h>
 #include <linux/time64.h>
 #include <linux/zalloc.h>
@@ -117,7 +120,7 @@ static bool switch_output_time(struct record *rec)
               trigger_is_ready(&switch_output_trigger);
 }
 
-static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
+static int record__write(struct record *rec, struct mmap *map __maybe_unused,
                         void *bf, size_t size)
 {
        struct perf_data_file *file = &rec->session->data->file;
@@ -166,7 +169,7 @@ static int record__aio_write(struct aiocb *cblock, int trace_fd,
        return rc;
 }
 
-static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
+static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
 {
        void *rem_buf;
        off_t rem_off;
@@ -212,7 +215,7 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
        return rc;
 }
 
-static int record__aio_sync(struct perf_mmap *md, bool sync_all)
+static int record__aio_sync(struct mmap *md, bool sync_all)
 {
        struct aiocb **aiocb = md->aio.aiocb;
        struct aiocb *cblocks = md->aio.cblocks;
@@ -253,12 +256,12 @@ struct record_aio {
        size_t          size;
 };
 
-static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
+static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
 {
        struct record_aio *aio = to;
 
        /*
-        * map->base data pointed by buf is copied into free map->aio.data[] buffer
+        * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
         * to release space in the kernel buffer as fast as possible, calling
         * perf_mmap__consume() from perf_mmap__push() function.
         *
@@ -298,7 +301,7 @@ static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t
        return size;
 }
 
-static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
+static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
 {
        int ret, idx;
        int trace_fd = rec->session->data->file.fd;
@@ -349,15 +352,15 @@ static void record__aio_mmap_read_sync(struct record *rec)
 {
        int i;
        struct evlist *evlist = rec->evlist;
-       struct perf_mmap *maps = evlist->mmap;
+       struct mmap *maps = evlist->mmap;
 
        if (!record__aio_enabled(rec))
                return;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               struct perf_mmap *map = &maps[i];
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               struct mmap *map = &maps[i];
 
-               if (map->base)
+               if (map->core.base)
                        record__aio_sync(map, true);
        }
 }
@@ -385,7 +388,7 @@ static int record__aio_parse(const struct option *opt,
 #else /* HAVE_AIO_SUPPORT */
 static int nr_cblocks_max = 0;
 
-static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
+static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
                            off_t *off __maybe_unused)
 {
        return -1;
@@ -437,7 +440,7 @@ static int record__mmap_flush_parse(const struct option *opt,
        if (!opts->mmap_flush)
                opts->mmap_flush = MMAP_FLUSH_DEFAULT;
 
-       flush_max = perf_evlist__mmap_size(opts->mmap_pages);
+       flush_max = evlist__mmap_size(opts->mmap_pages);
        flush_max /= 4;
        if (opts->mmap_flush > flush_max)
                opts->mmap_flush = flush_max;
@@ -480,7 +483,7 @@ static int process_synthesized_event(struct perf_tool *tool,
        return record__write(rec, NULL, event, event->header.size);
 }
 
-static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
+static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 {
        struct record *rec = to;
 
@@ -525,7 +528,7 @@ static void record__sig_exit(void)
 #ifdef HAVE_AUXTRACE_SUPPORT
 
 static int record__process_auxtrace(struct perf_tool *tool,
-                                   struct perf_mmap *map,
+                                   struct mmap *map,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
 {
@@ -563,7 +566,7 @@ static int record__process_auxtrace(struct perf_tool *tool,
 }
 
 static int record__auxtrace_mmap_read(struct record *rec,
-                                     struct perf_mmap *map)
+                                     struct mmap *map)
 {
        int ret;
 
@@ -579,7 +582,7 @@ static int record__auxtrace_mmap_read(struct record *rec,
 }
 
 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
-                                              struct perf_mmap *map)
+                                              struct mmap *map)
 {
        int ret;
 
@@ -600,8 +603,8 @@ static int record__auxtrace_read_snapshot_all(struct record *rec)
        int i;
        int rc = 0;
 
-       for (i = 0; i < rec->evlist->nr_mmaps; i++) {
-               struct perf_mmap *map = &rec->evlist->mmap[i];
+       for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
+               struct mmap *map = &rec->evlist->mmap[i];
 
                if (!map->auxtrace_mmap.base)
                        continue;
@@ -666,7 +669,7 @@ static int record__auxtrace_init(struct record *rec)
 
 static inline
 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
-                              struct perf_mmap *map __maybe_unused)
+                              struct mmap *map __maybe_unused)
 {
        return 0;
 }
@@ -705,7 +708,7 @@ static int record__mmap_evlist(struct record *rec,
        if (opts->affinity != PERF_AFFINITY_SYS)
                cpu__setup_cpunode_map();
 
-       if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
+       if (evlist__mmap_ex(evlist, opts->mmap_pages,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode,
                                 opts->nr_cblocks, opts->affinity,
@@ -753,9 +756,9 @@ static int record__open(struct record *rec)
                if (perf_evlist__add_dummy(evlist))
                        return -ENOMEM;
 
-               pos = perf_evlist__first(evlist);
+               pos = evlist__first(evlist);
                pos->tracking = 0;
-               pos = perf_evlist__last(evlist);
+               pos = evlist__last(evlist);
                pos->tracking = 1;
                pos->core.attr.enable_on_exec = 1;
        }
@@ -786,6 +789,17 @@ try_again:
                pos->supported = true;
        }
 
+       if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
+               pr_warning(
+"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
+"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
+"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
+"file is not found in the buildid cache or in the vmlinux path.\n\n"
+"Samples in kernel modules won't be resolved at all.\n\n"
+"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
+"even with a suitable vmlinux or kallsyms file.\n\n");
+       }
+
        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
@@ -888,7 +902,7 @@ static struct perf_event_header finished_round_event = {
        .type = PERF_RECORD_FINISHED_ROUND,
 };
 
-static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
+static void record__adjust_affinity(struct record *rec, struct mmap *map)
 {
        if (rec->opts.affinity != PERF_AFFINITY_SYS &&
            !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
@@ -935,7 +949,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
-       struct perf_mmap *maps;
+       struct mmap *maps;
        int trace_fd = rec->data.file.fd;
        off_t off = 0;
 
@@ -952,20 +966,20 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
        if (record__aio_enabled(rec))
                off = record__aio_get_pos(trace_fd);
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                u64 flush = 0;
-               struct perf_mmap *map = &maps[i];
+               struct mmap *map = &maps[i];
 
-               if (map->base) {
+               if (map->core.base) {
                        record__adjust_affinity(rec, map);
                        if (synch) {
-                               flush = map->flush;
-                               map->flush = 1;
+                               flush = map->core.flush;
+                               map->core.flush = 1;
                        }
                        if (!record__aio_enabled(rec)) {
                                if (perf_mmap__push(map, rec, record__pushfn) < 0) {
                                        if (synch)
-                                               map->flush = flush;
+                                               map->core.flush = flush;
                                        rc = -1;
                                        goto out;
                                }
@@ -973,13 +987,13 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
                                if (record__aio_push(rec, map, &off) < 0) {
                                        record__aio_set_pos(trace_fd, off);
                                        if (synch)
-                                               map->flush = flush;
+                                               map->core.flush = flush;
                                        rc = -1;
                                        goto out;
                                }
                        }
                        if (synch)
-                               map->flush = flush;
+                               map->core.flush = flush;
                }
 
                if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
@@ -1180,23 +1194,14 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 static void snapshot_sig_handler(int sig);
 static void alarm_sig_handler(int sig);
 
-int __weak
-perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
-                           struct perf_tool *tool __maybe_unused,
-                           perf_event__handler_t process __maybe_unused,
-                           struct machine *machine __maybe_unused)
-{
-       return 0;
-}
-
 static const struct perf_event_mmap_page *
 perf_evlist__pick_pc(struct evlist *evlist)
 {
        if (evlist) {
-               if (evlist->mmap && evlist->mmap[0].base)
-                       return evlist->mmap[0].base;
-               if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
-                       return evlist->overwrite_mmap[0].base;
+               if (evlist->mmap && evlist->mmap[0].core.base)
+                       return evlist->mmap[0].core.base;
+               if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
+                       return evlist->overwrite_mmap[0].core.base;
        }
        return NULL;
 }
@@ -1362,9 +1367,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        }
 
        session = perf_session__new(data, false, tool);
-       if (session == NULL) {
+       if (IS_ERR(session)) {
                pr_err("Perf session creation failed.\n");
-               return -1;
+               return PTR_ERR(session);
        }
 
        fd = perf_data__fd(data);
@@ -1407,7 +1412,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                err = -1;
                goto out_child;
        }
-       session->header.env.comp_mmap_len = session->evlist->mmap_len;
+       session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
 
        err = bpf__apply_obj_config();
        if (err) {
@@ -1610,7 +1615,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                if (hits == rec->samples) {
                        if (done || draining)
                                break;
-                       err = perf_evlist__poll(rec->evlist, -1);
+                       err = evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
@@ -1619,7 +1624,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                                err = 0;
                        waking++;
 
-                       if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
+                       if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }
 
@@ -1976,7 +1981,7 @@ out_free:
 
 static void switch_output_size_warn(struct record *rec)
 {
-       u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
+       u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;
 
        wakeup_size /= 2;
@@ -2371,16 +2376,6 @@ int cmd_record(int argc, const char **argv)
 
        err = -ENOMEM;
 
-       if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
-               pr_warning(
-"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
-"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
-"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
-"file is not found in the buildid cache or in the vmlinux path.\n\n"
-"Samples in kernel modules won't be resolved at all.\n\n"
-"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
-"even with a suitable vmlinux or kallsyms file.\n\n");
-
        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output.enabled) {
index b18fab9..aae0e57 100644 (file)
@@ -48,7 +48,7 @@
 #include "util/auxtrace.h"
 #include "util/units.h"
 #include "util/branch.h"
-#include "util/util.h"
+#include "util/util.h" // perf_tip()
 #include "ui/ui.h"
 #include "ui/progress.h"
 
@@ -1269,8 +1269,8 @@ int cmd_report(int argc, const char **argv)
 
 repeat:
        session = perf_session__new(&data, false, &report.tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        ret = evswitch__init(&report.evswitch, session->evlist, stderr);
        if (ret)
index ec96d64..5cacc4f 100644 (file)
@@ -3,8 +3,10 @@
 #include "perf.h"
 #include "perf-sys.h"
 
+#include "util/cpumap.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_fprintf.h"
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/header.h"
@@ -23,6 +25,7 @@
 #include "util/trace-event.h"
 
 #include "util/debug.h"
+#include "util/event.h"
 
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -36,7 +39,9 @@
 #include <pthread.h>
 #include <math.h>
 #include <api/fs/fs.h>
+#include <perf/cpumap.h>
 #include <linux/time64.h>
+#include <linux/err.h>
 
 #include <linux/ctype.h>
 
@@ -1794,9 +1799,9 @@ static int perf_sched__read_events(struct perf_sched *sched)
        int rc = -1;
 
        session = perf_session__new(&data, false, &sched->tool);
-       if (session == NULL) {
-               pr_debug("No Memory for session\n");
-               return -1;
+       if (IS_ERR(session)) {
+               pr_debug("Error creating perf session\n");
+               return PTR_ERR(session);
        }
 
        symbol__init(&session->header.env);
@@ -2051,7 +2056,7 @@ static void timehist_print_sample(struct perf_sched *sched,
                            EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
                            EVSEL__PRINT_CALLCHAIN_ARROW |
                            EVSEL__PRINT_SKIP_IGNORED,
-                           &callchain_cursor, stdout);
+                           &callchain_cursor, symbol_conf.bt_stop_list, stdout);
 
 out:
        printf("\n");
@@ -2986,8 +2991,8 @@ static int perf_sched__timehist(struct perf_sched *sched)
        symbol_conf.use_callchain = sched->show_callchain;
 
        session = perf_session__new(&data, false, &sched->tool);
-       if (session == NULL)
-               return -ENOMEM;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        evlist = session->evlist;
 
index e079b34..67be8d3 100644 (file)
@@ -17,6 +17,7 @@
 #include "util/trace-event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_fprintf.h"
 #include "util/evswitch.h"
 #include "util/sort.h"
 #include "util/data.h"
@@ -52,6 +53,7 @@
 #include <unistd.h>
 #include <subcmd/pager.h>
 #include <perf/evlist.h>
+#include <linux/err.h>
 #include "util/record.h"
 #include "util/util.h"
 #include "perf.h"
@@ -1061,7 +1063,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                        continue;
 
                insn = 0;
-               for (off = 0;; off += ilen) {
+               for (off = 0; off < (unsigned)len; off += ilen) {
                        uint64_t ip = start + off;
 
                        printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
@@ -1072,6 +1074,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                                        printed += print_srccode(thread, x.cpumode, ip);
                                break;
                        } else {
+                               ilen = 0;
                                printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
                                                   dump_insn(&x, ip, buffer + off, len - off, &ilen));
                                if (ilen == 0)
@@ -1081,6 +1084,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                                insn++;
                        }
                }
+               if (off != (unsigned)len)
+                       printed += fprintf(fp, "\tmismatch of LBR data and executable\n");
        }
 
        /*
@@ -1121,6 +1126,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                goto out;
        }
        for (off = 0; off <= end - start; off += ilen) {
+               ilen = 0;
                printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off,
                                   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
                if (ilen == 0)
@@ -1324,7 +1330,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
                } else
                        printed += fprintf(fp, "\n");
 
-               printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp);
+               printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor,
+                                              symbol_conf.bt_stop_list, fp);
        }
 
        /* print branch_to information */
@@ -1866,7 +1873,8 @@ static void process_event(struct perf_script *script,
                        cursor = &callchain_cursor;
 
                fputc(cursor ? '\n' : ' ', fp);
-               sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp);
+               sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor,
+                                   symbol_conf.bt_stop_list, fp);
        }
 
        if (PRINT_FIELD(IREGS))
@@ -1915,7 +1923,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
        int cpu, thread;
        static int header_printed;
 
-       if (counter->system_wide)
+       if (counter->core.system_wide)
                nthreads = 1;
 
        if (!header_printed) {
@@ -2042,7 +2050,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
                return err;
 
        evlist = *pevlist;
-       evsel = perf_evlist__last(*pevlist);
+       evsel = evlist__last(*pevlist);
 
        if (!evsel->priv) {
                if (scr->per_event_dump) {
@@ -3083,8 +3091,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array, int num,
        int i = 0;
 
        session = perf_session__new(&data, false, NULL);
-       if (!session)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
 
@@ -3754,8 +3762,8 @@ int cmd_script(int argc, const char **argv)
        }
 
        session = perf_session__new(&data, false, &script.tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        if (header || header_only) {
                script.tool.show_feat_hdr = SHOW_FEAT_HEADER;
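
Both brstackinsn loops above are now bounded by len, and ilen is zeroed before each decode so a failing disassembler cannot leave a stale length behind and either spin forever or walk past the buffer. The resulting loop shape, with x, buffer, start, len, printed and fp as in the surrounding function:

    unsigned off;
    int ilen;

    for (off = 0; off < (unsigned)len; off += ilen) {
            ilen = 0;       /* a failed decode must not reuse a stale length */
            printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off,
                               dump_insn(&x, start + off, buffer + off,
                                         len - off, &ilen));
            if (ilen == 0)
                    break;  /* decoder made no progress: stop */
    }
    if (off != (unsigned)len)
            printed += fprintf(fp, "\tmismatch of LBR data and executable\n");
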
index 7e17bf9..468fc49 100644 (file)
@@ -61,6 +61,7 @@
 #include "util/tool.h"
 #include "util/string2.h"
 #include "util/metricgroup.h"
+#include "util/synthetic-events.h"
 #include "util/target.h"
 #include "util/time-utils.h"
 #include "util/top.h"
@@ -82,6 +83,7 @@
 #include <unistd.h>
 #include <sys/time.h>
 #include <sys/resource.h>
+#include <linux/err.h>
 
 #include <linux/ctype.h>
 #include <perf/evlist.h>
@@ -233,7 +235,7 @@ static int write_stat_round_event(u64 tm, u64 type)
 #define WRITE_STAT_ROUND_EVENT(time, interval) \
        write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
 
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
 
 static int
 perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
@@ -276,7 +278,7 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
        if (!counter->supported)
                return -ENOENT;
 
-       if (counter->system_wide)
+       if (counter->core.system_wide)
                nthreads = 1;
 
        for (thread = 0; thread < nthreads; thread++) {
@@ -540,8 +542,8 @@ try_again:
                if (err < 0)
                        return err;
 
-               err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list,
-                                                 process_synthesized_event, is_pipe);
+               err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
+                                                        process_synthesized_event, is_pipe);
                if (err < 0)
                        return err;
        }
@@ -822,18 +824,6 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
        return cpu_map__get_core(map, cpu, NULL);
 }
 
-static int cpu_map__get_max(struct perf_cpu_map *map)
-{
-       int i, max = -1;
-
-       for (i = 0; i < map->nr; i++) {
-               if (map->map[i] > max)
-                       max = map->map[i];
-       }
-
-       return max;
-}
-
 static int perf_stat__get_aggr(struct perf_stat_config *config,
                               aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
 {
@@ -928,7 +918,7 @@ static int perf_stat_init_aggr_mode(void)
         * taking the highest cpu number to be the size of
         * the aggregation translate cpumap.
         */
-       nr = cpu_map__get_max(evsel_list->core.cpus);
+       nr = perf_cpu_map__max(evsel_list->core.cpus);
        stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
        return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
 }
@@ -1447,9 +1437,9 @@ static int __cmd_record(int argc, const char **argv)
        }
 
        session = perf_session__new(data, false, NULL);
-       if (session == NULL) {
-               pr_err("Perf session creation failed.\n");
-               return -1;
+       if (IS_ERR(session)) {
+               pr_err("Perf session creation failed\n");
+               return PTR_ERR(session);
        }
 
        init_features(session);
@@ -1646,8 +1636,8 @@ static int __cmd_report(int argc, const char **argv)
        perf_stat.data.mode = PERF_DATA_MODE_READ;
 
        session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        perf_stat.session  = session;
        stat_config.output = stderr;
@@ -1681,7 +1671,7 @@ static void setup_system_wide(int forks)
                struct evsel *counter;
 
                evlist__for_each_entry(evsel_list, counter) {
-                       if (!counter->system_wide)
+                       if (!counter->core.system_wide)
                                return;
                }
 
@@ -1963,8 +1953,11 @@ int cmd_stat(int argc, const char **argv)
                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
                                run_idx + 1);
 
+               if (run_idx != 0)
+                       perf_evlist__reset_prev_raw_counts(evsel_list);
+
                status = run_perf_stat(argc, argv, run_idx);
-               if (forever && status != -1) {
+               if (forever && status != -1 && !interval) {
                        print_counters(NULL, argc, argv);
                        perf_stat__reset_stats();
                }
index e0e8226..9e84fae 100644 (file)
@@ -35,6 +35,7 @@
 #include "util/tool.h"
 #include "util/data.h"
 #include "util/debug.h"
+#include <linux/err.h>
 
 #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE
 FILE *open_memstream(char **ptr, size_t *sizeloc);
@@ -1601,8 +1602,8 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
                                                         &tchart->tool);
        int ret = -EINVAL;
 
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        symbol__init(&session->header.env);
 
index 726e3f2..1f60124 100644 (file)
 #include "util/dso.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_config.h"
 #include "util/event.h"
 #include "util/machine.h"
 #include "util/map.h"
+#include "util/mmap.h"
 #include "util/session.h"
 #include "util/symbol.h"
+#include "util/synthetic-events.h"
 #include "util/top.h"
 #include "util/util.h"
 #include <linux/rbtree.h>
@@ -76,6 +79,7 @@
 #include <linux/stringify.h>
 #include <linux/time64.h>
 #include <linux/types.h>
+#include <linux/err.h>
 
 #include <linux/ctype.h>
 
@@ -528,7 +532,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
                                prompt_integer(&counter, "Enter details event counter");
 
                                if (counter >= top->evlist->core.nr_entries) {
-                                       top->sym_evsel = perf_evlist__first(top->evlist);
+                                       top->sym_evsel = evlist__first(top->evlist);
                                        fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
                                        sleep(1);
                                        break;
@@ -537,7 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
                                        if (top->sym_evsel->idx == counter)
                                                break;
                        } else
-                               top->sym_evsel = perf_evlist__first(top->evlist);
+                               top->sym_evsel = evlist__first(top->evlist);
                        break;
                case 'f':
                        prompt_integer(&top->count_filter, "Enter display event count filter");
@@ -861,7 +865,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 {
        struct record_opts *opts = &top->record_opts;
        struct evlist *evlist = top->evlist;
-       struct perf_mmap *md;
+       struct mmap *md;
        union perf_event *event;
 
        md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
@@ -901,7 +905,7 @@ static void perf_top__mmap_read(struct perf_top *top)
        if (overwrite)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
 
-       for (i = 0; i < top->evlist->nr_mmaps; i++)
+       for (i = 0; i < top->evlist->core.nr_mmaps; i++)
                perf_top__mmap_read_idx(top, i);
 
        if (overwrite) {
@@ -959,7 +963,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
                /* has term for current event */
                if ((overwrite < 0) && (set >= 0)) {
                        /* if it's first event, set overwrite */
-                       if (evsel == perf_evlist__first(evlist))
+                       if (evsel == evlist__first(evlist))
                                overwrite = set;
                        else
                                return -1;
@@ -983,7 +987,7 @@ static int perf_top_overwrite_fallback(struct perf_top *top,
                return 0;
 
        /* only fall back when first event fails */
-       if (evsel != perf_evlist__first(evlist))
+       if (evsel != evlist__first(evlist))
                return 0;
 
        evlist__for_each_entry(evlist, counter)
@@ -1040,7 +1044,7 @@ try_again:
                }
        }
 
-       if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) {
+       if (evlist__mmap(evlist, opts->mmap_pages) < 0) {
                ui__error("Failed to mmap with %d (%s)\n",
                            errno, str_error_r(errno, msg, sizeof(msg)));
                goto out_err;
@@ -1304,7 +1308,7 @@ static int __cmd_top(struct perf_top *top)
        }
 
        /* Wait for a minimal set of events before starting the snapshot */
-       perf_evlist__poll(top->evlist, 100);
+       evlist__poll(top->evlist, 100);
 
        perf_top__mmap_read(top);
 
@@ -1314,7 +1318,7 @@ static int __cmd_top(struct perf_top *top)
                perf_top__mmap_read(top);
 
                if (opts->overwrite || (hits == top->samples))
-                       ret = perf_evlist__poll(top->evlist, 100);
+                       ret = evlist__poll(top->evlist, 100);
 
                if (resize) {
                        perf_top__resize(top);
@@ -1641,7 +1645,7 @@ int cmd_top(int argc, const char **argv)
                goto out_delete_evlist;
        }
 
-       top.sym_evsel = perf_evlist__first(top.evlist);
+       top.sym_evsel = evlist__first(top.evlist);
 
        if (!callchain_param.enabled) {
                symbol_conf.cumulate_callchain = false;
@@ -1671,8 +1675,8 @@ int cmd_top(int argc, const char **argv)
        }
 
        top.session = perf_session__new(NULL, false, NULL);
-       if (top.session == NULL) {
-               status = -1;
+       if (IS_ERR(top.session)) {
+               status = PTR_ERR(top.session);
                goto out_delete_evlist;
        }
 
index 0f633f0..bb5130d 100644 (file)
 #include "util/dso.h"
 #include "util/env.h"
 #include "util/event.h"
+#include "util/evsel.h"
+#include "util/evsel_fprintf.h"
+#include "util/synthetic-events.h"
 #include "util/evlist.h"
 #include "util/evswitch.h"
+#include "util/mmap.h"
 #include <subcmd/pager.h>
 #include <subcmd/exec-cmd.h>
 #include "util/machine.h"
@@ -2074,7 +2078,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
                                        EVSEL__PRINT_DSO |
                                        EVSEL__PRINT_UNKNOWN_AS_ADDR;
 
-       return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
+       return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
 }
 
 static const char *errno_to_name(struct evsel *evsel, int err)
@@ -3408,7 +3412,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (trace->dump.map)
                bpf_map__fprintf(trace->dump.map, trace->output);
 
-       err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
+       err = evlist__mmap(evlist, trace->opts.mmap_pages);
        if (err < 0)
                goto out_error_mmap;
 
@@ -3425,7 +3429,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
        trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
                                  evlist->core.threads->nr > 1 ||
-                                 perf_evlist__first(evlist)->core.attr.inherit;
+                                 evlist__first(evlist)->core.attr.inherit;
 
        /*
         * Now that we already used evsel->core.attr to ask the kernel to setup the
@@ -3441,9 +3445,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 again:
        before = trace->nr_events;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                union perf_event *event;
-               struct perf_mmap *md;
+               struct mmap *md;
 
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
@@ -3472,8 +3476,8 @@ again:
        if (trace->nr_events == before) {
                int timeout = done ? 100 : -1;
 
-               if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
-                       if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
+               if (!draining && evlist__poll(evlist, timeout) > 0) {
+                       if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
                                draining = true;
 
                        goto again;
@@ -3584,8 +3588,8 @@ static int trace__replay(struct trace *trace)
        trace->multiple_threads = true;
 
        session = perf_session__new(&data, false, &trace->tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        if (trace->opts.target.pid)
                symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
index e2e0f06..cea13cb 100755 (executable)
@@ -8,6 +8,7 @@ include/uapi/drm/i915_drm.h
 include/uapi/linux/fadvise.h
 include/uapi/linux/fcntl.h
 include/uapi/linux/fs.h
+include/uapi/linux/fscrypt.h
 include/uapi/linux/kcmp.h
 include/uapi/linux/kvm.h
 include/uapi/linux/in.h
index eaeb8cb..202cada 100644 (file)
@@ -1,8 +1,21 @@
 jvmti-y += libjvmti.o
 jvmti-y += jvmti_agent.o
 
+# For strlcpy
+jvmti-y += libstring.o libctype.o
+
 CFLAGS_jvmti         = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
 CFLAGS_REMOVE_jvmti  = -Wmissing-declarations
 CFLAGS_REMOVE_jvmti += -Wstrict-prototypes
 CFLAGS_REMOVE_jvmti += -Wextra
 CFLAGS_REMOVE_jvmti += -Wwrite-strings
+
+CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+
+$(OUTPUT)jvmti/libstring.o: ../lib/string.c FORCE
+       $(call rule_mkdir)
+       $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)jvmti/libctype.o: ../lib/ctype.c FORCE
+       $(call rule_mkdir)
+       $(call if_changed_dep,cc_o_c)
index a67efb8..85ccb8c 100644 (file)
@@ -59,7 +59,13 @@ else
   CFLAGS := -g -Wall
 endif
 
-INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/ -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
+INCLUDES = \
+-I$(srctree)/tools/perf/lib/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
 
 # Append required CFLAGS
 override CFLAGS += $(EXTRA_WARNINGS)
@@ -88,13 +94,34 @@ LIBPERF_PC := $(OUTPUT)libperf.pc
 
 LIBPERF_ALL := $(LIBPERF_A) $(OUTPUT)libperf.so*
 
+LIB_DIR := $(srctree)/tools/lib/api/
+
+ifneq ($(OUTPUT),)
+ifneq ($(subdir),)
+  API_PATH=$(OUTPUT)/../lib/api/
+else
+  API_PATH=$(OUTPUT)
+endif
+else
+  API_PATH=$(LIB_DIR)
+endif
+
+LIBAPI = $(API_PATH)libapi.a
+
+$(LIBAPI): FORCE
+       $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
+
+$(LIBAPI)-clean:
+       $(call QUIET_CLEAN, libapi)
+       $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+
 $(LIBPERF_IN): FORCE
        $(Q)$(MAKE) $(build)=libperf
 
 $(LIBPERF_A): $(LIBPERF_IN)
        $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN)
 
-$(LIBPERF_SO): $(LIBPERF_IN)
+$(LIBPERF_SO): $(LIBPERF_IN) $(LIBAPI)
        $(QUIET_LINK)$(CC) --shared -Wl,-soname,libperf.so \
                                     -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
        @ln -sf $(@F) $(OUTPUT)libperf.so
@@ -106,12 +133,12 @@ libs: $(LIBPERF_A) $(LIBPERF_SO) $(LIBPERF_PC)
 all: fixdep
        $(Q)$(MAKE) libs
 
-clean:
+clean: $(LIBAPI)-clean
        $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
                 *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC)
        $(Q)$(MAKE) -C tests clean
 
-tests:
+tests: libs
        $(Q)$(MAKE) -C tests
        $(Q)$(MAKE) -C tests run
 
@@ -146,6 +173,7 @@ install_headers:
                $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
                $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
                $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
+               $(call do_install,include/perf/event.h,$(prefix)/include/perf,644);
 
 install_pkgconfig: $(LIBPERF_PC)
        $(call QUIET_INSTALL, $(LIBPERF_PC)) \
index 29d5e33..d0b9ae4 100644 (file)
@@ -4,7 +4,9 @@
 
 #include <stdio.h>
 #include <stdarg.h>
+#include <unistd.h>
 #include <perf/core.h>
+#include <internal/lib.h>
 #include "internal.h"
 
 static int __base_pr(enum libperf_print_level level, const char *format,
@@ -15,11 +17,6 @@ static int __base_pr(enum libperf_print_level level, const char *format,
 
 static libperf_print_fn_t __libperf_pr = __base_pr;
 
-void libperf_set_print(libperf_print_fn_t fn)
-{
-       __libperf_pr = fn;
-}
-
 __printf(2, 3)
 void libperf_print(enum libperf_print_level level, const char *format, ...)
 {
@@ -32,3 +29,9 @@ void libperf_print(enum libperf_print_level level, const char *format, ...)
        __libperf_pr(level, format, args);
        va_end(args);
 }
+
+void libperf_init(libperf_print_fn_t fn)
+{
+       page_size = sysconf(_SC_PAGE_SIZE);
+       __libperf_pr = fn;
+}
index 1f0e6f3..2ca1faf 100644 (file)
@@ -260,3 +260,15 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
 
        return -1;
 }
+
+int perf_cpu_map__max(struct perf_cpu_map *map)
+{
+       int i, max = -1;
+
+       for (i = 0; i < map->nr; i++) {
+               if (map->map[i] > max)
+                       max = map->map[i];
+       }
+
+       return max;
+}
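
A minimal usage sketch, not part of the patch and with error handling elided, sizing an array from the highest CPU id the way builtin-stat.c now does; perf_cpu_map__new(NULL) builds a map of all online CPUs:

	#include <perf/cpumap.h>

	static int nr_aggr_slots(void)
	{
		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); /* all online CPUs */
		int nr = perf_cpu_map__max(cpus);  /* highest CPU id, -1 if map is empty */

		perf_cpu_map__put(cpus);
		return nr + 1; /* slots needed so the highest id is addressable */
	}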
index f4dc9a2..d1496fe 100644 (file)
@@ -1,16 +1,30 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <perf/evlist.h>
 #include <perf/evsel.h>
+#include <linux/bitops.h>
 #include <linux/list.h>
+#include <linux/hash.h>
+#include <sys/ioctl.h>
 #include <internal/evlist.h>
 #include <internal/evsel.h>
+#include <internal/xyarray.h>
 #include <linux/zalloc.h>
 #include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <poll.h>
 #include <perf/cpumap.h>
 #include <perf/threadmap.h>
+#include <api/fd/array.h>
 
 void perf_evlist__init(struct perf_evlist *evlist)
 {
+       int i;
+
+       for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+               INIT_HLIST_HEAD(&evlist->heads[i]);
        INIT_LIST_HEAD(&evlist->entries);
        evlist->nr_entries = 0;
 }
@@ -157,3 +171,113 @@ void perf_evlist__disable(struct perf_evlist *evlist)
        perf_evlist__for_each_entry(evlist, evsel)
                perf_evsel__disable(evsel);
 }
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist)
+{
+       struct perf_evsel *first = perf_evlist__first(evlist);
+
+       return first->attr.read_format;
+}
+
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+static void perf_evlist__id_hash(struct perf_evlist *evlist,
+                                struct perf_evsel *evsel,
+                                int cpu, int thread, u64 id)
+{
+       int hash;
+       struct perf_sample_id *sid = SID(evsel, cpu, thread);
+
+       sid->id = id;
+       sid->evsel = evsel;
+       hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
+       hlist_add_head(&sid->node, &evlist->heads[hash]);
+}
+
+void perf_evlist__id_add(struct perf_evlist *evlist,
+                        struct perf_evsel *evsel,
+                        int cpu, int thread, u64 id)
+{
+       perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+       evsel->id[evsel->ids++] = id;
+}
+
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+                          struct perf_evsel *evsel,
+                          int cpu, int thread, int fd)
+{
+       u64 read_data[4] = { 0, };
+       int id_idx = 1; /* The first entry is the counter value */
+       u64 id;
+       int ret;
+
+       ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
+       if (!ret)
+               goto add;
+
+       if (errno != ENOTTY)
+               return -1;
+
+       /* Legacy way to get event id.. All hail to old kernels! */
+
+       /*
+        * This way does not work with group format read, so bail
+        * out in that case.
+        */
+       if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
+               return -1;
+
+       if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
+           read(fd, &read_data, sizeof(read_data)) == -1)
+               return -1;
+
+       if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               ++id_idx;
+       if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               ++id_idx;
+
+       id = read_data[id_idx];
+
+add:
+       perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+       return 0;
+}
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+{
+       int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+       int nr_threads = perf_thread_map__nr(evlist->threads);
+       int nfds = 0;
+       struct perf_evsel *evsel;
+
+       perf_evlist__for_each_entry(evlist, evsel) {
+               if (evsel->system_wide)
+                       nfds += nr_cpus;
+               else
+                       nfds += nr_cpus * nr_threads;
+       }
+
+       if (fdarray__available_entries(&evlist->pollfd) < nfds &&
+           fdarray__grow(&evlist->pollfd, nfds) < 0)
+               return -ENOMEM;
+
+       return 0;
+}
+
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+                           void *ptr, short revent)
+{
+       int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
+
+       if (pos >= 0) {
+               evlist->pollfd.priv[pos].ptr = ptr;
+               fcntl(fd, F_SETFL, O_NONBLOCK);
+       }
+
+       return pos;
+}
+
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
+{
+       return fdarray__poll(&evlist->pollfd, timeout);
+}
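
For orientation, a hypothetical caller-side sketch of how the three new pollfd helpers compose; the evlist is assumed to be populated and opened already, fd is an assumed event fd, and error handling is elided:

	#include <errno.h>
	#include <poll.h>
	#include <internal/evlist.h>

	static int wait_for_events(struct perf_evlist *evlist, int fd)
	{
		/* size the fdarray to hold one pollfd per evsel fd */
		if (perf_evlist__alloc_pollfd(evlist) < 0)
			return -ENOMEM;

		/* register fd; the helper also flips it to non-blocking */
		if (perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN) < 0)
			return -1;

		return perf_evlist__poll(evlist, 1000); /* wait up to 1000 ms */
	}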
index 24abc80..a8cb582 100644 (file)
@@ -230,3 +230,33 @@ struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel)
 {
        return &evsel->attr;
 }
+
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       if (ncpus == 0 || nthreads == 0)
+               return 0;
+
+       if (evsel->system_wide)
+               nthreads = 1;
+
+       evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
+       if (evsel->sample_id == NULL)
+               return -ENOMEM;
+
+       evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
+       if (evsel->id == NULL) {
+               xyarray__delete(evsel->sample_id);
+               evsel->sample_id = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+       xyarray__delete(evsel->sample_id);
+       evsel->sample_id = NULL;
+       zfree(&evsel->id);
+       evsel->ids = 0;
+}
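
A short pairing sketch, assuming an evsel plus ncpus/nthreads already in scope; it only illustrates the intended alloc/use/free discipline for the new ID arrays:

	if (perf_evsel__alloc_id(evsel, ncpus, nthreads) < 0)
		return -ENOMEM;
	/* ids get filled in later, e.g. by perf_evlist__id_add_fd() as fds are opened */
	perf_evsel__free_id(evsel);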
index 448891f..9f440ab 100644 (file)
@@ -3,6 +3,11 @@
 #define __LIBPERF_INTERNAL_EVLIST_H
 
 #include <linux/list.h>
+#include <api/fd/array.h>
+#include <internal/evsel.h>
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
 
 struct perf_cpu_map;
 struct perf_thread_map;
@@ -13,8 +18,16 @@ struct perf_evlist {
        bool                     has_user_cpus;
        struct perf_cpu_map     *cpus;
        struct perf_thread_map  *threads;
+       int                      nr_mmaps;
+       size_t                   mmap_len;
+       struct fdarray           pollfd;
+       struct hlist_head        heads[PERF_EVLIST__HLIST_SIZE];
 };
 
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+                           void *ptr, short revent);
+
 /**
  * __perf_evlist__for_each_entry - iterate thru all the evsels
  * @list: list_head instance to iterate
@@ -47,4 +60,24 @@ struct perf_evlist {
 #define perf_evlist__for_each_entry_reverse(evlist, evsel) \
        __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
 
+static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
+{
+       return list_entry(evlist->entries.next, struct perf_evsel, node);
+}
+
+static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
+{
+       return list_entry(evlist->entries.prev, struct perf_evsel, node);
+}
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist);
+
+void perf_evlist__id_add(struct perf_evlist *evlist,
+                        struct perf_evsel *evsel,
+                        int cpu, int thread, u64 id);
+
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+                          struct perf_evsel *evsel,
+                          int cpu, int thread, int fd);
+
 #endif /* __LIBPERF_INTERNAL_EVLIST_H */
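
The heads[] hash declared here is what perf_evlist__id_hash() fills. A hypothetical inverse lookup, not added by this patch, shows how a sample ID would be resolved back to its evsel:

	static struct perf_evsel *id_to_evsel(struct perf_evlist *evlist, u64 id)
	{
		int hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
		struct perf_sample_id *sid;

		hlist_for_each_entry(sid, &evlist->heads[hash], node)
			if (sid->id == id)
				return sid->evsel;
		return NULL;
	}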
index 8b854d1..a69b829 100644 (file)
@@ -4,9 +4,35 @@
 
 #include <linux/types.h>
 #include <linux/perf_event.h>
+#include <stdbool.h>
+#include <sys/types.h>
 
 struct perf_cpu_map;
 struct perf_thread_map;
+struct xyarray;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there is
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+       struct hlist_node        node;
+       u64                      id;
+       struct perf_evsel       *evsel;
+       /*
+        * 'idx' will be used for AUX area sampling. A sample will have AUX area
+        * data that will be queued for decoding, where there are separate
+        * queues for each CPU (per-cpu tracing) or task (per-thread tracing).
+        * The sample ID can be used to look up 'idx', which is effectively the
+        * queue number.
+        */
+       int                      idx;
+       int                      cpu;
+       pid_t                    tid;
+
+       /* Holds total ID period value for PERF_SAMPLE_READ processing. */
+       u64                      period;
+};
 
 struct perf_evsel {
        struct list_head         node;
@@ -15,9 +41,13 @@ struct perf_evsel {
        struct perf_cpu_map     *own_cpus;
        struct perf_thread_map  *threads;
        struct xyarray          *fd;
+       struct xyarray          *sample_id;
+       u64                     *id;
+       u32                      ids;
 
        /* parse modifier helper */
        int                      nr_members;
+       bool                     system_wide;
 };
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
@@ -26,4 +56,7 @@ void perf_evsel__free_fd(struct perf_evsel *evsel);
 int perf_evsel__read_size(struct perf_evsel *evsel);
 int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
 
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__free_id(struct perf_evsel *evsel);
+
 #endif /* __LIBPERF_INTERNAL_EVSEL_H */
index 0b56f12..5175d49 100644 (file)
@@ -2,7 +2,9 @@
 #ifndef __LIBPERF_INTERNAL_LIB_H
 #define __LIBPERF_INTERNAL_LIB_H
 
-#include <unistd.h>
+#include <sys/types.h>
+
+extern unsigned int page_size;
 
 ssize_t readn(int fd, void *buf, size_t n);
 ssize_t writen(int fd, const void *buf, size_t n);
diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h
new file mode 100644 (file)
index 0000000..ba1e519
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_MMAP_H
+#define __LIBPERF_INTERNAL_MMAP_H
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <stdbool.h>
+
+/* perf sample has a 16-bit size limit */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
+struct perf_mmap {
+       void            *base;
+       int              mask;
+       int              fd;
+       int              cpu;
+       refcount_t       refcnt;
+       u64              prev;
+       u64              start;
+       u64              end;
+       bool             overwrite;
+       u64              flush;
+       char             event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+};
+
+#endif /* __LIBPERF_INTERNAL_MMAP_H */
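
As a rough sketch of how a reader is expected to use these fields (an assumption based on perf's existing mmap readers, not code from this patch): the first page at base is the control page, data follows it, and mask wraps offsets within the data area:

	static void *perf_mmap__data_at(struct perf_mmap *map, u64 pos)
	{
		unsigned char *data = (unsigned char *)map->base + page_size;

		return data + (pos & map->mask); /* mask is data area size minus 1 */
	}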
index c341a7b..cfd70e7 100644 (file)
@@ -17,6 +17,6 @@ enum libperf_print_level {
 typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
                                  const char *, va_list ap);
 
-LIBPERF_API void libperf_set_print(libperf_print_fn_t fn);
+LIBPERF_API void libperf_init(libperf_print_fn_t fn);
 
 #endif /* __LIBPERF_CORE_H */
index 8aa995c..ac9aa49 100644 (file)
@@ -16,6 +16,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
 LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
 LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
 LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
 
 #define perf_cpu_map__for_each_cpu(cpu, idx, cpus)             \
        for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);   \
index 38365f8..8a2ce07 100644 (file)
@@ -31,5 +31,6 @@ LIBPERF_API void perf_evlist__disable(struct perf_evlist *evlist);
 LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist,
                                       struct perf_cpu_map *cpus,
                                       struct perf_thread_map *threads);
+LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
 
 #endif /* __LIBPERF_EVLIST_H */
index 2a81819..1865893 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/kernel.h>
 #include <internal/lib.h>
 
+unsigned int page_size;
+
 static ssize_t ion(bool is_read, int fd, void *buf, size_t n)
 {
        void *buf_start = buf;
index dc4d663..ab8dbde 100644 (file)
@@ -1,6 +1,6 @@
 LIBPERF_0.0.1 {
        global:
-               libperf_set_print;
+               libperf_init;
                perf_cpu_map__dummy_new;
                perf_cpu_map__get;
                perf_cpu_map__put;
@@ -9,6 +9,7 @@ LIBPERF_0.0.1 {
                perf_cpu_map__nr;
                perf_cpu_map__cpu;
                perf_cpu_map__empty;
+               perf_cpu_map__max;
                perf_thread_map__new_dummy;
                perf_thread_map__set_pid;
                perf_thread_map__comm;
@@ -38,6 +39,7 @@ LIBPERF_0.0.1 {
                perf_evlist__remove;
                perf_evlist__next;
                perf_evlist__set_maps;
+               perf_evlist__poll;
        local:
                *;
 };
index 76a43cf..aa34c20 100644 (file)
@@ -1,13 +1,23 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
 #include <perf/cpumap.h>
 #include <internal/tests.h>
 
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       return vfprintf(stderr, fmt, ap);
+}
+
 int main(int argc, char **argv)
 {
        struct perf_cpu_map *cpus;
 
        __T_START;
 
+       libperf_init(libperf_print);
+
        cpus = perf_cpu_map__dummy_new();
        if (!cpus)
                return -1;
index 4e1407f..e6b2ab2 100644 (file)
@@ -1,4 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdarg.h>
 #include <linux/perf_event.h>
 #include <perf/cpumap.h>
 #include <perf/threadmap.h>
@@ -6,6 +8,12 @@
 #include <perf/evsel.h>
 #include <internal/tests.h>
 
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       return vfprintf(stderr, fmt, ap);
+}
+
 static int test_stat_cpu(void)
 {
        struct perf_cpu_map *cpus;
@@ -177,6 +185,8 @@ int main(int argc, char **argv)
 {
        __T_START;
 
+       libperf_init(libperf_print);
+
        test_stat_cpu();
        test_stat_thread();
        test_stat_thread_enable();
index 2c648fe..1b6c428 100644 (file)
@@ -1,10 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
 #include <linux/perf_event.h>
 #include <perf/cpumap.h>
 #include <perf/threadmap.h>
 #include <perf/evsel.h>
 #include <internal/tests.h>
 
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       return vfprintf(stderr, fmt, ap);
+}
+
 static int test_stat_cpu(void)
 {
        struct perf_cpu_map *cpus;
@@ -116,6 +124,8 @@ int main(int argc, char **argv)
 {
        __T_START;
 
+       libperf_init(libperf_print);
+
        test_stat_cpu();
        test_stat_thread();
        test_stat_thread_enable();
index 10a4f4c..8c5f472 100644 (file)
@@ -1,13 +1,23 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
 #include <perf/threadmap.h>
 #include <internal/tests.h>
 
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       return vfprintf(stderr, fmt, ap);
+}
+
 int main(int argc, char **argv)
 {
        struct perf_thread_map *threads;
 
        __T_START;
 
+       libperf_init(libperf_print);
+
        threads = perf_thread_map__new_dummy();
        if (!threads)
                return -1;
index 1193b92..27f94b0 100644 (file)
@@ -12,6 +12,7 @@
 #include "util/build-id.h"
 #include "util/cache.h"
 #include "util/env.h"
+#include <internal/lib.h> // page_size
 #include <subcmd/exec-cmd.h>
 #include "util/config.h"
 #include <subcmd/run-command.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
 #include "util/event.h"
-#include "util/util.h"
+#include "util/util.h" // usage()
 #include "ui/ui.h"
 #include "perf-sys.h"
 #include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
+#include <perf/core.h>
 #include <errno.h>
 #include <pthread.h>
 #include <signal.h>
@@ -428,6 +430,12 @@ void pthread__unblock_sigwinch(void)
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
 }
 
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       return eprintf(level, verbose, fmt, ap);
+}
+
 int main(int argc, const char **argv)
 {
        int err;
@@ -438,8 +446,7 @@ int main(int argc, const char **argv)
        exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
        pager_init(PERF_PAGER_ENVIRONMENT);
 
-       /* The page_size is placed in util object. */
-       page_size = sysconf(_SC_PAGE_SIZE);
+       libperf_init(libperf_print);
 
        cmd = extract_argv0_path(argv[0]);
        if (!cmd)
index e62b09b..de7efa2 100644 (file)
@@ -30,9 +30,9 @@ the topic. Eg: "Floating-point.json".
 All the topic JSON files for a CPU model/family should be in a separate
 sub directory. Thus for the Silvermont X86 CPU:
 
-       $ ls tools/perf/pmu-events/arch/x86/Silvermont_core
-       Cache.json      Memory.json     Virtual-Memory.json
-       Frontend.json   Pipeline.json
+       $ ls tools/perf/pmu-events/arch/x86/silvermont
+       cache.json     memory.json    virtual-memory.json
+       frontend.json  pipeline.json
 
 The JSONs folder for a CPU model/family may be placed in the root arch
 folder, or may be placed in a vendor sub-folder under the arch folder
@@ -94,7 +94,7 @@ users to specify events by their name:
 
 where 'pm_1plus_ppc_cmpl' is a Power8 PMU event.
 
-However some errors in processing may cause the perf build to fail.
+However some errors in processing may cause the alias build to fail.
 
 Mapfile format
 ===============
@@ -119,7 +119,7 @@ where:
 
        Header line
                The header line is the first line in the file, which is
-               always _IGNORED_. It can empty.
+               always _IGNORED_. It can be empty.
 
        CPUID:
                CPUID is an arch-specific char string, that can be used
@@ -138,15 +138,15 @@ where:
                files, relative to the directory containing the mapfile.csv
 
        Type:
-               indicates whether the events or "core" or "uncore" events.
+               indicates whether the events are "core" or "uncore" events.
 
 
        Eg:
 
-       $ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
-       GenuineIntel-6-37,V13,Silvermont_core,core
-       GenuineIntel-6-4D,V13,Silvermont_core,core
-       GenuineIntel-6-4C,V13,Silvermont_core,core
+       $ grep silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
+       GenuineIntel-6-37,v13,silvermont,core
+       GenuineIntel-6-4D,v13,silvermont,core
+       GenuineIntel-6-4C,v13,silvermont,core
 
        i.e the three CPU models use the JSON files (i.e PMU events) listed
-       in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'.
+       in the directory 'tools/perf/pmu-events/arch/x86/silvermont'.
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json
new file mode 100644 (file)
index 0000000..b5e5d05
--- /dev/null
@@ -0,0 +1,14 @@
+[
+    {
+        "PublicDescription": "Mispredicted or not predicted branch speculatively executed. This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken.",
+        "EventCode": "0x10",
+        "EventName": "BR_MIS_PRED",
+        "BriefDescription": "Mispredicted or not predicted branch speculatively executed."
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed. This event counts all predictable branches.",
+        "EventCode": "0x12",
+        "EventName": "BR_PRED",
+        "BriefDescription": "Predictable branch speculatively executed."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json
new file mode 100644 (file)
index 0000000..fce7309
--- /dev/null
@@ -0,0 +1,24 @@
+[
+    {
+        "EventCode": "0x11",
+        "EventName": "CPU_CYCLES",
+        "BriefDescription": "The number of core clock cycles."
+    },
+    {
+        "PublicDescription": "Bus access. This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.",
+        "EventCode": "0x19",
+        "EventName": "BUS_ACCESS",
+        "BriefDescription": "Bus access."
+    },
+    {
+        "EventCode": "0x1D",
+        "EventName": "BUS_CYCLES",
+        "BriefDescription": "Bus cycles. This event duplicates CPU_CYCLES."
+    },
+    {
+        "ArchStdEvent":  "BUS_ACCESS_RD"
+    },
+    {
+        "ArchStdEvent":  "BUS_ACCESS_WR"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json
new file mode 100644 (file)
index 0000000..2459408
--- /dev/null
@@ -0,0 +1,207 @@
+[
+    {
+        "PublicDescription": "L1 instruction cache refill. This event counts any instruction fetch which misses in the cache.",
+        "EventCode": "0x01",
+        "EventName": "L1I_CACHE_REFILL",
+        "BriefDescription": "L1 instruction cache refill"
+    },
+    {
+        "PublicDescription": "L1 instruction TLB refill. This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
+        "EventCode": "0x02",
+        "EventName": "L1I_TLB_REFILL",
+        "BriefDescription": "L1 instruction TLB refill"
+    },
+    {
+        "PublicDescription": "L1 data cache refill. This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.",
+        "EventCode": "0x03",
+        "EventName": "L1D_CACHE_REFILL",
+        "BriefDescription": "L1 data cache refill"
+    },
+    {
+        "PublicDescription": "L1 data cache access. This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.",
+        "EventCode": "0x04",
+        "EventName": "L1D_CACHE",
+        "BriefDescription": "L1 data cache access"
+    },
+    {
+        "PublicDescription": "L1 data TLB refill. This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
+        "EventCode": "0x05",
+        "EventName": "L1D_TLB_REFILL",
+        "BriefDescription": "L1 data TLB refill"
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.",
+        "EventCode": "0x14",
+        "EventName": "L1I_CACHE",
+        "BriefDescription": "L1 instruction cache access"
+    },
+    {
+        "PublicDescription": "L1 data cache Write-Back. This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.",
+        "EventCode": "0x15",
+        "EventName": "L1D_CACHE_WB",
+        "BriefDescription": "L1 data cache Write-Back"
+    },
+    {
+        "PublicDescription": "L2 data cache access. This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.",
+        "EventCode": "0x16",
+        "EventName": "L2D_CACHE",
+        "BriefDescription": "L2 data cache access"
+    },
+    {
+        "PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted.",
+        "EventCode": "0x17",
+        "EventName": "L2D_CACHE_REFILL",
+        "BriefDescription": "L2 data cache refill"
+    },
+    {
+        "PublicDescription": "L2 data cache write-back. This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted.",
+        "EventCode": "0x18",
+        "EventName": "L2D_CACHE_WB",
+        "BriefDescription": "L2 data cache write-back"
+    },
+    {
+        "PublicDescription": "L2 data cache allocation without refill. This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.",
+        "EventCode": "0x20",
+        "EventName": "L2D_CACHE_ALLOCATE",
+        "BriefDescription": "L2 data cache allocation without refill"
+    },
+    {
+        "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.",
+        "EventCode": "0x25",
+        "EventName": "L1D_TLB",
+        "BriefDescription": "Level 1 data TLB access."
+    },
+    {
+        "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB. This event counts regardless of whether the MMU is enabled.",
+        "EventCode": "0x26",
+        "EventName": "L1I_TLB",
+        "BriefDescription": "Level 1 instruction TLB access"
+    },
+    {
+        "PublicDescription": "This event counts any full cache line write into the L3 cache which does not cause a linefill, including write-backs from L2 to L3 and full-line writes which do not allocate into L2.",
+        "EventCode": "0x29",
+        "EventName": "L3D_CACHE_ALLOCATE",
+        "BriefDescription": "Allocation without refill"
+    },
+    {
+        "PublicDescription": "Attributable Level 3 unified cache refill. This event counts for any cacheable read transaction returning data from the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.",
+        "EventCode": "0x2A",
+        "EventName": "L3D_CACHE_REFILL",
+        "BriefDescription": "Attributable Level 3 unified cache refill."
+    },
+    {
+        "PublicDescription": "Attributable Level 3 unified cache access. This event counts for any cacheable read transaction returning data from the SCU, or for any cacheable write to the SCU.",
+        "EventCode": "0x2B",
+        "EventName": "L3D_CACHE",
+        "BriefDescription": "Attributable Level 3 unified cache access."
+    },
+    {
+        "PublicDescription": "Attributable L2 data or unified TLB refill. This event counts on any refill of the L2 TLB, caused by either an instruction or data access. This event does not count if the MMU is disabled.",
+        "EventCode": "0x2D",
+        "EventName": "L2D_TLB_REFILL",
+        "BriefDescription": "Attributable L2 data or unified TLB refill"
+    },
+    {
+        "PublicDescription": "Attributable L2 data or unified TLB access. This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.",
+        "EventCode": "0x2F",
+        "EventName": "L2D_TLB",
+        "BriefDescription": "Attributable L2 data or unified TLB access"
+    },
+    {
+        "PublicDescription": "Access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count.",
+        "EventCode": "0x34",
+        "EventName": "DTLB_WALK",
+        "BriefDescription": "Access to data TLB that caused a page table walk."
+    },
+    {
+        "PublicDescription": "Access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2D_TLB_REFILL to count.",
+        "EventCode": "0x35",
+        "EventName": "ITLB_WALK",
+        "BriefDescription": "Access to instruction TLB that caused a page table walk."
+    },
+    {
+        "EventCode": "0x36",
+        "EventName": "LL_CACHE_RD",
+        "BriefDescription": "Last level cache access, read"
+    },
+    {
+        "EventCode": "0x37",
+        "EventName": "LL_CACHE_MISS_RD",
+        "BriefDescription": "Last level cache miss, read"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_INVAL"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_WR"
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE_RD"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json
new file mode 100644 (file)
index 0000000..98d29c8
--- /dev/null
@@ -0,0 +1,52 @@
+[
+    {
+        "EventCode": "0x09",
+        "EventName": "EXC_TAKEN",
+        "BriefDescription": "Exception taken."
+    },
+    {
+        "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs.",
+        "EventCode": "0x1A",
+        "EventName": "MEMORY_ERROR",
+        "BriefDescription": "Local memory error."
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_HVC"
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_SMC"
+    },
+    {
+        "ArchStdEvent": "EXC_SVC"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_OTHER"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_UNDEF"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json
new file mode 100644 (file)
index 0000000..c153ac7
--- /dev/null
@@ -0,0 +1,108 @@
+[
+    {
+        "PublicDescription": "Software increment. Instruction architecturally executed (condition code check pass).",
+        "EventCode": "0x00",
+        "EventName": "SW_INCR",
+        "BriefDescription": "Software increment."
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed. This event counts all retired instructions, including those that fail their condition check.",
+        "EventCode": "0x08",
+        "EventName": "INST_RETIRED",
+        "BriefDescription": "Instruction architecturally executed."
+    },
+    {
+        "EventCode": "0x0A",
+        "EventName": "EXC_RETURN",
+        "BriefDescription": "Instruction architecturally executed, condition code check pass, exception return."
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR. This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.",
+        "EventCode": "0x0B",
+        "EventName": "CID_WRITE_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR."
+    },
+    {
+        "EventCode": "0x1B",
+        "EventName": "INST_SPEC",
+        "BriefDescription": "Operation speculatively executed"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition code check pass, write to TTBR. This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.",
+        "EventCode": "0x1C",
+        "EventName": "TTBR_WRITE_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, condition code check pass, write to TTBR"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.",
+        "EventCode": "0x21",
+        "EventName": "BR_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, branch."
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.",
+        "EventCode": "0x22",
+        "EventName": "BR_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted branch."
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_IMMED_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_INDIRECT_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_SPEC"
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC"
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC"
+    },
+    {
+        "ArchStdEvent": "DP_SPEC"
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC"
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC"
+    },
+    {
+        "ArchStdEvent": "LDREX_SPEC"
+    },
+    {
+        "ArchStdEvent": "LDST_SPEC"
+    },
+    {
+        "ArchStdEvent": "LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_FAIL_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_PASS_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC"
+    },
+    {
+        "ArchStdEvent": "ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json
new file mode 100644 (file)
index 0000000..b866432
--- /dev/null
@@ -0,0 +1,23 @@
+[
+    {
+        "PublicDescription": "Data memory access. This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.",
+        "EventCode": "0x13",
+        "EventName": "MEM_ACCESS",
+        "BriefDescription": "Data memory access"
+    },
+    {
+         "ArchStdEvent": "MEM_ACCESS_RD"
+    },
+    {
+         "ArchStdEvent": "MEM_ACCESS_WR"
+    },
+    {
+         "ArchStdEvent": "UNALIGNED_LD_SPEC"
+    },
+    {
+         "ArchStdEvent": "UNALIGNED_ST_SPEC"
+    },
+    {
+         "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json
new file mode 100644 (file)
index 0000000..8bde029
--- /dev/null
@@ -0,0 +1,7 @@
+[
+    {
+        "EventCode": "0x31",
+        "EventName": "REMOTE_ACCESS",
+        "BriefDescription": "Access to another socket in a multi-socket system"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json
new file mode 100644 (file)
index 0000000..010a647
--- /dev/null
@@ -0,0 +1,14 @@
+[
+    {
+        "PublicDescription": "No operation issued because of the frontend. The counter counts on any cycle when there are no fetched instructions available to dispatch.",
+        "EventCode": "0x23",
+        "EventName": "STALL_FRONTEND",
+        "BriefDescription": "No operation issued because of the frontend."
+    },
+    {
+        "PublicDescription": "No operation issued because of the backend. The counter counts on any cycle when fetched instructions are not dispatched due to resource constraints.",
+        "EventCode": "0x24",
+        "EventName": "STALL_BACKEND",
+        "BriefDescription": "No operation issued because of the backend."
+    }
+]
index 927fcdd..0d60914 100644 (file)
@@ -16,6 +16,8 @@
 0x00000000420f1000,v1,arm/cortex-a53,core
 0x00000000410fd070,v1,arm/cortex-a57-a72,core
 0x00000000410fd080,v1,arm/cortex-a57-a72,core
+0x00000000410fd0b0,v1,arm/cortex-a76-n1,core
+0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
 0x00000000420f5160,v1,cavium/thunderx2,core
 0x00000000430f0af0,v1,cavium/thunderx2,core
 0x00000000480fd010,v1,hisilicon/hip08,core
index 9dc2f6b..b2a3df0 100644 (file)
     "BriefDescription": "L3 Load Prefetches",
     "PublicDescription": ""
   },
-  {,
-    "EventCode": "0xa29084",
-    "EventName": "PM_L3_P0_GRP_PUMP",
-    "BriefDescription": "L3 pf sent with grp scope port 0",
-    "PublicDescription": ""
-  },
-  {,
-    "EventCode": "0x528084",
-    "EventName": "PM_L3_P0_LCO_DATA",
-    "BriefDescription": "lco sent with data port 0",
-    "PublicDescription": ""
-  },
-  {,
-    "EventCode": "0x518080",
-    "EventName": "PM_L3_P0_LCO_NO_DATA",
-    "BriefDescription": "dataless l3 lco sent port 0",
-    "PublicDescription": ""
-  },
-  {,
-    "EventCode": "0xa4908c",
-    "EventName": "PM_L3_P0_LCO_RTY",
-    "BriefDescription": "L3 LCO received retry port 0",
-    "PublicDescription": ""
-  },
   {,
     "EventCode": "0x84908d",
     "EventName": "PM_L3_PF0_ALLOC",
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json
deleted file mode 100644 (file)
index 17fb524..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-[
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "0",
-               "EventName": "CPU_CYCLES",
-               "BriefDescription": "CPU Cycles",
-               "PublicDescription": "Cycle Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "1",
-               "EventName": "INSTRUCTIONS",
-               "BriefDescription": "Instructions",
-               "PublicDescription": "Instruction Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "2",
-               "EventName": "L1I_DIR_WRITES",
-               "BriefDescription": "L1I Directory Writes",
-               "PublicDescription": "Level-1 I-Cache Directory Write Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "3",
-               "EventName": "L1I_PENALTY_CYCLES",
-               "BriefDescription": "L1I Penalty Cycles",
-               "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "4",
-               "EventName": "L1D_DIR_WRITES",
-               "BriefDescription": "L1D Directory Writes",
-               "PublicDescription": "Level-1 D-Cache Directory Write Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "5",
-               "EventName": "L1D_PENALTY_CYCLES",
-               "BriefDescription": "L1D Penalty Cycles",
-               "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "32",
-               "EventName": "PROBLEM_STATE_CPU_CYCLES",
-               "BriefDescription": "Problem-State CPU Cycles",
-               "PublicDescription": "Problem-State Cycle Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "33",
-               "EventName": "PROBLEM_STATE_INSTRUCTIONS",
-               "BriefDescription": "Problem-State Instructions",
-               "PublicDescription": "Problem-State Instruction Count"
-       },
-]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json
deleted file mode 100644 (file)
index db286f1..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-[
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "64",
-               "EventName": "PRNG_FUNCTIONS",
-               "BriefDescription": "PRNG Functions",
-               "PublicDescription": "Total number of the PRNG functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "65",
-               "EventName": "PRNG_CYCLES",
-               "BriefDescription": "PRNG Cycles",
-               "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "66",
-               "EventName": "PRNG_BLOCKED_FUNCTIONS",
-               "BriefDescription": "PRNG Blocked Functions",
-               "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "67",
-               "EventName": "PRNG_BLOCKED_CYCLES",
-               "BriefDescription": "PRNG Blocked Cycles",
-               "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "68",
-               "EventName": "SHA_FUNCTIONS",
-               "BriefDescription": "SHA Functions",
-               "PublicDescription": "Total number of SHA functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "69",
-               "EventName": "SHA_CYCLES",
-               "BriefDescription": "SHA Cycles",
-               "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "70",
-               "EventName": "SHA_BLOCKED_FUNCTIONS",
-               "BriefDescription": "SHA Blocked Functions",
-               "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "71",
-               "EventName": "SHA_BLOCKED_CYCLES",
-               "BriefDescription": "SHA Blocked Cycles",
-               "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "72",
-               "EventName": "DEA_FUNCTIONS",
-               "BriefDescription": "DEA Functions",
-               "PublicDescription": "Total number of the DEA functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "73",
-               "EventName": "DEA_CYCLES",
-               "BriefDescription": "DEA Cycles",
-               "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "74",
-               "EventName": "DEA_BLOCKED_FUNCTIONS",
-               "BriefDescription": "DEA Blocked Functions",
-               "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "75",
-               "EventName": "DEA_BLOCKED_CYCLES",
-               "BriefDescription": "DEA Blocked Cycles",
-               "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "76",
-               "EventName": "AES_FUNCTIONS",
-               "BriefDescription": "AES Functions",
-               "PublicDescription": "Total number of AES functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "77",
-               "EventName": "AES_CYCLES",
-               "BriefDescription": "AES Cycles",
-               "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "78",
-               "EventName": "AES_BLOCKED_FUNCTIONS",
-               "BriefDescription": "AES Blocked Functions",
-               "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "79",
-               "EventName": "AES_BLOCKED_CYCLES",
-               "BriefDescription": "AES Blocked Cycles",
-               "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
-       },
-]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json
deleted file mode 100644 (file)
index 5e36bc2..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-[
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "80",
-               "EventName": "ECC_FUNCTION_COUNT",
-               "BriefDescription": "ECC Function Count",
-               "PublicDescription": "Long ECC function Count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "81",
-               "EventName": "ECC_CYCLES_COUNT",
-               "BriefDescription": "ECC Cycles Count",
-               "PublicDescription": "Long ECC Function cycles count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "82",
-               "EventName": "ECC_BLOCKED_FUNCTION_COUNT",
-               "BriefDescription": "ECC Blocked Function Count",
-               "PublicDescription": "Long ECC blocked function count"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "83",
-               "EventName": "ECC_BLOCKED_CYCLES_COUNT",
-               "BriefDescription": "ECC Blocked Cycles Count",
-               "PublicDescription": "Long ECC blocked cycles count"
-       },
-]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json
deleted file mode 100644 (file)
index 89e0707..0000000
+++ /dev/null
@@ -1,373 +0,0 @@
-[
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "128",
-               "EventName": "L1D_RO_EXCL_WRITES",
-               "BriefDescription": "L1D Read-only Exclusive Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "129",
-               "EventName": "DTLB2_WRITES",
-               "BriefDescription": "DTLB2 Writes",
-               "PublicDescription": "A translation has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "130",
-               "EventName": "DTLB2_MISSES",
-               "BriefDescription": "DTLB2 Misses",
-               "PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "131",
-               "EventName": "DTLB2_HPAGE_WRITES",
-               "BriefDescription": "DTLB2 One-Megabyte Page Writes",
-               "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "132",
-               "EventName": "DTLB2_GPAGE_WRITES",
-               "BriefDescription": "DTLB2 Two-Gigabyte Page Writes",
-               "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "133",
-               "EventName": "L1D_L2D_SOURCED_WRITES",
-               "BriefDescription": "L1D L2D Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "134",
-               "EventName": "ITLB2_WRITES",
-               "BriefDescription": "ITLB2 Writes",
-               "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "135",
-               "EventName": "ITLB2_MISSES",
-               "BriefDescription": "ITLB2 Misses",
-               "PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "136",
-               "EventName": "L1I_L2I_SOURCED_WRITES",
-               "BriefDescription": "L1I L2I Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "137",
-               "EventName": "TLB2_PTE_WRITES",
-               "BriefDescription": "TLB2 PTE Writes",
-               "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "138",
-               "EventName": "TLB2_CRSTE_WRITES",
-               "BriefDescription": "TLB2 CRSTE Writes",
-               "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "139",
-               "EventName": "TLB2_ENGINES_BUSY",
-               "BriefDescription": "TLB2 Engines Busy",
-               "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "140",
-               "EventName": "TX_C_TEND",
-               "BriefDescription": "Completed TEND instructions in constrained TX mode",
-               "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "141",
-               "EventName": "TX_NC_TEND",
-               "BriefDescription": "Completed TEND instructions in non-constrained TX mode",
-               "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "143",
-               "EventName": "L1C_TLB2_MISSES",
-               "BriefDescription": "L1C TLB2 Misses",
-               "PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "144",
-               "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
-               "BriefDescription": "L1D On-Chip L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "145",
-               "EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1D On-Chip Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "146",
-               "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "147",
-               "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES",
-               "BriefDescription": "L1D On-Cluster L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "148",
-               "EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1D On-Cluster Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "149",
-               "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "150",
-               "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES",
-               "BriefDescription": "L1D Off-Cluster L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "151",
-               "EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1D Off-Cluster Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "152",
-               "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "153",
-               "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES",
-               "BriefDescription": "L1D Off-Drawer L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "154",
-               "EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "155",
-               "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "156",
-               "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
-               "BriefDescription": "L1D On-Drawer L4 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "157",
-               "EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES",
-               "BriefDescription": "L1D Off-Drawer L4 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "158",
-               "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO",
-               "BriefDescription": "L1D On-Chip L3 Sourced Writes read-only",
-               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "162",
-               "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
-               "BriefDescription": "L1I On-Chip L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "163",
-               "EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1I On-Chip Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "164",
-               "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "165",
-               "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES",
-               "BriefDescription": "L1I On-Cluster L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "166",
-               "EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1I On-Cluster Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "167",
-               "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "168",
-               "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES",
-               "BriefDescription": "L1I Off-Cluster L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "169",
-               "EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1I Off-Cluster Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "170",
-               "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "171",
-               "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES",
-               "BriefDescription": "L1I Off-Drawer L3 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "172",
-               "EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES",
-               "BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "173",
-               "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV",
-               "BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "174",
-               "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
-               "BriefDescription": "L1I On-Drawer L4 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "175",
-               "EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES",
-               "BriefDescription": "L1I Off-Drawer L4 Sourced Writes",
-               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "224",
-               "EventName": "BCD_DFP_EXECUTION_SLOTS",
-               "BriefDescription": "BCD DFP Execution Slots",
-               "PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "225",
-               "EventName": "VX_BCD_EXECUTION_SLOTS",
-               "BriefDescription": "VX BCD Execution Slots",
-               "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMP, VMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOP, VCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVD, VCVDG"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "226",
-               "EventName": "DECIMAL_INSTRUCTIONS",
-               "BriefDescription": "Decimal Instructions",
-               "PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "232",
-               "EventName": "LAST_HOST_TRANSLATIONS",
-               "BriefDescription": "Last host translation done",
-               "PublicDescription": "Last Host Translation done"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "243",
-               "EventName": "TX_NC_TABORT",
-               "BriefDescription": "Aborted transactions in non-constrained TX mode",
-               "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "244",
-               "EventName": "TX_C_TABORT_NO_SPECIAL",
-               "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
-               "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "245",
-               "EventName": "TX_C_TABORT_SPECIAL",
-               "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
-               "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "448",
-               "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
-               "BriefDescription": "Cycle count with one thread active",
-               "PublicDescription": "Cycle count with one thread active"
-       },
-       {
-               "Unit": "CPU-M-CF",
-               "EventCode": "449",
-               "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
-               "BriefDescription": "Cycle count with two threads active",
-               "PublicDescription": "Cycle count with two threads active"
-       },
-]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/basic.json b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json
new file mode 100644 (file)
index 0000000..17fb524
--- /dev/null
@@ -0,0 +1,58 @@
+[
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "0",
+               "EventName": "CPU_CYCLES",
+               "BriefDescription": "CPU Cycles",
+               "PublicDescription": "Cycle Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "1",
+               "EventName": "INSTRUCTIONS",
+               "BriefDescription": "Instructions",
+               "PublicDescription": "Instruction Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "2",
+               "EventName": "L1I_DIR_WRITES",
+               "BriefDescription": "L1I Directory Writes",
+               "PublicDescription": "Level-1 I-Cache Directory Write Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "3",
+               "EventName": "L1I_PENALTY_CYCLES",
+               "BriefDescription": "L1I Penalty Cycles",
+               "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "4",
+               "EventName": "L1D_DIR_WRITES",
+               "BriefDescription": "L1D Directory Writes",
+               "PublicDescription": "Level-1 D-Cache Directory Write Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "5",
+               "EventName": "L1D_PENALTY_CYCLES",
+               "BriefDescription": "L1D Penalty Cycles",
+               "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "32",
+               "EventName": "PROBLEM_STATE_CPU_CYCLES",
+               "BriefDescription": "Problem-State CPU Cycles",
+               "PublicDescription": "Problem-State Cycle Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "33",
+               "EventName": "PROBLEM_STATE_INSTRUCTIONS",
+               "BriefDescription": "Problem-State Instructions",
+               "PublicDescription": "Problem-State Instruction Count"
+       },
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json
new file mode 100644 (file)
index 0000000..db286f1
--- /dev/null
@@ -0,0 +1,114 @@
+[
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "64",
+               "EventName": "PRNG_FUNCTIONS",
+               "BriefDescription": "PRNG Functions",
+               "PublicDescription": "Total number of the PRNG functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "65",
+               "EventName": "PRNG_CYCLES",
+               "BriefDescription": "PRNG Cycles",
+               "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "66",
+               "EventName": "PRNG_BLOCKED_FUNCTIONS",
+               "BriefDescription": "PRNG Blocked Functions",
+               "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "67",
+               "EventName": "PRNG_BLOCKED_CYCLES",
+               "BriefDescription": "PRNG Blocked Cycles",
+               "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "68",
+               "EventName": "SHA_FUNCTIONS",
+               "BriefDescription": "SHA Functions",
+               "PublicDescription": "Total number of SHA functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "69",
+               "EventName": "SHA_CYCLES",
+               "BriefDescription": "SHA Cycles",
+               "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "70",
+               "EventName": "SHA_BLOCKED_FUNCTIONS",
+               "BriefDescription": "SHA Blocked Functions",
+               "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "71",
+               "EventName": "SHA_BLOCKED_CYCLES",
+               "BriefDescription": "SHA Blocked Cycles",
+               "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "72",
+               "EventName": "DEA_FUNCTIONS",
+               "BriefDescription": "DEA Functions",
+               "PublicDescription": "Total number of the DEA functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "73",
+               "EventName": "DEA_CYCLES",
+               "BriefDescription": "DEA Cycles",
+               "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "74",
+               "EventName": "DEA_BLOCKED_FUNCTIONS",
+               "BriefDescription": "DEA Blocked Functions",
+               "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "75",
+               "EventName": "DEA_BLOCKED_CYCLES",
+               "BriefDescription": "DEA Blocked Cycles",
+               "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "76",
+               "EventName": "AES_FUNCTIONS",
+               "BriefDescription": "AES Functions",
+               "PublicDescription": "Total number of AES functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "77",
+               "EventName": "AES_CYCLES",
+               "BriefDescription": "AES Cycles",
+               "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "78",
+               "EventName": "AES_BLOCKED_FUNCTIONS",
+               "BriefDescription": "AES Blocked Functions",
+               "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "79",
+               "EventName": "AES_BLOCKED_CYCLES",
+               "BriefDescription": "AES Blocked Cycles",
+               "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+       },
+]
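
The crypto counters above come in a fixed quartet per function family (PRNG, SHA, DEA, AES): functions issued, busy cycles, blocked functions, and blocked cycles. Below is a minimal sketch of how contention figures could be derived from one such quartet, assuming the values were already read out with perf stat; the numbers are illustrative, not measured.

# Minimal sketch (not from the kernel tree): derive contention figures
# from one FUNCTIONS/CYCLES/BLOCKED_* quartet. Counter values are
# illustrative, as if already read out with `perf stat`.
counters = {
    "AES_FUNCTIONS": 120_000,
    "AES_CYCLES": 4_800_000,
    "AES_BLOCKED_FUNCTIONS": 3_000,
    "AES_BLOCKED_CYCLES": 450_000,
}

def contention(prefix, c):
    """Blocked ratio and per-function cost for one crypto family."""
    funcs = c[f"{prefix}_FUNCTIONS"]
    blocked = c[f"{prefix}_BLOCKED_FUNCTIONS"]
    return {
        "blocked_ratio": blocked / funcs if funcs else 0.0,
        "cycles_per_function": c[f"{prefix}_CYCLES"] / funcs if funcs else 0.0,
        "cycles_per_blocked_function": (
            c[f"{prefix}_BLOCKED_CYCLES"] / blocked if blocked else 0.0
        ),
    }

print(contention("AES", counters))  # blocked_ratio -> 0.025
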
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
new file mode 100644 (file)
index 0000000..5e36bc2
--- /dev/null
@@ -0,0 +1,30 @@
+[
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "80",
+               "EventName": "ECC_FUNCTION_COUNT",
+               "BriefDescription": "ECC Function Count",
+               "PublicDescription": "Long ECC function Count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "81",
+               "EventName": "ECC_CYCLES_COUNT",
+               "BriefDescription": "ECC Cycles Count",
+               "PublicDescription": "Long ECC Function cycles count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "82",
+               "EventName": "ECC_BLOCKED_FUNCTION_COUNT",
+               "BriefDescription": "ECC Blocked Function Count",
+               "PublicDescription": "Long ECC blocked function count"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "83",
+               "EventName": "ECC_BLOCKED_CYCLES_COUNT",
+               "BriefDescription": "ECC Blocked Cycles Count",
+               "PublicDescription": "Long ECC blocked cycles count"
+       },
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
new file mode 100644 (file)
index 0000000..89e0707
--- /dev/null
@@ -0,0 +1,373 @@
+[
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "128",
+               "EventName": "L1D_RO_EXCL_WRITES",
+               "BriefDescription": "L1D Read-only Exclusive Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "129",
+               "EventName": "DTLB2_WRITES",
+               "BriefDescription": "DTLB2 Writes",
+               "PublicDescription": "A translation has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "130",
+               "EventName": "DTLB2_MISSES",
+               "BriefDescription": "DTLB2 Misses",
+               "PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "131",
+               "EventName": "DTLB2_HPAGE_WRITES",
+               "BriefDescription": "DTLB2 One-Megabyte Page Writes",
+               "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "132",
+               "EventName": "DTLB2_GPAGE_WRITES",
+               "BriefDescription": "DTLB2 Two-Gigabyte Page Writes",
+               "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "133",
+               "EventName": "L1D_L2D_SOURCED_WRITES",
+               "BriefDescription": "L1D L2D Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "134",
+               "EventName": "ITLB2_WRITES",
+               "BriefDescription": "ITLB2 Writes",
+               "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "135",
+               "EventName": "ITLB2_MISSES",
+               "BriefDescription": "ITLB2 Misses",
+               "PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "136",
+               "EventName": "L1I_L2I_SOURCED_WRITES",
+               "BriefDescription": "L1I L2I Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "137",
+               "EventName": "TLB2_PTE_WRITES",
+               "BriefDescription": "TLB2 PTE Writes",
+               "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "138",
+               "EventName": "TLB2_CRSTE_WRITES",
+               "BriefDescription": "TLB2 CRSTE Writes",
+               "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "139",
+               "EventName": "TLB2_ENGINES_BUSY",
+               "BriefDescription": "TLB2 Engines Busy",
+               "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "140",
+               "EventName": "TX_C_TEND",
+               "BriefDescription": "Completed TEND instructions in constrained TX mode",
+               "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "141",
+               "EventName": "TX_NC_TEND",
+               "BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+               "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "143",
+               "EventName": "L1C_TLB2_MISSES",
+               "BriefDescription": "L1C TLB2 Misses",
+               "PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "144",
+               "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+               "BriefDescription": "L1D On-Chip L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "145",
+               "EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1D On-Chip Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "146",
+               "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "147",
+               "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES",
+               "BriefDescription": "L1D On-Cluster L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "148",
+               "EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1D On-Cluster Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "149",
+               "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "150",
+               "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES",
+               "BriefDescription": "L1D Off-Cluster L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "151",
+               "EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1D Off-Cluster Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "152",
+               "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "153",
+               "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES",
+               "BriefDescription": "L1D Off-Drawer L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "154",
+               "EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "155",
+               "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "156",
+               "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
+               "BriefDescription": "L1D On-Drawer L4 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "157",
+               "EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES",
+               "BriefDescription": "L1D Off-Drawer L4 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "158",
+               "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO",
+               "BriefDescription": "L1D On-Chip L3 Sourced Writes read-only",
+               "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "162",
+               "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+               "BriefDescription": "L1I On-Chip L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "163",
+               "EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1I On-Chip Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "164",
+               "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "165",
+               "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES",
+               "BriefDescription": "L1I On-Cluster L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "166",
+               "EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1I On-Cluster Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "167",
+               "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "168",
+               "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES",
+               "BriefDescription": "L1I Off-Cluster L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "169",
+               "EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1I Off-Cluster Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "170",
+               "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "171",
+               "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES",
+               "BriefDescription": "L1I Off-Drawer L3 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "172",
+               "EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES",
+               "BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "173",
+               "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV",
+               "BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "174",
+               "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
+               "BriefDescription": "L1I On-Drawer L4 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "175",
+               "EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES",
+               "BriefDescription": "L1I Off-Drawer L4 Sourced Writes",
+               "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "224",
+               "EventName": "BCD_DFP_EXECUTION_SLOTS",
+               "BriefDescription": "BCD DFP Execution Slots",
+               "PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "225",
+               "EventName": "VX_BCD_EXECUTION_SLOTS",
+               "BriefDescription": "VX BCD Execution Slots",
+               "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMP, VMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOP, VCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVD, VCVDG"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "226",
+               "EventName": "DECIMAL_INSTRUCTIONS",
+               "BriefDescription": "Decimal Instructions",
+               "PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "232",
+               "EventName": "LAST_HOST_TRANSLATIONS",
+               "BriefDescription": "Last host translation done",
+               "PublicDescription": "Last Host Translation done"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "243",
+               "EventName": "TX_NC_TABORT",
+               "BriefDescription": "Aborted transactions in non-constrained TX mode",
+               "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "244",
+               "EventName": "TX_C_TABORT_NO_SPECIAL",
+               "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+               "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "245",
+               "EventName": "TX_C_TABORT_SPECIAL",
+               "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+               "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "448",
+               "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
+               "BriefDescription": "Cycle count with one thread active",
+               "PublicDescription": "Cycle count with one thread active"
+       },
+       {
+               "Unit": "CPU-M-CF",
+               "EventCode": "449",
+               "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
+               "BriefDescription": "Cycle count with two threads active",
+               "PublicDescription": "Cycle count with two threads active"
+       },
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json
new file mode 100644 (file)
index 0000000..1a0034f
--- /dev/null
@@ -0,0 +1,7 @@
+[
+  {
+    "BriefDescription": "Transaction count",
+    "MetricName": "transaction",
+    "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+  }
+]
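
Editor's note (not part of the patch): the new "transaction" metric simply sums the two transaction-end counters with the three abort counters defined in extended.json above. A minimal standalone C sketch of that arithmetic, with made-up counter values standing in for the CPU-M-CF readings:

#include <stdio.h>

int main(void)
{
	/* placeholder counts standing in for the five CPU-M-CF counters */
	unsigned long long tx_c_tend = 1200, tx_nc_tend = 3400;
	unsigned long long tx_nc_tabort = 15;
	unsigned long long tx_c_tabort_special = 2, tx_c_tabort_no_special = 5;

	/* MetricExpr: TX_C_TEND + TX_NC_TEND + TX_NC_TABORT
	 *             + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL */
	unsigned long long transaction = tx_c_tend + tx_nc_tend + tx_nc_tabort
				       + tx_c_tabort_special + tx_c_tabort_no_special;

	printf("transaction count: %llu\n", transaction);
	return 0;
}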
index bd3fc57..61641a3 100644 (file)
@@ -4,4 +4,4 @@ Family-model,Version,Filename,EventType
 ^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
 ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
 ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
-^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core
+^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core
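
For context: mapfile.csv keys each event directory on a POSIX extended regular expression matched against the CPU identifier, so this one-line rename points existing IBM 8561/8562 machines at the renamed cf_z15 directory. A self-contained sketch of that match; the CPUID string below is a hypothetical example, not a real machine identifier:

#include <regex.h>
#include <stdio.h>

int main(void)
{
	const char *pattern = "^IBM.856[12].*3\\.6.[[:xdigit:]]+$";
	const char *cpuid = "IBM,8561,704,A01,3.6,002f"; /* hypothetical */
	regex_t re;

	if (regcomp(&re, pattern, REG_EXTENDED))
		return 1;
	printf("%s -> %s\n", cpuid,
	       regexec(&re, cpuid, 0, NULL, 0) == 0 ? "cf_z15" : "no match");
	regfree(&re);
	return 0;
}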
index fad4af9..6221a84 100644 (file)
     "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.",
     "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.",
     "UMask": "0x1"
+  },
+  {
+    "EventName": "l3_request_g1.caching_l3_cache_accesses",
+    "EventCode": "0x01",
+    "BriefDescription": "Caching: L3 cache accesses",
+    "UMask": "0x80",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_lookup_state.all_l3_req_typs",
+    "EventCode": "0x04",
+    "BriefDescription": "All L3 Request Types",
+    "UMask": "0xff",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+    "EventCode": "0x06",
+    "BriefDescription": "Other L3 Miss Request Types",
+    "UMask": "0xfe",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.request_miss",
+    "EventCode": "0x06",
+    "BriefDescription": "L3 cache misses",
+    "UMask": "0x01",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_sys_fill_latency",
+    "EventCode": "0x90",
+    "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x00",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+    "EventCode": "0x9a",
+    "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x3f",
+    "Unit": "L3PMC"
   }
 ]
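
A note on xi_sys_fill_latency: per its description the counter accumulates total fill cycles pre-divided by 16, so an average L3 miss latency can be derived by scaling back up and dividing by the miss-request count (xi_ccx_sdp_req1.all_l3_miss_req_typs). A sketch of that derivation with illustrative counter values only:

#include <stdio.h>

int main(void)
{
	double fill_latency = 5.0e6; /* xi_sys_fill_latency (cycles / 16) */
	double miss_reqs    = 4.0e5; /* xi_ccx_sdp_req1.all_l3_miss_req_typs */

	/* avg latency ~= (xi_sys_fill_latency * 16) / all L3 miss requests */
	printf("avg L3 miss latency: %.1f cycles\n",
	       fill_latency * 16.0 / miss_reqs);
	return 0;
}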
index 7b285b0..1079544 100644 (file)
@@ -13,7 +13,7 @@
   {
     "EventName": "ex_ret_brn",
     "EventCode": "0xc2",
-    "BriefDescription": "[Retired Branch Instructions.",
+    "BriefDescription": "Retired Branch Instructions.",
     "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
   },
   {
index d413761..e283726 100644 (file)
@@ -239,6 +239,7 @@ static struct map {
        { "hisi_sccl,ddrc", "hisi_sccl,ddrc" },
        { "hisi_sccl,hha", "hisi_sccl,hha" },
        { "hisi_sccl,l3c", "hisi_sccl,l3c" },
+       { "L3PMC", "amd_l3" },
        {}
 };
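
The table above is jevents' JSON-unit-to-PMU translation; the new row makes events tagged "Unit": "L3PMC" resolve to the kernel's amd_l3 PMU. A standalone sketch of the lookup shape (a stand-in table and helper, not the full jevents code):

#include <stdio.h>
#include <string.h>

static struct unit_map {
	const char *json;
	const char *perf;
} unit_map[] = {
	{ "hisi_sccl,l3c", "hisi_sccl,l3c" },
	{ "L3PMC", "amd_l3" },
	{ NULL, NULL }
};

static const char *unit_to_pmu(const char *field)
{
	for (struct unit_map *m = unit_map; m->json; m++)
		if (!strcmp(field, m->json))
			return m->perf;
	return field; /* pass unknown units through unchanged */
}

int main(void)
{
	printf("L3PMC -> %s\n", unit_to_pmu("L3PMC"));
	return 0;
}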
 
@@ -449,12 +450,12 @@ static struct fixed {
        const char *name;
        const char *event;
 } fixed[] = {
-       { "inst_retired.any", "event=0xc0" },
-       { "inst_retired.any_p", "event=0xc0" },
-       { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
-       { "cpu_clk_unhalted.thread", "event=0x3c" },
-       { "cpu_clk_unhalted.core", "event=0x3c" },
-       { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
+       { "inst_retired.any", "event=0xc0,period=2000003" },
+       { "inst_retired.any_p", "event=0xc0,period=2000003" },
+       { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" },
+       { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" },
+       { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" },
+       { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" },
        { NULL, NULL},
 };
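
Each fixed-event encoding now carries an explicit default sample period (2000003, matching the period used throughout the Intel event JSONs). Purely for illustration, a trivial sketch splitting one of these term strings into its key=value parts; this is a hypothetical parser, not perf's:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char spec[] = "event=0xc0,period=2000003";
	char *save;

	for (char *t = strtok_r(spec, ",", &save); t;
	     t = strtok_r(NULL, ",", &save))
		printf("term: %s\n", t); /* e.g. "event=0xc0" */
	return 0;
}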
 
index a637a4a..338cd9f 100644 (file)
@@ -10,6 +10,7 @@
 #include "tests.h"
 #include "debug.h"
 #include "parse-events.h"
+#include "util/mmap.h"
 #include <errno.h>
 #include <linux/string.h>
 
@@ -32,8 +33,8 @@ static int count_samples(struct evlist *evlist, int *sample_count,
 {
        int i;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               struct perf_mmap *map = &evlist->overwrite_mmap[i];
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               struct mmap *map = &evlist->overwrite_mmap[i];
                union perf_event *event;
 
                perf_mmap__read_init(map);
@@ -63,9 +64,9 @@ static int do_test(struct evlist *evlist, int mmap_pages,
        int err;
        char sbuf[STRERR_BUFSIZE];
 
-       err = perf_evlist__mmap(evlist, mmap_pages);
+       err = evlist__mmap(evlist, mmap_pages);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                return TEST_FAIL;
        }
@@ -75,7 +76,7 @@ static int do_test(struct evlist *evlist, int mmap_pages,
        evlist__disable(evlist);
 
        err = count_samples(evlist, sample_count, comm_count);
-       perf_evlist__munmap(evlist);
+       evlist__munmap(evlist);
        return err;
 }
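
This test (and several below) migrates from evlist->nr_mmaps / struct perf_mmap to evlist->core.nr_mmaps / struct mmap, and from perf_evlist__mmap() to evlist__mmap(), as the mmap code moves toward libperf. A self-contained mock of the resulting read-loop shape; the types and helpers here are stand-ins mimicking the API names in the diff, not perf's real implementations:

#include <stdio.h>

struct mmap { int events[4]; int head, tail; };

static int perf_mmap__read_init(struct mmap *m)
{
	return m->head > m->tail ? 0 : -1; /* -1: nothing buffered */
}

static int *perf_mmap__read_event(struct mmap *m)
{
	return m->tail < m->head ? &m->events[m->tail] : NULL;
}

static void perf_mmap__consume(struct mmap *m)
{
	m->tail++; /* mark the record as seen */
}

static void perf_mmap__read_done(struct mmap *m)
{
	(void)m; /* the real code updates the ring's tail pointer here */
}

int main(void)
{
	struct mmap maps[2] = { { {10, 11}, 2, 0 }, { {20}, 1, 0 } };
	int nr_mmaps = 2; /* plays the role of evlist->core.nr_mmaps */
	int i, *event;

	for (i = 0; i < nr_mmaps; i++) {
		struct mmap *md = &maps[i];

		if (perf_mmap__read_init(md) < 0)
			continue;
		while ((event = perf_mmap__read_event(md)) != NULL) {
			printf("mmap %d: event %d\n", i, *event);
			perf_mmap__consume(md);
		}
		perf_mmap__read_done(md);
	}
	return 0;
}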
 
index db2aadf..96c1373 100644 (file)
@@ -2,8 +2,8 @@
 #include <linux/compiler.h>
 #include <linux/bitmap.h>
 #include <perf/cpumap.h>
+#include <internal/cpumap.h>
 #include "tests.h"
-#include "cpumap.h"
 #include "debug.h"
 
 #define NBITS 100
index fc102e4..1eb0bff 100644 (file)
@@ -19,6 +19,7 @@
 #include "llvm.h"
 #include "debug.h"
 #include "parse-events.h"
+#include "util/mmap.h"
 #define NR_ITERS       111
 #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
 
@@ -167,9 +168,9 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
                goto out_delete_evlist;
        }
 
-       err = perf_evlist__mmap(evlist, opts.mmap_pages);
+       err = evlist__mmap(evlist, opts.mmap_pages);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
        }
@@ -178,9 +179,9 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
        (*func)();
        evlist__disable(evlist);
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                union perf_event *event;
-               struct perf_mmap *md;
+               struct mmap *md;
 
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
index f45fe11..2577d3e 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "tests.h"
-#include "debug.h"
-#include "util.h"
 #include "c++/clang-c.h"
 #include <linux/kernel.h>
 
index c1c29e0..f5764a3 100644 (file)
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "machine.h"
 #include "map.h"
 #include "symbol.h"
 #include "event.h"
 #include "record.h"
+#include "util/mmap.h"
+#include "util/synthetic-events.h"
 #include "thread.h"
 
 #include "tests.h"
@@ -419,10 +420,10 @@ static int process_events(struct machine *machine, struct evlist *evlist,
                          struct state *state)
 {
        union perf_event *event;
-       struct perf_mmap *md;
+       struct mmap *md;
        int i, ret;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
@@ -651,7 +652,7 @@ static int do_test_code_reading(bool try_kcore)
 
                perf_evlist__config(evlist, &opts, NULL);
 
-               evsel = perf_evlist__first(evlist);
+               evsel = evlist__first(evlist);
 
                evsel->core.attr.comm = 1;
                evsel->core.attr.disabled = 1;
@@ -685,9 +686,9 @@ static int do_test_code_reading(bool try_kcore)
                break;
        }
 
-       ret = perf_evlist__mmap(evlist, UINT_MAX);
+       ret = evlist__mmap(evlist, UINT_MAX);
        if (ret < 0) {
-               pr_debug("perf_evlist__mmap failed\n");
+               pr_debug("evlist__mmap failed\n");
                goto out_put;
        }
 
index 39493de..8a0d236 100644 (file)
@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include "cpumap.h"
 #include "event.h"
+#include "util/synthetic-events.h"
 #include <string.h>
 #include <linux/bitops.h>
 #include <perf/cpumap.h>
index a4874d4..627c1aa 100644 (file)
@@ -10,7 +10,6 @@
 #include <sys/resource.h>
 #include <api/fs/fs.h>
 #include "dso.h"
-#include "util.h"
 #include "machine.h"
 #include "symbol.h"
 #include "tests.h"
index 4125255..4f4ecbc 100644 (file)
@@ -15,6 +15,7 @@
 #include "symbol.h"
 #include "thread.h"
 #include "callchain.h"
+#include "util/synthetic-events.h"
 
 #if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
 #include "arch-tests.h"
index d824a72..1ee8704 100644 (file)
@@ -9,7 +9,6 @@
 #include "tests.h"
 #include "evlist.h"
 #include "evsel.h"
-#include "util.h"
 #include "debug.h"
 #include "parse-events.h"
 #include "thread_map.h"
@@ -17,7 +16,7 @@
 
 static int attach__enable_on_exec(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct target target = {
                .uid = UINT_MAX,
        };
@@ -59,7 +58,7 @@ static int detach__enable_on_exec(struct evlist *evlist)
 
 static int attach__current_disabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_thread_map *threads;
        int err;
 
@@ -85,7 +84,7 @@ static int attach__current_disabled(struct evlist *evlist)
 
 static int attach__current_enabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_thread_map *threads;
        int err;
 
@@ -105,14 +104,14 @@ static int attach__current_enabled(struct evlist *evlist)
 
 static int detach__disable(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
 
        return evsel__enable(evsel);
 }
 
 static int attach__cpu_disabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_cpu_map *cpus;
        int err;
 
@@ -141,7 +140,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
 
 static int attach__cpu_enabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_cpu_map *cpus;
        int err;
 
@@ -181,7 +180,7 @@ static int test_times(int (attach)(struct evlist *),
                goto out_err;
        }
 
-       evsel = perf_evlist__last(evlist);
+       evsel = evlist__last(evlist);
        evsel->core.attr.read_format |=
                PERF_FORMAT_TOTAL_TIME_ENABLED |
                PERF_FORMAT_TOTAL_TIME_RUNNING;
index cac4290..c727379 100644 (file)
@@ -2,10 +2,12 @@
 #include <linux/compiler.h>
 #include <perf/cpumap.h>
 #include <string.h>
+#include "cpumap.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "header.h"
 #include "machine.h"
+#include "util/synthetic-events.h"
 #include "tool.h"
 #include "tests.h"
 #include "debug.h"
@@ -90,12 +92,12 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu
        evlist = perf_evlist__new_default();
        TEST_ASSERT_VAL("failed to get evlist", evlist);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
-       TEST_ASSERT_VAL("failed to allos ids",
-                       !perf_evsel__alloc_id(evsel, 1, 1));
+       TEST_ASSERT_VAL("failed to allocate ids",
+                       !perf_evsel__alloc_id(&evsel->core, 1, 1));
 
-       perf_evlist__id_add(evlist, evsel, 0, 0, 123);
+       perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123);
 
        evsel->unit = strdup("KRAVA");
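
Note the API shift in this hunk: perf_evsel__alloc_id() and perf_evlist__id_add() now take the embedded libperf objects (&evsel->core, &evlist->core) rather than the tool-side wrappers. A mock of that ownership shape, using stand-in types rather than libperf's real ones:

#include <stdlib.h>

/* stand-ins for libperf's perf_evsel and the tool-side evsel wrapper */
struct perf_evsel { unsigned long long *id; int ids; };
struct evsel { struct perf_evsel core; };

static int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	evsel->id = calloc((size_t)ncpus * nthreads, sizeof(*evsel->id));
	return evsel->id ? 0 : -1;
}

int main(void)
{
	struct evsel evsel = { { NULL, 0 } };

	/* the id storage now belongs to the core object, as in the test */
	return perf_evsel__alloc_id(&evsel.core, 1, 1);
}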
 
index 5330f10..956205b 100644 (file)
@@ -34,7 +34,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
        }
 
        idx = 0;
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
                for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
index de110d8..6f34d08 100644 (file)
@@ -2,6 +2,7 @@
 #include <inttypes.h>
 #include "util/debug.h"
 #include "util/dso.h"
+#include "util/event.h" // struct perf_sample
 #include "util/map.h"
 #include "util/symbol.h"
 #include "util/sort.h"
@@ -10,6 +11,7 @@
 #include "util/thread.h"
 #include "tests/hists_common.h"
 #include <linux/kernel.h>
+#include <linux/perf_event.h>
 
 static struct {
        u32 pid;
index fa55b7b..6367c8f 100644 (file)
@@ -721,7 +721,7 @@ int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_u
        if (verbose > 1)
                machine__fprintf(machine, stderr);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        for (i = 0; i < ARRAY_SIZE(testcases); i++) {
                err = testcases[i](evsel, machine);
index 8be4d0b..a024d3f 100644 (file)
@@ -8,6 +8,7 @@
 #include "machine.h"
 #include "parse-events.h"
 #include "hists_common.h"
+#include "util/mmap.h"
 #include <errno.h>
 #include <linux/kernel.h>
 
@@ -310,8 +311,8 @@ int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unuse
                        print_hists_in(hists);
        }
 
-       first = perf_evlist__first(evlist);
-       evsel = perf_evlist__last(evlist);
+       first = evlist__first(evlist);
+       evsel = evlist__last(evlist);
 
        first_hists = evsel__hists(first);
        hists = evsel__hists(evsel);
index 3f6dfa2..38f804f 100644 (file)
@@ -608,7 +608,7 @@ int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unu
        if (verbose > 1)
                machine__fprintf(machine, stderr);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        for (i = 0; i < ARRAY_SIZE(testcases); i++) {
                err = testcases[i](evsel, machine);
index 9f0762d..92c7d59 100644 (file)
@@ -12,8 +12,8 @@
 #include "evsel.h"
 #include "record.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 #define CHECK__(x) {                           \
        while ((x) < 0) {                       \
 static int find_comm(struct evlist *evlist, const char *comm)
 {
        union perf_event *event;
-       struct perf_mmap *md;
+       struct mmap *md;
        int i, found;
 
        found = 0;
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
@@ -93,7 +93,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
 
        perf_evlist__config(evlist, &opts, NULL);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        evsel->core.attr.comm = 1;
        evsel->core.attr.disabled = 1;
@@ -105,7 +105,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
                goto out_err;
        }
 
-       CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+       CHECK__(evlist__mmap(evlist, UINT_MAX));
 
        /*
         * First, test that a 'comm' event can be found when the event is
@@ -132,7 +132,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
 
        evlist__enable(evlist);
 
-       evsel = perf_evlist__last(evlist);
+       evsel = evlist__last(evlist);
 
        CHECK__(evsel__disable(evsel));
 
@@ -143,7 +143,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
 
        found = find_comm(evlist, comm);
        if (found != 1) {
-               pr_debug("Seconf time, failed to find tracking event.\n");
+               pr_debug("Second time, failed to find tracking event.\n");
                goto out_err;
        }
 
index 022e4c9..ae6cda8 100644 (file)
@@ -7,7 +7,6 @@
 #include "llvm.h"
 #include "tests.h"
 #include "debug.h"
-#include "util.h"
 
 #ifdef HAVE_LIBBPF_SUPPORT
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
index 70c4847..c850d16 100644 (file)
@@ -100,7 +100,7 @@ make_install_info   := install-info
 make_install_pdf    := install-pdf
 make_install_prefix       := install prefix=/tmp/krava
 make_install_prefix_slash := install prefix=/tmp/krava/
-make_static         := LDFLAGS=-static
+make_static         := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1
 
 # all the NO_* variable combined
 make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
@@ -327,6 +327,10 @@ make_kernelsrc_tools:
        (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
        test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
+make_libperf:
+       @echo "- make -C lib";
+       make -C lib clean >$@ 2>&1; make -C lib >>$@ 2>&1 && rm $@
+
 FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
 FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
 
@@ -365,5 +369,5 @@ $(foreach t,$(run),$(if $(findstring make_static,$(t)),\
                        $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
 endif
 
-.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
+.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools make_libperf
 endif # ifndef MK
index 7672ade..a258bd5 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <perf/cpumap.h>
-#include "cpumap.h"
+#include <internal/cpumap.h>
 #include "debug.h"
 #include "env.h"
 #include "mem2node.h"
index 85e1d73..3a22dce 100644 (file)
@@ -10,8 +10,8 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "tests.h"
+#include "util/mmap.h"
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -43,7 +43,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
                     expected_nr_events[nsyscalls], i, j;
        struct evsel *evsels[nsyscalls], *evsel;
        char sbuf[STRERR_BUFSIZE];
-       struct perf_mmap *md;
+       struct mmap *md;
 
        threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
@@ -53,7 +53,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 
        cpus = perf_cpu_map__new(NULL);
        if (cpus == NULL) {
-               pr_debug("cpu_map__new\n");
+               pr_debug("perf_cpu_map__new\n");
                goto out_free_threads;
        }
 
@@ -100,7 +100,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
                expected_nr_events[i] = 1 + rand() % 127;
        }
 
-       if (perf_evlist__mmap(evlist, 128) < 0) {
+       if (evlist__mmap(evlist, 128) < 0) {
                pr_debug("failed to mmap events: %d (%s)\n", errno,
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
index 360d70d..8d9d4cb 100644 (file)
@@ -8,13 +8,15 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "debug.h"
+#include "event.h"
 #include "tests.h"
 #include "machine.h"
 #include "thread_map.h"
 #include "map.h"
 #include "symbol.h"
+#include "util/synthetic-events.h"
 #include "thread.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 
 #define THREADS 4
 
index 9171f77..93c1765 100644 (file)
@@ -14,7 +14,8 @@
 #include "evsel.h"
 #include "tests.h"
 #include "thread_map.h"
-#include "cpumap.h"
+#include <perf/cpumap.h>
+#include <internal/cpumap.h>
 #include "debug.h"
 #include "stat.h"
 #include "util/counts.h"
@@ -37,7 +38,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
 
        cpus = perf_cpu_map__new(NULL);
        if (cpus == NULL) {
-               pr_debug("cpu_map__new\n");
+               pr_debug("perf_cpu_map__new\n");
                goto out_thread_map_delete;
        }
 
index b71167b..2b5c468 100644 (file)
@@ -11,6 +11,7 @@
 #include "record.h"
 #include "tests.h"
 #include "debug.h"
+#include "util/mmap.h"
 #include <errno.h>
 
 #ifndef O_DIRECTORY
@@ -69,9 +70,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
                goto out_delete_evlist;
        }
 
-       err = perf_evlist__mmap(evlist, UINT_MAX);
+       err = evlist__mmap(evlist, UINT_MAX);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
        }
@@ -86,9 +87,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
        while (1) {
                int before = nr_events;
 
-               for (i = 0; i < evlist->nr_mmaps; i++) {
+               for (i = 0; i < evlist->core.nr_mmaps; i++) {
                        union perf_event *event;
-                       struct perf_mmap *md;
+                       struct mmap *md;
 
                        md = &evlist->mmap[i];
                        if (perf_mmap__read_init(md) < 0)
@@ -126,7 +127,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
                }
 
                if (nr_events == before)
-                       perf_evlist__poll(evlist, 10);
+                       evlist__poll(evlist, 10);
 
                if (++nr_polls > 5) {
                        pr_debug("%s: no events!\n", __func__);
index 02ba696..25e0ed2 100644 (file)
@@ -6,7 +6,6 @@
 #include "tests.h"
 #include "debug.h"
 #include "pmu.h"
-#include "util.h"
 #include <dirent.h>
 #include <errno.h>
 #include <sys/types.h>
@@ -47,7 +46,7 @@ static bool kvm_s390_create_vm_valid(void)
 
 static int test__checkevent_tracepoint(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
@@ -78,7 +77,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist)
 
 static int test__checkevent_raw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
@@ -88,7 +87,7 @@ static int test__checkevent_raw(struct evlist *evlist)
 
 static int test__checkevent_numeric(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
@@ -98,7 +97,7 @@ static int test__checkevent_numeric(struct evlist *evlist)
 
 static int test__checkevent_symbolic_name(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
@@ -109,7 +108,7 @@ static int test__checkevent_symbolic_name(struct evlist *evlist)
 
 static int test__checkevent_symbolic_name_config(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
@@ -130,7 +129,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist)
 
 static int test__checkevent_symbolic_alias(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
@@ -141,7 +140,7 @@ static int test__checkevent_symbolic_alias(struct evlist *evlist)
 
 static int test__checkevent_genhw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type);
@@ -151,7 +150,7 @@ static int test__checkevent_genhw(struct evlist *evlist)
 
 static int test__checkevent_breakpoint(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -165,7 +164,7 @@ static int test__checkevent_breakpoint(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_x(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -178,7 +177,7 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_r(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type",
@@ -193,7 +192,7 @@ static int test__checkevent_breakpoint_r(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_w(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type",
@@ -208,7 +207,7 @@ static int test__checkevent_breakpoint_w(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_rw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type",
@@ -223,7 +222,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist)
 
 static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -254,7 +253,7 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
 
 static int test__checkevent_raw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -266,7 +265,7 @@ static int test__checkevent_raw_modifier(struct evlist *evlist)
 
 static int test__checkevent_numeric_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -278,7 +277,7 @@ static int test__checkevent_numeric_modifier(struct evlist *evlist)
 
 static int test__checkevent_symbolic_name_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -290,7 +289,7 @@ static int test__checkevent_symbolic_name_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_host_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
        TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
@@ -300,7 +299,7 @@ static int test__checkevent_exclude_host_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_guest_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
        TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
@@ -310,7 +309,7 @@ static int test__checkevent_exclude_guest_modifier(struct evlist *evlist)
 
 static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -322,7 +321,7 @@ static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist)
 
 static int test__checkevent_genhw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -334,7 +333,7 @@ static int test__checkevent_genhw_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_idle_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle);
        TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
@@ -349,7 +348,7 @@ static int test__checkevent_exclude_idle_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle);
        TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
@@ -364,7 +363,7 @@ static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
@@ -379,7 +378,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -393,7 +392,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -407,7 +406,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -421,7 +420,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -436,7 +435,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
 static int test__checkevent_pmu(struct evlist *evlist)
 {
 
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
@@ -454,7 +453,7 @@ static int test__checkevent_pmu(struct evlist *evlist)
 
 static int test__checkevent_list(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
 
@@ -493,7 +492,7 @@ static int test__checkevent_list(struct evlist *evlist)
 
 static int test__checkevent_pmu_name(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        /* cpu/config=1,name=krava/u */
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
@@ -514,7 +513,7 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
 
 static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        /* cpu/config=1,call-graph=fp,time,period=100000/ */
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
@@ -547,7 +546,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
 
 static int test__checkevent_pmu_events(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
@@ -565,7 +564,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist)
 
 static int test__checkevent_pmu_events_mix(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        /* pmu-event:u */
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
@@ -643,7 +642,7 @@ static int test__group1(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* instructions:k */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -685,7 +684,7 @@ static int test__group2(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* faults + :ku modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config);
@@ -740,7 +739,7 @@ static int test__group3(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
        /* group1 syscalls:sys_enter_openat:H */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong sample_type",
                PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
@@ -832,7 +831,7 @@ static int test__group4(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles:u + p */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -876,7 +875,7 @@ static int test__group5(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
        /* cycles + G */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -962,7 +961,7 @@ static int test__group_gh1(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles + :H group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1002,7 +1001,7 @@ static int test__group_gh2(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles + :G group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1042,7 +1041,7 @@ static int test__group_gh3(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles:G + :u group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1082,7 +1081,7 @@ static int test__group_gh4(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles:G + :uG group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1121,7 +1120,7 @@ static int test__leader_sample1(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
 
        /* cycles - sampling group leader */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1174,7 +1173,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
 
        /* instructions - sampling group leader */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -1208,7 +1207,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
 
 static int test__checkevent_pinned_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -1226,7 +1225,7 @@ static int test__pinned_group(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
 
        /* cycles - group leader */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1252,7 +1251,7 @@ static int test__pinned_group(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_len(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -1267,7 +1266,7 @@ static int test__checkevent_breakpoint_len(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_len_w(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -1283,7 +1282,7 @@ static int test__checkevent_breakpoint_len_w(struct evlist *evlist)
 static int
 test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -1295,7 +1294,7 @@ test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist)
 
 static int test__checkevent_precise_max_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
@@ -1306,7 +1305,7 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist)
 
 static int test__checkevent_config_symbol(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0);
        return 0;
@@ -1314,7 +1313,7 @@ static int test__checkevent_config_symbol(struct evlist *evlist)
 
 static int test__checkevent_config_raw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0);
        return 0;
@@ -1322,7 +1321,7 @@ static int test__checkevent_config_raw(struct evlist *evlist)
 
 static int test__checkevent_config_num(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0);
        return 0;
@@ -1330,7 +1329,7 @@ static int test__checkevent_config_num(struct evlist *evlist)
 
 static int test__checkevent_config_cache(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0);
        return 0;
@@ -1343,7 +1342,7 @@ static bool test__intel_pt_valid(void)
 
 static int test__intel_pt(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0);
        return 0;
@@ -1351,7 +1350,7 @@ static int test__intel_pt(struct evlist *evlist)
 
 static int test__checkevent_complex_name(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong complex name parsing", strcmp(evsel->name, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks") == 0);
        return 0;
@@ -1359,7 +1358,7 @@ static int test__checkevent_complex_name(struct evlist *evlist)
 
 static int test__sym_event_slash(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
        TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES);
@@ -1369,7 +1368,7 @@ static int test__sym_event_slash(struct evlist *evlist)
 
 static int test__sym_event_dc(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
        TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES);
index 8284752..adf3c9c 100644 (file)
@@ -1,4 +1,3 @@
-// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <stddef.h>
@@ -8,7 +7,6 @@
 #include "event.h"
 #include "evlist.h"
 #include "header.h"
-#include "util.h"
 #include "debug.h"
 
 static int process_event(struct evlist **pevlist, union perf_event *event)
index a693bcf..dd865e0 100644 (file)
@@ -4,7 +4,6 @@
 
 #include "tests.h"
 #include "debug.h"
-#include "util.h"
 #include "perf-hooks.h"
 
 static void sigsegv_handler(int sig __maybe_unused)
@@ -20,12 +19,11 @@ static void sigsegv_handler(int sig __maybe_unused)
 static void the_hook(void *_hook_flags)
 {
        int *hook_flags = _hook_flags;
-       int *p = NULL;
 
        *hook_flags = 1234;
 
        /* Generate a segfault, test perf_hooks__recover */
-       *p = 0;
+       raise(SIGSEGV);
 }
 
 int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused)
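
The switch from writing through a NULL pointer to raise(SIGSEGV) avoids relying on undefined behavior that a compiler is free to optimize away; raise() delivers the signal deterministically. For illustration, a self-contained recovery pattern in the same spirit (perf_hooks__recover itself is perf-internal; this sketch uses plain sigsetjmp/siglongjmp instead):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf env;

static void handler(int sig)
{
	(void)sig;
	siglongjmp(env, 1); /* resume at the sigsetjmp point */
}

int main(void)
{
	signal(SIGSEGV, handler);
	if (sigsetjmp(env, 1) == 0) {
		raise(SIGSEGV); /* deterministic, unlike a NULL-pointer write */
		puts("not reached");
	} else {
		puts("recovered from SIGSEGV");
	}
	return 0;
}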
index e1b4229..437426b 100644 (file)
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "record.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
 {
@@ -103,7 +104,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        /*
         * Config the evsels, setting attr->comm on the first one, etc.
         */
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
        perf_evsel__set_sample_bit(evsel, CPU);
        perf_evsel__set_sample_bit(evsel, TID);
        perf_evsel__set_sample_bit(evsel, TIME);
@@ -143,9 +144,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
         * fds in the same CPU to be injected in the same mmap ring buffer
         * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
         */
-       err = perf_evlist__mmap(evlist, opts.mmap_pages);
+       err = evlist__mmap(evlist, opts.mmap_pages);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
        }
@@ -164,9 +165,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        while (1) {
                int before = total_events;
 
-               for (i = 0; i < evlist->nr_mmaps; i++) {
+               for (i = 0; i < evlist->core.nr_mmaps; i++) {
                        union perf_event *event;
-                       struct perf_mmap *md;
+                       struct mmap *md;
 
                        md = &evlist->mmap[i];
                        if (perf_mmap__read_init(md) < 0)
@@ -286,7 +287,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
                 * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
                 */
                if (total_events == before && false)
-                       perf_evlist__poll(evlist, -1);
+                       evlist__poll(evlist, -1);
 
                sleep(1);
                if (++wakeups > 5) {
index 14a7889..74379ff 100644 (file)
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "parse-events.h"
 #include "pmu.h"
-#include "util.h"
 #include "tests.h"
 #include <errno.h>
 #include <stdio.h>
index 5fcc068..3a02426 100644 (file)
@@ -9,10 +9,10 @@
 
 #include "map_symbol.h"
 #include "branch.h"
-#include "util.h"
 #include "event.h"
 #include "evsel.h"
 #include "debug.h"
+#include "util/synthetic-events.h"
 
 #include "tests.h"
 
index cf1bd57..60f0e9e 100644 (file)
@@ -3,6 +3,7 @@
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <sys/epoll.h>
 #include <util/symbol.h>
 #include <linux/filter.h>
index cc10b41..c191150 100644 (file)
@@ -5,6 +5,7 @@
 #include "stat.h"
 #include "counts.h"
 #include "debug.h"
+#include "util/synthetic-events.h"
 
 static bool has_term(struct perf_record_stat_config *config,
                     u64 tag, u64 val)
index 97694a0..84519df 100644 (file)
@@ -12,6 +12,7 @@
 #include "util/evsel.h"
 #include "util/evlist.h"
 #include "util/cpumap.h"
+#include "util/mmap.h"
 #include "util/thread_map.h"
 #include <perf/evlist.h>
 
@@ -42,7 +43,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
        };
        struct perf_cpu_map *cpus;
        struct perf_thread_map *threads;
-       struct perf_mmap *md;
+       struct mmap *md;
 
        attr.sample_freq = 500;
 
@@ -82,7 +83,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
                goto out_delete_evlist;
        }
 
-       err = perf_evlist__mmap(evlist, 128);
+       err = evlist__mmap(evlist, 128);
        if (err < 0) {
                pr_debug("failed to mmap event: %d (%s)\n", errno,
                         str_error_r(errno, sbuf, sizeof(sbuf)));
index 1a60fa1..ffa592e 100644 (file)
@@ -14,9 +14,9 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "record.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 static int spin_sleep(void)
 {
@@ -144,7 +144,7 @@ static int process_sample_event(struct evlist *evlist,
                        return err;
                /*
                 * Check for no missing sched_switch events i.e. that the
-                * evsel->system_wide flag has worked.
+                * evsel->core.system_wide flag has worked.
                 */
                if (switch_tracking->tids[cpu] != -1 &&
                    switch_tracking->tids[cpu] != prev_tid) {
@@ -264,10 +264,10 @@ static int process_events(struct evlist *evlist,
        unsigned pos, cnt = 0;
        LIST_HEAD(events);
        struct event_node *events_array, *node;
-       struct perf_mmap *md;
+       struct mmap *md;
        int i, ret;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
@@ -316,7 +316,7 @@ out_free_nodes:
  *
  * This function implements a test that checks that sched_switch events and
  * tracking events can be recorded for a workload (current process) using the
- * evsel->system_wide and evsel->tracking flags (respectively) with other events
+ * evsel->core.system_wide and evsel->tracking flags (respectively) with other events
  * sometimes enabled or disabled.
  */
 int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
@@ -367,7 +367,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       cpu_clocks_evsel = perf_evlist__last(evlist);
+       cpu_clocks_evsel = evlist__last(evlist);
 
        /* Second event */
        err = parse_events(evlist, "cycles:u", NULL);
@@ -376,7 +376,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       cycles_evsel = perf_evlist__last(evlist);
+       cycles_evsel = evlist__last(evlist);
 
        /* Third event */
        if (!perf_evlist__can_select_event(evlist, sched_switch)) {
@@ -391,22 +391,22 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       switch_evsel = perf_evlist__last(evlist);
+       switch_evsel = evlist__last(evlist);
 
        perf_evsel__set_sample_bit(switch_evsel, CPU);
        perf_evsel__set_sample_bit(switch_evsel, TIME);
 
-       switch_evsel->system_wide = true;
+       switch_evsel->core.system_wide = true;
        switch_evsel->no_aux_samples = true;
        switch_evsel->immediate = true;
 
        /* Test moving an event to the front */
-       if (cycles_evsel == perf_evlist__first(evlist)) {
+       if (cycles_evsel == evlist__first(evlist)) {
                pr_debug("cycles event already at front");
                goto out_err;
        }
        perf_evlist__to_front(evlist, cycles_evsel);
-       if (cycles_evsel != perf_evlist__first(evlist)) {
+       if (cycles_evsel != evlist__first(evlist)) {
                pr_debug("Failed to move cycles event to front");
                goto out_err;
        }
@@ -421,7 +421,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       tracking_evsel = perf_evlist__last(evlist);
+       tracking_evsel = evlist__last(evlist);
 
        perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
@@ -434,7 +434,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
        perf_evlist__config(evlist, &opts, NULL);
 
        /* Check moved event is still at the front */
-       if (cycles_evsel != perf_evlist__first(evlist)) {
+       if (cycles_evsel != evlist__first(evlist)) {
                pr_debug("Front event no longer at front");
                goto out_err;
        }
@@ -461,9 +461,9 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out;
        }
 
-       err = perf_evlist__mmap(evlist, UINT_MAX);
+       err = evlist__mmap(evlist, UINT_MAX);
        if (err) {
-               pr_debug("perf_evlist__mmap failed!\n");
+               pr_debug("evlist__mmap failed!\n");
                goto out_err;
        }
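
The evsel->system_wide to evsel->core.system_wide moves in this test follow from the libperf split: the tool-side struct evsel embeds libperf's struct perf_evsel as its core member, and the generic state lives there. A rough sketch of the shape, not the full definitions:

struct perf_evsel {                     /* tools/lib/perf: generic state */
        struct perf_event_attr  attr;
        bool                    system_wide;
        /* ... */
};

struct evsel {                          /* tools/perf: wraps the core */
        struct perf_evsel       core;
        bool                    no_aux_samples; /* tool-only flags stay here */
        bool                    immediate;
        /* ... */
};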
 
index f610e8c..bce3a4c 100644 (file)
@@ -4,12 +4,13 @@
 #include "evsel.h"
 #include "target.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 #include <errno.h>
 #include <signal.h>
 #include <linux/string.h>
+#include <perf/cpumap.h>
 #include <perf/evlist.h>
 
 static int exited;
@@ -51,7 +52,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
        char sbuf[STRERR_BUFSIZE];
        struct perf_cpu_map *cpus;
        struct perf_thread_map *threads;
-       struct perf_mmap *md;
+       struct mmap *md;
 
        signal(SIGCHLD, sig_handler);
 
@@ -87,7 +88,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
                goto out_delete_evlist;
        }
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
        evsel->core.attr.task = 1;
 #ifdef __s390x__
        evsel->core.attr.sample_freq = 1000000;
@@ -106,7 +107,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
                goto out_delete_evlist;
        }
 
-       if (perf_evlist__mmap(evlist, 128) < 0) {
+       if (evlist__mmap(evlist, 128) < 0) {
                pr_debug("failed to mmap events: %d (%s)\n", errno,
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
@@ -129,7 +130,7 @@ retry:
 
 out_init:
        if (!exited || !nr_exit) {
-               perf_evlist__poll(evlist, -1);
+               evlist__poll(evlist, -1);
                goto retry;
        }
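
evlist__first()/evlist__last() (and evlist__poll() above) are thin wrappers over that same core. A hedged sketch of roughly what the first/last accessors amount to, assuming the container_of() pattern implied by the core embedding:

static inline struct evsel *evlist__first(struct evlist *evlist)
{
        struct perf_evsel *evsel = perf_evlist__first(&evlist->core);

        return container_of(evsel, struct evsel, core);
}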
 
index 39168c5..28f51c4 100644 (file)
@@ -8,6 +8,7 @@
 #include "thread_map.h"
 #include "debug.h"
 #include "event.h"
+#include "util/synthetic-events.h"
 #include <linux/zalloc.h>
 #include <perf/event.h>
 
index a4f9f51..4a80049 100644 (file)
@@ -3,11 +3,12 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <perf/cpumap.h>
+#include "cpumap.h"
 #include "tests.h"
-#include "util.h"
 #include "session.h"
 #include "evlist.h"
 #include "debug.h"
+#include <linux/err.h>
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
 #define DATA_SIZE      10
@@ -39,7 +40,7 @@ static int session_write_header(char *path)
        };
 
        session = perf_session__new(&data, false, NULL);
-       TEST_ASSERT_VAL("can't get session", session);
+       TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
        session->evlist = perf_evlist__new_default();
        TEST_ASSERT_VAL("can't get evlist", session->evlist);
@@ -70,7 +71,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
        int i;
 
        session = perf_session__new(&data, false, NULL);
-       TEST_ASSERT_VAL("can't get session", session);
+       TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
        /* On platforms with large numbers of CPUs process_cpu_topology()
         * might issue an error while reading the perf.data file section
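
perf_session__new() no longer returns NULL on failure but an error pointer, so callers trade NULL checks for IS_ERR() and can propagate the real error with PTR_ERR(). A sketch of the new convention, with data as in the test above:

struct perf_session *session = perf_session__new(&data, false, NULL);

if (IS_ERR(session))            /* was: if (!session) */
        return PTR_ERR(session);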
index 01f434c..aa296ff 100644 (file)
@@ -7,7 +7,7 @@
 #include "dso.h"
 #include "map.h"
 #include "symbol.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "tests.h"
 #include "debug.h"
 #include "machine.h"
index f93d40b..781afe4 100644 (file)
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "../util/util.h"
 #include "../util/string2.h"
 #include "../util/config.h"
 #include "libslang.h"
index ac74ed2..82207db 100644 (file)
@@ -2,7 +2,6 @@
 #include "../browser.h"
 #include "../helpline.h"
 #include "../ui.h"
-#include "../util.h"
 #include "../../util/annotate.h"
 #include "../../util/debug.h"
 #include "../../util/dso.h"
index 0f59a70..57e6e43 100644 (file)
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "util/debug.h"
 #include "ui/browser.h"
 #include "ui/keysyms.h"
 #include "ui/ui.h"
index 589168c..7a7187e 100644 (file)
@@ -3319,13 +3319,13 @@ browse_hists:
                        switch (key) {
                        case K_TAB:
                                if (pos->core.node.next == &evlist->core.entries)
-                                       pos = perf_evlist__first(evlist);
+                                       pos = evlist__first(evlist);
                                else
                                        pos = perf_evsel__next(pos);
                                goto browse_hists;
                        case K_UNTAB:
                                if (pos->core.node.prev == &evlist->core.entries)
-                                       pos = perf_evlist__last(evlist);
+                                       pos = evlist__last(evlist);
                                else
                                        pos = perf_evsel__prev(pos);
                                goto browse_hists;
@@ -3417,7 +3417,7 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
 
 single_entry:
        if (nr_entries == 1) {
-               struct evsel *first = perf_evlist__first(evlist);
+               struct evsel *first = evlist__first(evlist);
 
                return perf_evsel__hists_browse(first, nr_entries, help,
                                                false, hbt, min_pcnt,
index 893b065..3d49b91 100644 (file)
@@ -5,7 +5,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <linux/bitops.h>
-#include "../../util/util.h"
 #include "../../util/debug.h"
 #include "../../util/map.h"
 #include "../../util/dso.h"
index f16a38f..76d356a 100644 (file)
@@ -7,7 +7,7 @@
 #include "config.h"
 #include "time-utils.h"
 #include "../util.h"
-#include "../../util/util.h"
+#include "../../util/util.h" // perf_exe()
 #include "../../perf.h"
 #include <stdlib.h>
 #include <string.h>
index 586a21a..fc733a6 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "../../builtin.h"
 #include "../../perf.h"
-#include "../../util/util.h"
+#include "../../util/util.h" // perf_exe()
+#include "../util.h"
 #include "../../util/hist.h"
 #include "../../util/debug.h"
 #include "../../util/symbol.h"
index e166da9..e40a006 100644 (file)
@@ -6,7 +6,6 @@
 #include "gtk.h"
 #include "../ui.h"
 #include "../helpline.h"
-#include "../../util/debug.h"
 
 static void gtk_helpline_pop(void)
 {
index 6c2efc1..ed1a97b 100644 (file)
@@ -8,6 +8,7 @@
 #include "../string2.h"
 #include "gtk.h"
 #include <signal.h>
+#include <stdlib.h>
 #include <linux/string.h>
 
 #define MAX_COLUMNS                    32
index b6ad885..eea6fcd 100644 (file)
@@ -3,7 +3,6 @@
 
 #include "gtk.h"
 #include "../progress.h"
-#include "util.h"
 
 static GtkWidget *dialog;
 static GtkWidget *progress;
index 1a2616b..f5eee4d 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "gtk.h"
-#include "../../util/debug.h"
+#include <linux/compiler.h>
+#include "../util.h"
 
 extern struct perf_error_ops perf_gtk_eops;
 
index c2c5589..c47f5c3 100644 (file)
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "../util.h"
-#include "../../util/debug.h"
 #include "gtk.h"
 
 #include <stdlib.h>
index 54bcd08..911182b 100644 (file)
@@ -3,10 +3,8 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "../util/debug.h"
 #include "helpline.h"
 #include "ui.h"
-#include "../util/util.h"
 
 char ui_helpline__current[512];
 
index 3e533de..f736755 100644 (file)
@@ -8,7 +8,6 @@
 #include "../util/callchain.h"
 #include "../util/debug.h"
 #include "../util/hist.h"
-#include "../util/util.h"
 #include "../util/sort.h"
 #include "../util/evsel.h"
 #include "../util/evlist.h"
index c7a86b4..700335c 100644 (file)
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <pthread.h>
 #include <dlfcn.h>
+#include <unistd.h>
 
 #include <subcmd/pager.h>
 #include "../util/debug.h"
 #include "../util/hist.h"
-#include "../util/util.h"
 #include "ui.h"
 
 pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
index 832ca6c..5365606 100644 (file)
@@ -5,6 +5,7 @@
 
 #include "../../util/callchain.h"
 #include "../../util/debug.h"
+#include "../../util/event.h"
 #include "../../util/hist.h"
 #include "../../util/map.h"
 #include "../../util/map_groups.h"
index 5f188f6..298d6af 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 
-#include "../../util/debug.h"
 #include "../helpline.h"
 #include "../ui.h"
 #include "../libslang.h"
index 56651a4..e9bfe85 100644 (file)
@@ -2,13 +2,13 @@
 #include <signal.h>
 #include <stdbool.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <linux/kernel.h>
 #ifdef HAVE_BACKTRACE_SUPPORT
 #include <execinfo.h>
 #endif
 
 #include "../../util/debug.h"
-#include "../../util/util.h"
 #include "../../perf.h"
 #include "../browser.h"
 #include "../helpline.h"
index 087d9ab..b98dd0e 100644 (file)
@@ -5,7 +5,6 @@
 #include <stdlib.h>
 #include <sys/ttydefaults.h>
 
-#include "../../util/debug.h"
 #include "../browser.h"
 #include "../keysyms.h"
 #include "../helpline.h"
index 0b4d8e0..8dcfca1 100644 (file)
@@ -3,6 +3,7 @@ perf-y += block-range.o
 perf-y += build-id.o
 perf-y += cacheline.o
 perf-y += config.o
+perf-y += copyfile.o
 perf-y += ctype.o
 perf-y += db-export.o
 perf-y += env.o
@@ -10,6 +11,7 @@ perf-y += event.o
 perf-y += evlist.o
 perf-y += evsel.o
 perf-y += evsel_fprintf.o
+perf-y += perf_event_attr_fprintf.o
 perf-y += evswitch.o
 perf-y += find_bit.o
 perf-y += get_current_dir_name.o
@@ -86,6 +88,7 @@ perf-y += stat-display.o
 perf-y += record.o
 perf-y += srcline.o
 perf-y += srccode.o
+perf-y += synthetic-events.o
 perf-y += data.o
 perf-y += tsc.o
 perf-y += cloexec.o
index 1748f52..e42bf57 100644 (file)
@@ -14,7 +14,7 @@
 #include <bpf/btf.h>
 #include <bpf/libbpf.h>
 #include <linux/btf.h>
-#include "util.h"
+#include "util.h" // hex_width()
 #include "ui/ui.h"
 #include "sort.h"
 #include "build-id.h"
@@ -34,6 +34,7 @@
 #include "bpf-event.h"
 #include "block-range.h"
 #include "string2.h"
+#include "util/event.h"
 #include "arch/common.h"
 #include <regex.h>
 #include <pthread.h>
@@ -1630,6 +1631,19 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map *
        case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
                scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
                break;
+       case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
+               scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
+               break;
+       case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
+               scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
+               break;
+       case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
+               scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name);
+               break;
+       case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
+               scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
+                         dso->long_name);
+               break;
        default:
                scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
                break;
@@ -1661,7 +1675,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 
        build_id_path = strdup(filename);
        if (!build_id_path)
-               return -1;
+               return ENOMEM;
 
        /*
         * old style build-id cache has name of XX/XXXXXXX.. while
@@ -1712,13 +1726,13 @@ static int symbol__disassemble_bpf(struct symbol *sym,
        char tpath[PATH_MAX];
        size_t buf_size;
        int nr_skip = 0;
-       int ret = -1;
        char *buf;
        bfd *bfdf;
+       int ret;
        FILE *s;
 
        if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
-               return -1;
+               return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
 
        pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
                  sym->name, sym->start, sym->end - sym->start);
@@ -1731,8 +1745,10 @@ static int symbol__disassemble_bpf(struct symbol *sym,
        assert(bfd_check_format(bfdf, bfd_object));
 
        s = open_memstream(&buf, &buf_size);
-       if (!s)
+       if (!s) {
+               ret = errno;
                goto out;
+       }
        init_disassemble_info(&info, s,
                              (fprintf_ftype) fprintf);
 
@@ -1741,8 +1757,10 @@ static int symbol__disassemble_bpf(struct symbol *sym,
 
        info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
                                                 dso->bpf_prog.id);
-       if (!info_node)
+       if (!info_node) {
+               ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
                goto out;
+       }
        info_linear = info_node->info_linear;
        sub_id = dso->bpf_prog.sub_id;
 
@@ -2070,11 +2088,11 @@ int symbol__annotate(struct symbol *sym, struct map *map,
        int err;
 
        if (!arch_name)
-               return -1;
+               return errno;
 
        args.arch = arch = arch__find(arch_name);
        if (arch == NULL)
-               return -ENOTSUP;
+               return ENOTSUP;
 
        if (parch)
                *parch = arch;
@@ -2970,7 +2988,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel,
 
        notes->offsets = zalloc(size * sizeof(struct annotation_line *));
        if (notes->offsets == NULL)
-               return -1;
+               return ENOMEM;
 
        if (perf_evsel__is_group_event(evsel))
                nr_pcnt = evsel->core.nr_members;
@@ -2996,7 +3014,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel,
 
 out_free_offsets:
        zfree(&notes->offsets);
-       return -1;
+       return err;
 }
 
 #define ANNOTATION__CFG(n) \
index d94be91..d76fd0e 100644 (file)
@@ -370,6 +370,10 @@ enum symbol_disassemble_errno {
 
        SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX       = __SYMBOL_ANNOTATE_ERRNO__START,
        SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF,
+       SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING,
+       SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP,
+       SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE,
+       SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF,
 
        __SYMBOL_ANNOTATE_ERRNO__END,
 };
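
With these codes, annotation failures come back either as plain positive errno values or as entries from this private range, and both decode to strings via symbol__strerror_disassemble(). A sketch of a caller, with sym, map and evsel as in the surrounding code, and assuming annotation__default_options and a NULL parch are acceptable for the remaining symbol__annotate2() parameters:

char msg[BUFSIZ];
int err = symbol__annotate2(sym, map, evsel,
                            &annotation__default_options, NULL);

if (err) {
        symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
        pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
}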
index 8a7340f..53be12b 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/log2.h>
 #include <linux/zalloc.h>
 
-#include "cpumap.h"
 #include "color.h"
 #include "evsel.h"
 #include "machine.h"
index 6f25224..8470dfe 100644 (file)
@@ -31,8 +31,8 @@
 #include "map.h"
 #include "pmu.h"
 #include "evsel.h"
-#include "cpumap.h"
 #include "symbol.h"
+#include "util/synthetic-events.h"
 #include "thread_map.h"
 #include "asm/bug.h"
 #include "auxtrace.h"
 #include "intel-bts.h"
 #include "arm-spe.h"
 #include "s390-cpumsf.h"
-#include "util.h"
+#include "util/mmap.h"
 
 #include <linux/ctype.h>
+#include <linux/kernel.h>
 #include "symbol/kallsyms.h"
+#include <internal/lib.h>
 
 static bool auxtrace__dont_decode(struct perf_session *session)
 {
@@ -1226,7 +1228,7 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
        return 0;
 }
 
-static int __auxtrace_mmap__read(struct perf_mmap *map,
+static int __auxtrace_mmap__read(struct mmap *map,
                                 struct auxtrace_record *itr,
                                 struct perf_tool *tool, process_auxtrace_t fn,
                                 bool snapshot, size_t snapshot_size)
@@ -1337,13 +1339,13 @@ static int __auxtrace_mmap__read(struct perf_mmap *map,
        return 1;
 }
 
-int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
                        struct perf_tool *tool, process_auxtrace_t fn)
 {
        return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
 }
 
-int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
+int auxtrace_mmap__read_snapshot(struct mmap *map,
                                 struct auxtrace_record *itr,
                                 struct perf_tool *tool, process_auxtrace_t fn,
                                 size_t snapshot_size)
index 37e70dc..f201f36 100644 (file)
 #include <errno.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdio.h> // FILE
 #include <linux/list.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 #include <asm/bitsperlong.h>
 #include <asm/barrier.h>
 
-#include "event.h"
-
 union perf_event;
 struct perf_session;
 struct evlist;
 struct perf_tool;
-struct perf_mmap;
+struct mmap;
+struct perf_sample;
 struct option;
 struct record_opts;
+struct perf_record_auxtrace_error;
 struct perf_record_auxtrace_info;
 struct events_stats;
 
@@ -444,14 +445,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
                                   bool per_cpu);
 
 typedef int (*process_auxtrace_t)(struct perf_tool *tool,
-                                 struct perf_mmap *map,
+                                 struct mmap *map,
                                  union perf_event *event, void *data1,
                                  size_t len1, void *data2, size_t len2);
 
-int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
                        struct perf_tool *tool, process_auxtrace_t fn);
 
-int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
+int auxtrace_mmap__read_snapshot(struct mmap *map,
                                 struct auxtrace_record *itr,
                                 struct perf_tool *tool, process_auxtrace_t fn,
                                 size_t snapshot_size);
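
AUX-area consumers now get the tool-side struct mmap in their callback. A sketch of a callback matching the updated process_auxtrace_t typedef; the second data/len pair is the wrap-around part of the ring buffer:

static int process_auxtrace(struct perf_tool *tool, struct mmap *map,
                            union perf_event *event, void *data1,
                            size_t len1, void *data2, size_t len2)
{
        /* write out data1[0..len1) and, on a wrap-around, data2[0..len2) */
        return 0;
}

Such a callback is then driven by auxtrace_mmap__read(map, itr, tool, process_auxtrace).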
@@ -524,10 +525,6 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int
                          int code, int cpu, pid_t pid, pid_t tid, u64 ip,
                          const char *msg, u64 timestamp);
 
-int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
-                                        struct perf_tool *tool,
-                                        struct perf_session *session,
-                                        perf_event__handler_t process);
 int perf_event__process_auxtrace_info(struct perf_session *session,
                                      union perf_event *event);
 s64 perf_event__process_auxtrace(struct perf_session *session,
@@ -604,15 +601,6 @@ void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
 {
 }
 
-static inline int
-perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
-                                    struct perf_tool *tool __maybe_unused,
-                                    struct perf_session *session __maybe_unused,
-                                    perf_event__handler_t process __maybe_unused)
-{
-       return -EINVAL;
-}
-
 static inline
 int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
                             struct evlist *evlist __maybe_unused,
index 7a3d4b1..f7ed5d1 100644 (file)
@@ -16,6 +16,7 @@
 #include "map.h"
 #include "evlist.h"
 #include "record.h"
+#include "util/synthetic-events.h"
 
 #define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
 
index a01c2fd..81fdc88 100644 (file)
@@ -6,9 +6,9 @@
 #include <linux/rbtree.h>
 #include <pthread.h>
 #include <api/fd/array.h>
-#include "event.h"
 #include <stdio.h>
 
+struct bpf_prog_info;
 struct machine;
 union perf_event;
 struct perf_env;
@@ -33,11 +33,6 @@ struct btf_node {
 #ifdef HAVE_LIBBPF_SUPPORT
 int machine__process_bpf(struct machine *machine, union perf_event *event,
                         struct perf_sample *sample);
-
-int perf_event__synthesize_bpf_events(struct perf_session *session,
-                                     perf_event__handler_t process,
-                                     struct machine *machine,
-                                     struct record_opts *opts);
 int bpf_event__add_sb_event(struct evlist **evlist,
                                 struct perf_env *env);
 void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
@@ -51,14 +46,6 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused,
        return 0;
 }
 
-static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
-                                                   perf_event__handler_t process __maybe_unused,
-                                                   struct machine *machine __maybe_unused,
-                                                   struct record_opts *opts __maybe_unused)
-{
-       return 0;
-}
-
 static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused,
                                          struct perf_env *env __maybe_unused)
 {
index 37283e8..10c187b 100644 (file)
@@ -1568,7 +1568,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
                        return ERR_PTR(-err);
                }
 
-               evsel = perf_evlist__last(evlist);
+               evsel = evlist__last(evlist);
        }
 
        bpf__for_each_map_named(map, obj, tmp, name) {
index 9d1e090..2285b1e 100644 (file)
@@ -1,5 +1,3 @@
-#include "util/util.h"
-#include "util/debug.h"
 #include "util/map_symbol.h"
 #include "util/branch.h"
 #include <linux/kernel.h>
index 06f66da..88e00d2 100644 (file)
@@ -1,8 +1,15 @@
 #ifndef _PERF_BRANCH_H
 #define _PERF_BRANCH_H 1
-
+/*
+ * The linux/stddef.h isn't needed here, but is needed for __always_inline used
+ * in files included from uapi/linux/perf_event.h such as
+ * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
+ * detected in at least musl libc, used in Alpine Linux. -acme
+ */
 #include <stdio.h>
 #include <stdint.h>
+#include <linux/compiler.h>
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 
index e5fb777..c076fc7 100644 (file)
@@ -7,12 +7,13 @@
  * Copyright (C) 2009, 2010 Red Hat Inc.
  * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
  */
-#include "util.h"
+#include "util.h" // lsdir(), mkdir_p(), rm_rf()
 #include <dirent.h>
 #include <errno.h>
 #include <stdio.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include "util/copyfile.h"
 #include "dso.h"
 #include "build-id.h"
 #include "event.h"
index c14646c..9a9b56e 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "debug.h"
 #include "dso.h"
+#include "event.h"
 #include "hist.h"
 #include "sort.h"
 #include "machine.h"
index b042cee..83398e5 100644 (file)
@@ -4,12 +4,15 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
-#include "event.h"
 #include "map_symbol.h"
 #include "branch.h"
 
+struct addr_location;
 struct evsel;
+struct ip_callchain;
 struct map;
+struct perf_sample;
+struct thread;
 
 #define HELP_PAD "\t\t\t\t"
 
index 4e904fc..a12872f 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <errno.h>
 #include <sched.h>
-#include "util.h"
+#include "util.h" // for sched_getcpu()
 #include "../perf-sys.h"
 #include "cloexec.h"
 #include "event.h"
diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c
new file mode 100644 (file)
index 0000000..47e03de
--- /dev/null
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/copyfile.h"
+#include "util/namespaces.h"
+#include <internal/lib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
+{
+       int err = -1;
+       char *line = NULL;
+       size_t n;
+       FILE *from_fp, *to_fp;
+       struct nscookie nsc;
+
+       nsinfo__mountns_enter(nsi, &nsc);
+       from_fp = fopen(from, "r");
+       nsinfo__mountns_exit(&nsc);
+       if (from_fp == NULL)
+               goto out;
+
+       to_fp = fopen(to, "w");
+       if (to_fp == NULL)
+               goto out_fclose_from;
+
+       while (getline(&line, &n, from_fp) > 0)
+               if (fputs(line, to_fp) == EOF)
+                       goto out_fclose_to;
+       err = 0;
+out_fclose_to:
+       fclose(to_fp);
+       free(line);
+out_fclose_from:
+       fclose(from_fp);
+out:
+       return err;
+}
+
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+       void *ptr;
+       loff_t pgoff;
+
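+       /* mmap() needs a page-aligned file offset, so map from the enclosing page */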
+       pgoff = off_in & ~(page_size - 1);
+       off_in -= pgoff;
+
+       ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+       if (ptr == MAP_FAILED)
+               return -1;
+
+       while (size) {
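+               /* pwrite() may write less than asked: retry on EINTR, stop on error or EOF */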
+               ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+               if (ret < 0 && errno == EINTR)
+                       continue;
+               if (ret <= 0)
+                       break;
+
+               size -= ret;
+               off_in += ret;
+               off_out += ret;
+       }
+       munmap(ptr, off_in + size);
+
+       return size ? -1 : 0;
+}
+
+static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
+                           struct nsinfo *nsi)
+{
+       int fromfd, tofd;
+       struct stat st;
+       int err;
+       char *tmp = NULL, *ptr = NULL;
+       struct nscookie nsc;
+
+       nsinfo__mountns_enter(nsi, &nsc);
+       err = stat(from, &st);
+       nsinfo__mountns_exit(&nsc);
+       if (err)
+               goto out;
+       err = -1;
+
+       /* extra 'x' at the end is to reserve space for '.' */
+       if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+               tmp = NULL;
+               goto out;
+       }
+       ptr = strrchr(tmp, '/');
+       if (!ptr)
+               goto out;
+       ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+       *ptr = '.';
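+       /* "dir/name.XXXXXXx" is now "dir/.name.XXXXXX": a hidden temp file in the same dir */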
+
+       tofd = mkstemp(tmp);
+       if (tofd < 0)
+               goto out;
+
+       if (st.st_size == 0) { /* /proc? do it slowly... */
+               err = slow_copyfile(from, tmp, nsi);
+               if (!err && fchmod(tofd, mode))
+                       err = -1;
+               goto out_close_to;
+       }
+
+       if (fchmod(tofd, mode))
+               goto out_close_to;
+
+       nsinfo__mountns_enter(nsi, &nsc);
+       fromfd = open(from, O_RDONLY);
+       nsinfo__mountns_exit(&nsc);
+       if (fromfd < 0)
+               goto out_close_to;
+
+       err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
+
+       close(fromfd);
+out_close_to:
+       close(tofd);
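+       /* publish the copy under its final name only once it is fully written */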
+       if (!err)
+               err = link(tmp, to);
+       unlink(tmp);
+out:
+       free(tmp);
+       return err;
+}
+
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
+{
+       return copyfile_mode_ns(from, to, 0755, nsi);
+}
+
+int copyfile_mode(const char *from, const char *to, mode_t mode)
+{
+       return copyfile_mode_ns(from, to, mode, NULL);
+}
+
+int copyfile(const char *from, const char *to)
+{
+       return copyfile_mode(from, to, 0755);
+}
diff --git a/tools/perf/util/copyfile.h b/tools/perf/util/copyfile.h
new file mode 100644 (file)
index 0000000..e85d2f2
--- /dev/null
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_COPYFILE_H_
+#define PERF_COPYFILE_H_
+
+#include <linux/types.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+struct nsinfo;
+
+int copyfile(const char *from, const char *to);
+int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
+
+#endif // PERF_COPYFILE_H_
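
The helpers keep their old names and semantics (zero on success), only their home changes. A usage sketch; the paths, from_path/to_path and nsinfo are hypothetical stand-ins:

#include "util/copyfile.h"

if (copyfile("/tmp/perf-src.data", "/tmp/perf-dst.data"))
        pr_debug("copy failed\n");

/* or resolving 'from' inside a target task's mount namespace: */
if (copyfile_ns(from_path, to_path, nsinfo))
        pr_debug("copy failed\n");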
index 37d7c49..cd92a99 100644 (file)
@@ -17,7 +17,6 @@
 #include "cs-etm.h"
 #include "cs-etm-decoder.h"
 #include "intlist.h"
-#include "util.h"
 
 /* use raw logging */
 #ifdef CS_DEBUG_RAW
index 707afdb..4ba0f87 100644 (file)
@@ -35,7 +35,7 @@
 #include "thread.h"
 #include "thread-stack.h"
 #include <tools/libc_compat.h>
-#include "util.h"
+#include "util/synthetic-events.h"
 
 #define MAX_TIMESTAMP (~0ULL)
 
@@ -1298,7 +1298,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
        attr.read_format = evsel->core.attr.read_format;
 
        /* create new id val to be a fixed offset from evsel id */
-       id = evsel->id[0] + 1000000000;
+       id = evsel->core.id[0] + 1000000000;
 
        if (!id)
                id = 1;
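
The per-evsel sample-id array joins attr in the libperf core. A tiny sketch of the moved field accesses, with a hypothetical helper name:

static u64 evsel__first_id(struct evsel *evsel)
{
        return evsel->core.id[0];       /* was evsel->id[0]; attr is evsel->core.attr now */
}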
index 0c26844..dbc772b 100644 (file)
@@ -30,6 +30,7 @@
 #include "machine.h"
 #include "config.h"
 #include <linux/ctype.h>
+#include <linux/err.h>
 
 #define pr_N(n, fmt, ...) \
        eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
@@ -1619,8 +1620,10 @@ int bt_convert__perf2ctf(const char *input, const char *path,
        err = -1;
        /* perf.data session */
        session = perf_session__new(&data, 0, &c.tool);
-       if (!session)
+       if (IS_ERR(session)) {
+               err = PTR_ERR(session);
                goto free_writer;
+       }
 
        if (c.queue_size) {
                ordered_events__set_alloc_size(&session->ordered_events,
index e75c3a2..88fba2b 100644 (file)
 #include <dirent.h>
 
 #include "data.h"
-#include "util.h"
+#include "util.h" // rm_rf_perf_data()
 #include "debug.h"
 #include "header.h"
+#include <internal/lib.h>
 
 static void close_dir(struct perf_data_file *files, int nr)
 {
index a1b59bd..e55114f 100644 (file)
@@ -17,7 +17,6 @@
 #include "event.h"
 #include "debug.h"
 #include "print_binary.h"
-#include "util.h"
 #include "target.h"
 #include "ui/helpline.h"
 #include "ui/ui.h"
index b2deee9..d25ae1c 100644 (file)
@@ -3,9 +3,9 @@
 #ifndef __PERF_DEBUG_H
 #define __PERF_DEBUG_H
 
+#include <stdarg.h>
 #include <stdbool.h>
 #include <linux/compiler.h>
-#include "../ui/util.h"
 
 extern int verbose;
 extern bool quiet, dump_trace;
index 763328c..6fb7f34 100644 (file)
@@ -3,7 +3,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include "debug.h"
 #include "symbol.h"
 
 #include "demangle-java.h"
index 423afbb..a659fc6 100644 (file)
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <string.h>
-#include "util.h"
 #include "debug.h"
 
 #include "demangle-rust.h"
index db55edd..1b49ece 100644 (file)
@@ -5,7 +5,6 @@
  * Written by: Masami Hiramatsu <mhiramat@kernel.org>
  */
 
-#include <util.h>
 #include <debug.h>
 #include <dwarf-regs.h>
 #include <elf.h>
index d8e083d..db40906 100644 (file)
@@ -4,9 +4,10 @@
 
 #include <linux/types.h>
 #include <linux/rbtree.h>
-#include "cpumap.h"
 #include "rwsem.h"
 
+struct perf_cpu_map;
+
 struct cpu_topology_map {
        int     socket_id;
        int     die_id;
index f4afbb8..fc1e5a9 100644 (file)
@@ -1,16 +1,16 @@
-#include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <inttypes.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <perf/cpumap.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
-#include <api/fs/fs.h>
 #include <linux/perf_event.h>
 #include <linux/zalloc.h>
+#include "cpumap.h"
 #include "dso.h"
 #include "event.h"
 #include "debug.h"
@@ -24,6 +24,7 @@
 #include "time-utils.h"
 #include <linux/ctype.h>
 #include "map.h"
+#include "util/namespaces.h"
 #include "symbol.h"
 #include "symbol/kallsyms.h"
 #include "asm/bug.h"
@@ -33,8 +34,6 @@
 #include "tool.h"
 #include "../perf.h"
 
-#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
-
 static const char *perf_event__names[] = {
        [0]                                     = "TOTAL",
        [PERF_RECORD_MMAP]                      = "MMAP",
@@ -75,18 +74,6 @@ static const char *perf_event__names[] = {
        [PERF_RECORD_COMPRESSED]                = "COMPRESSED",
 };
 
-static const char *perf_ns__names[] = {
-       [NET_NS_INDEX]          = "net",
-       [UTS_NS_INDEX]          = "uts",
-       [IPC_NS_INDEX]          = "ipc",
-       [PID_NS_INDEX]          = "pid",
-       [USER_NS_INDEX]         = "user",
-       [MNT_NS_INDEX]          = "mnt",
-       [CGROUP_NS_INDEX]       = "cgroup",
-};
-
-unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
-
 const char *perf_event__name(unsigned int id)
 {
        if (id >= ARRAY_SIZE(perf_event__names))
@@ -96,775 +83,6 @@ const char *perf_event__name(unsigned int id)
        return perf_event__names[id];
 }
 
-static const char *perf_ns__name(unsigned int id)
-{
-       if (id >= ARRAY_SIZE(perf_ns__names))
-               return "UNKNOWN";
-       return perf_ns__names[id];
-}
-
-int perf_tool__process_synth_event(struct perf_tool *tool,
-                                  union perf_event *event,
-                                  struct machine *machine,
-                                  perf_event__handler_t process)
-{
-       struct perf_sample synth_sample = {
-       .pid       = -1,
-       .tid       = -1,
-       .time      = -1,
-       .stream_id = -1,
-       .cpu       = -1,
-       .period    = 1,
-       .cpumode   = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
-       };
-
-       return process(tool, event, &synth_sample, machine);
-};
-
-/*
- * Assumes that the first 4095 bytes of /proc/pid/stat contains
- * the comm, tgid and ppid.
- */
-static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
-                                   pid_t *tgid, pid_t *ppid)
-{
-       char filename[PATH_MAX];
-       char bf[4096];
-       int fd;
-       size_t size = 0;
-       ssize_t n;
-       char *name, *tgids, *ppids;
-
-       *tgid = -1;
-       *ppid = -1;
-
-       snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
-
-       fd = open(filename, O_RDONLY);
-       if (fd < 0) {
-               pr_debug("couldn't open %s\n", filename);
-               return -1;
-       }
-
-       n = read(fd, bf, sizeof(bf) - 1);
-       close(fd);
-       if (n <= 0) {
-               pr_warning("Couldn't get COMM, tgid and ppid for pid %d\n",
-                          pid);
-               return -1;
-       }
-       bf[n] = '\0';
-
-       name = strstr(bf, "Name:");
-       tgids = strstr(bf, "Tgid:");
-       ppids = strstr(bf, "PPid:");
-
-       if (name) {
-               char *nl;
-
-               name = skip_spaces(name + 5);  /* strlen("Name:") */
-               nl = strchr(name, '\n');
-               if (nl)
-                       *nl = '\0';
-
-               size = strlen(name);
-               if (size >= len)
-                       size = len - 1;
-               memcpy(comm, name, size);
-               comm[size] = '\0';
-       } else {
-               pr_debug("Name: string not found for pid %d\n", pid);
-       }
-
-       if (tgids) {
-               tgids += 5;  /* strlen("Tgid:") */
-               *tgid = atoi(tgids);
-       } else {
-               pr_debug("Tgid: string not found for pid %d\n", pid);
-       }
-
-       if (ppids) {
-               ppids += 5;  /* strlen("PPid:") */
-               *ppid = atoi(ppids);
-       } else {
-               pr_debug("PPid: string not found for pid %d\n", pid);
-       }
-
-       return 0;
-}
-
-static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
-                                   struct machine *machine,
-                                   pid_t *tgid, pid_t *ppid)
-{
-       size_t size;
-
-       *ppid = -1;
-
-       memset(&event->comm, 0, sizeof(event->comm));
-
-       if (machine__is_host(machine)) {
-               if (perf_event__get_comm_ids(pid, event->comm.comm,
-                                            sizeof(event->comm.comm),
-                                            tgid, ppid) != 0) {
-                       return -1;
-               }
-       } else {
-               *tgid = machine->pid;
-       }
-
-       if (*tgid < 0)
-               return -1;
-
-       event->comm.pid = *tgid;
-       event->comm.header.type = PERF_RECORD_COMM;
-
-       size = strlen(event->comm.comm) + 1;
-       size = PERF_ALIGN(size, sizeof(u64));
-       memset(event->comm.comm + size, 0, machine->id_hdr_size);
-       event->comm.header.size = (sizeof(event->comm) -
-                               (sizeof(event->comm.comm) - size) +
-                               machine->id_hdr_size);
-       event->comm.tid = pid;
-
-       return 0;
-}
-
-pid_t perf_event__synthesize_comm(struct perf_tool *tool,
-                                        union perf_event *event, pid_t pid,
-                                        perf_event__handler_t process,
-                                        struct machine *machine)
-{
-       pid_t tgid, ppid;
-
-       if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
-               return -1;
-
-       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
-               return -1;
-
-       return tgid;
-}
-
-static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
-                                        struct perf_ns_link_info *ns_link_info)
-{
-       struct stat64 st;
-       char proc_ns[128];
-
-       sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
-       if (stat64(proc_ns, &st) == 0) {
-               ns_link_info->dev = st.st_dev;
-               ns_link_info->ino = st.st_ino;
-       }
-}
-
-int perf_event__synthesize_namespaces(struct perf_tool *tool,
-                                     union perf_event *event,
-                                     pid_t pid, pid_t tgid,
-                                     perf_event__handler_t process,
-                                     struct machine *machine)
-{
-       u32 idx;
-       struct perf_ns_link_info *ns_link_info;
-
-       if (!tool || !tool->namespace_events)
-               return 0;
-
-       memset(&event->namespaces, 0, (sizeof(event->namespaces) +
-              (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-              machine->id_hdr_size));
-
-       event->namespaces.pid = tgid;
-       event->namespaces.tid = pid;
-
-       event->namespaces.nr_namespaces = NR_NAMESPACES;
-
-       ns_link_info = event->namespaces.link_info;
-
-       for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
-               perf_event__get_ns_link_info(pid, perf_ns__name(idx),
-                                            &ns_link_info[idx]);
-
-       event->namespaces.header.type = PERF_RECORD_NAMESPACES;
-
-       event->namespaces.header.size = (sizeof(event->namespaces) +
-                       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-                       machine->id_hdr_size);
-
-       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
-               return -1;
-
-       return 0;
-}
-
-static int perf_event__synthesize_fork(struct perf_tool *tool,
-                                      union perf_event *event,
-                                      pid_t pid, pid_t tgid, pid_t ppid,
-                                      perf_event__handler_t process,
-                                      struct machine *machine)
-{
-       memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
-
-       /*
-        * For the main thread, set the parent to the ppid from the status
-        * file. For other threads, set the parent pid to the main thread,
-        * i.e. assume the main thread spawns all threads in a process.
-        */
-       if (tgid == pid) {
-               event->fork.ppid = ppid;
-               event->fork.ptid = ppid;
-       } else {
-               event->fork.ppid = tgid;
-               event->fork.ptid = tgid;
-       }
-       event->fork.pid  = tgid;
-       event->fork.tid  = pid;
-       event->fork.header.type = PERF_RECORD_FORK;
-       event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
-
-       event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
-
-       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
-               return -1;
-
-       return 0;
-}
-
-int perf_event__synthesize_mmap_events(struct perf_tool *tool,
-                                      union perf_event *event,
-                                      pid_t pid, pid_t tgid,
-                                      perf_event__handler_t process,
-                                      struct machine *machine,
-                                      bool mmap_data)
-{
-       char filename[PATH_MAX];
-       FILE *fp;
-       unsigned long long t;
-       bool truncation = false;
-       unsigned long long timeout = proc_map_timeout * 1000000ULL;
-       int rc = 0;
-       const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
-       int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
-
-       if (machine__is_default_guest(machine))
-               return 0;
-
-       snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
-                machine->root_dir, pid, pid);
-
-       fp = fopen(filename, "r");
-       if (fp == NULL) {
-               /*
-                * We raced with a task exiting - just return:
-                */
-               pr_debug("couldn't open %s\n", filename);
-               return -1;
-       }
-
-       event->header.type = PERF_RECORD_MMAP2;
-       t = rdclock();
-
-       while (1) {
-               char bf[BUFSIZ];
-               char prot[5];
-               char execname[PATH_MAX];
-               char anonstr[] = "//anon";
-               unsigned int ino;
-               size_t size;
-               ssize_t n;
-
-               if (fgets(bf, sizeof(bf), fp) == NULL)
-                       break;
-
-               if ((rdclock() - t) > timeout) {
-                       pr_warning("Reading %s time out. "
-                                  "You may want to increase "
-                                  "the time limit by --proc-map-timeout\n",
-                                  filename);
-                       truncation = true;
-                       goto out;
-               }
-
-               /* ensure null termination since stack will be reused. */
-               strcpy(execname, "");
-
-               /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
-                      &event->mmap2.start, &event->mmap2.len, prot,
-                      &event->mmap2.pgoff, &event->mmap2.maj,
-                      &event->mmap2.min,
-                      &ino, execname);
-
-               /*
-                * Anon maps don't have the execname.
-                */
-               if (n < 7)
-                       continue;
-
-               event->mmap2.ino = (u64)ino;
-
-               /*
-                * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
-                */
-               if (machine__is_host(machine))
-                       event->header.misc = PERF_RECORD_MISC_USER;
-               else
-                       event->header.misc = PERF_RECORD_MISC_GUEST_USER;
-
-               /* map protection and flags bits */
-               event->mmap2.prot = 0;
-               event->mmap2.flags = 0;
-               if (prot[0] == 'r')
-                       event->mmap2.prot |= PROT_READ;
-               if (prot[1] == 'w')
-                       event->mmap2.prot |= PROT_WRITE;
-               if (prot[2] == 'x')
-                       event->mmap2.prot |= PROT_EXEC;
-
-               if (prot[3] == 's')
-                       event->mmap2.flags |= MAP_SHARED;
-               else
-                       event->mmap2.flags |= MAP_PRIVATE;
-
-               if (prot[2] != 'x') {
-                       if (!mmap_data || prot[0] != 'r')
-                               continue;
-
-                       event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
-               }
-
-out:
-               if (truncation)
-                       event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
-
-               if (!strcmp(execname, ""))
-                       strcpy(execname, anonstr);
-
-               if (hugetlbfs_mnt_len &&
-                   !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
-                       strcpy(execname, anonstr);
-                       event->mmap2.flags |= MAP_HUGETLB;
-               }
-
-               size = strlen(execname) + 1;
-               memcpy(event->mmap2.filename, execname, size);
-               size = PERF_ALIGN(size, sizeof(u64));
-               event->mmap2.len -= event->mmap.start;
-               event->mmap2.header.size = (sizeof(event->mmap2) -
-                                       (sizeof(event->mmap2.filename) - size));
-               memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
-               event->mmap2.header.size += machine->id_hdr_size;
-               event->mmap2.pid = tgid;
-               event->mmap2.tid = pid;
-
-               if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
-                       rc = -1;
-                       break;
-               }
-
-               if (truncation)
-                       break;
-       }
-
-       fclose(fp);
-       return rc;
-}
-
-int perf_event__synthesize_modules(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine)
-{
-       int rc = 0;
-       struct map *pos;
-       struct maps *maps = machine__kernel_maps(machine);
-       union perf_event *event = zalloc((sizeof(event->mmap) +
-                                         machine->id_hdr_size));
-       if (event == NULL) {
-               pr_debug("Not enough memory synthesizing mmap event "
-                        "for kernel modules\n");
-               return -1;
-       }
-
-       event->header.type = PERF_RECORD_MMAP;
-
-       /*
-        * kernel uses 0 for user space maps, see kernel/perf_event.c
-        * __perf_event_mmap
-        */
-       if (machine__is_host(machine))
-               event->header.misc = PERF_RECORD_MISC_KERNEL;
-       else
-               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
-
-       for (pos = maps__first(maps); pos; pos = map__next(pos)) {
-               size_t size;
-
-               if (!__map__is_kmodule(pos))
-                       continue;
-
-               size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
-               event->mmap.header.type = PERF_RECORD_MMAP;
-               event->mmap.header.size = (sizeof(event->mmap) -
-                                       (sizeof(event->mmap.filename) - size));
-               memset(event->mmap.filename + size, 0, machine->id_hdr_size);
-               event->mmap.header.size += machine->id_hdr_size;
-               event->mmap.start = pos->start;
-               event->mmap.len   = pos->end - pos->start;
-               event->mmap.pid   = machine->pid;
-
-               memcpy(event->mmap.filename, pos->dso->long_name,
-                      pos->dso->long_name_len + 1);
-               if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
-                       rc = -1;
-                       break;
-               }
-       }
-
-       free(event);
-       return rc;
-}
-
-static int __event__synthesize_thread(union perf_event *comm_event,
-                                     union perf_event *mmap_event,
-                                     union perf_event *fork_event,
-                                     union perf_event *namespaces_event,
-                                     pid_t pid, int full,
-                                     perf_event__handler_t process,
-                                     struct perf_tool *tool,
-                                     struct machine *machine,
-                                     bool mmap_data)
-{
-       char filename[PATH_MAX];
-       DIR *tasks;
-       struct dirent *dirent;
-       pid_t tgid, ppid;
-       int rc = 0;
-
-       /* special case: only send one comm event using passed in pid */
-       if (!full) {
-               tgid = perf_event__synthesize_comm(tool, comm_event, pid,
-                                                  process, machine);
-
-               if (tgid == -1)
-                       return -1;
-
-               if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
-                                                     tgid, process, machine) < 0)
-                       return -1;
-
-               /*
-                * send mmap only for thread group leader
-                * see thread__init_map_groups
-                */
-               if (pid == tgid &&
-                   perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-                                                      process, machine, mmap_data))
-                       return -1;
-
-               return 0;
-       }
-
-       if (machine__is_default_guest(machine))
-               return 0;
-
-       snprintf(filename, sizeof(filename), "%s/proc/%d/task",
-                machine->root_dir, pid);
-
-       tasks = opendir(filename);
-       if (tasks == NULL) {
-               pr_debug("couldn't open %s\n", filename);
-               return 0;
-       }
-
-       while ((dirent = readdir(tasks)) != NULL) {
-               char *end;
-               pid_t _pid;
-
-               _pid = strtol(dirent->d_name, &end, 10);
-               if (*end)
-                       continue;
-
-               rc = -1;
-               if (perf_event__prepare_comm(comm_event, _pid, machine,
-                                            &tgid, &ppid) != 0)
-                       break;
-
-               if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
-                                               ppid, process, machine) < 0)
-                       break;
-
-               if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
-                                                     tgid, process, machine) < 0)
-                       break;
-
-               /*
-                * Send the prepared comm event
-                */
-               if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
-                       break;
-
-               rc = 0;
-               if (_pid == pid) {
-                       /* process the parent's maps too */
-                       rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-                                               process, machine, mmap_data);
-                       if (rc)
-                               break;
-               }
-       }
-
-       closedir(tasks);
-       return rc;
-}
-
-int perf_event__synthesize_thread_map(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine,
-                                     bool mmap_data)
-{
-       union perf_event *comm_event, *mmap_event, *fork_event;
-       union perf_event *namespaces_event;
-       int err = -1, thread, j;
-
-       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
-       if (comm_event == NULL)
-               goto out;
-
-       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
-       if (mmap_event == NULL)
-               goto out_free_comm;
-
-       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
-       if (fork_event == NULL)
-               goto out_free_mmap;
-
-       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
-                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-                                 machine->id_hdr_size);
-       if (namespaces_event == NULL)
-               goto out_free_fork;
-
-       err = 0;
-       for (thread = 0; thread < threads->nr; ++thread) {
-               if (__event__synthesize_thread(comm_event, mmap_event,
-                                              fork_event, namespaces_event,
-                                              perf_thread_map__pid(threads, thread), 0,
-                                              process, tool, machine,
-                                              mmap_data)) {
-                       err = -1;
-                       break;
-               }
-
-               /*
-                * comm.pid is set to thread group id by
-                * perf_event__synthesize_comm
-                */
-               if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) {
-                       bool need_leader = true;
-
-                       /* is thread group leader in thread_map? */
-                       for (j = 0; j < threads->nr; ++j) {
-                               if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) {
-                                       need_leader = false;
-                                       break;
-                               }
-                       }
-
-                       /* if not, generate events for it */
-                       if (need_leader &&
-                           __event__synthesize_thread(comm_event, mmap_event,
-                                                      fork_event, namespaces_event,
-                                                      comm_event->comm.pid, 0,
-                                                      process, tool, machine,
-                                                      mmap_data)) {
-                               err = -1;
-                               break;
-                       }
-               }
-       }
-       free(namespaces_event);
-out_free_fork:
-       free(fork_event);
-out_free_mmap:
-       free(mmap_event);
-out_free_comm:
-       free(comm_event);
-out:
-       return err;
-}
-
-static int __perf_event__synthesize_threads(struct perf_tool *tool,
-                                           perf_event__handler_t process,
-                                           struct machine *machine,
-                                           bool mmap_data,
-                                           struct dirent **dirent,
-                                           int start,
-                                           int num)
-{
-       union perf_event *comm_event, *mmap_event, *fork_event;
-       union perf_event *namespaces_event;
-       int err = -1;
-       char *end;
-       pid_t pid;
-       int i;
-
-       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
-       if (comm_event == NULL)
-               goto out;
-
-       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
-       if (mmap_event == NULL)
-               goto out_free_comm;
-
-       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
-       if (fork_event == NULL)
-               goto out_free_mmap;
-
-       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
-                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-                                 machine->id_hdr_size);
-       if (namespaces_event == NULL)
-               goto out_free_fork;
-
-       for (i = start; i < start + num; i++) {
-               if (!isdigit(dirent[i]->d_name[0]))
-                       continue;
-
-               pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
-               /* only interested in proper numerical dirents */
-               if (*end)
-                       continue;
-               /*
-                * We may race with an exiting thread, so don't stop just
-                * because one thread couldn't be synthesized.
-                */
-               __event__synthesize_thread(comm_event, mmap_event, fork_event,
-                                          namespaces_event, pid, 1, process,
-                                          tool, machine, mmap_data);
-       }
-       err = 0;
-
-       free(namespaces_event);
-out_free_fork:
-       free(fork_event);
-out_free_mmap:
-       free(mmap_event);
-out_free_comm:
-       free(comm_event);
-out:
-       return err;
-}
-
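The scan above tolerates races with exiting threads and skips any /proc entry whose name is not purely numeric. A self-contained sketch of that filter, using only libc:

	#include <ctype.h>
	#include <dirent.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/types.h>

	int main(void)
	{
		struct dirent **dirent;
		int n = scandir("/proc", &dirent, NULL, alphasort);

		if (n < 0)
			return 1;

		for (int i = 0; i < n; i++) {
			char *end;
			pid_t pid;

			if (isdigit(dirent[i]->d_name[0])) {
				pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
				if (!*end)	/* no trailing junk: a pid dir */
					printf("pid %d\n", (int)pid);
			}
			free(dirent[i]);
		}
		free(dirent);
		return 0;
	}
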
-struct synthesize_threads_arg {
-       struct perf_tool *tool;
-       perf_event__handler_t process;
-       struct machine *machine;
-       bool mmap_data;
-       struct dirent **dirent;
-       int num;
-       int start;
-};
-
-static void *synthesize_threads_worker(void *arg)
-{
-       struct synthesize_threads_arg *args = arg;
-
-       __perf_event__synthesize_threads(args->tool, args->process,
-                                        args->machine, args->mmap_data,
-                                        args->dirent,
-                                        args->start, args->num);
-       return NULL;
-}
-
-int perf_event__synthesize_threads(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine,
-                                  bool mmap_data,
-                                  unsigned int nr_threads_synthesize)
-{
-       struct synthesize_threads_arg *args = NULL;
-       pthread_t *synthesize_threads = NULL;
-       char proc_path[PATH_MAX];
-       struct dirent **dirent;
-       int num_per_thread;
-       int m, n, i, j;
-       int thread_nr;
-       int base = 0;
-       int err = -1;
-
-
-       if (machine__is_default_guest(machine))
-               return 0;
-
-       snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
-       n = scandir(proc_path, &dirent, 0, alphasort);
-       if (n < 0)
-               return err;
-
-       if (nr_threads_synthesize == UINT_MAX)
-               thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
-       else
-               thread_nr = nr_threads_synthesize;
-
-       if (thread_nr <= 1) {
-               err = __perf_event__synthesize_threads(tool, process,
-                                                      machine, mmap_data,
-                                                      dirent, base, n);
-               goto free_dirent;
-       }
-       if (thread_nr > n)
-               thread_nr = n;
-
-       synthesize_threads = calloc(thread_nr, sizeof(pthread_t));
-       if (synthesize_threads == NULL)
-               goto free_dirent;
-
-       args = calloc(thread_nr, sizeof(*args));
-       if (args == NULL)
-               goto free_threads;
-
-       num_per_thread = n / thread_nr;
-       m = n % thread_nr;
-       for (i = 0; i < thread_nr; i++) {
-               args[i].tool = tool;
-               args[i].process = process;
-               args[i].machine = machine;
-               args[i].mmap_data = mmap_data;
-               args[i].dirent = dirent;
-       }
-       for (i = 0; i < m; i++) {
-               args[i].num = num_per_thread + 1;
-               args[i].start = i * args[i].num;
-       }
-       if (i != 0)
-               base = args[i-1].start + args[i-1].num;
-       for (j = i; j < thread_nr; j++) {
-               args[j].num = num_per_thread;
-               args[j].start = base + (j - i) * args[i].num;
-       }
-
-       for (i = 0; i < thread_nr; i++) {
-               if (pthread_create(&synthesize_threads[i], NULL,
-                                  synthesize_threads_worker, &args[i]))
-                       goto out_join;
-       }
-       err = 0;
-out_join:
-       for (i = 0; i < thread_nr; i++)
-               pthread_join(synthesize_threads[i], NULL);
-       free(args);
-free_threads:
-       free(synthesize_threads);
-free_dirent:
-       for (i = 0; i < n; i++)
-               zfree(&dirent[i]);
-       free(dirent);
-
-       return err;
-}
-
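The split above hands the first m = n % thread_nr workers one extra dirent each, so all n entries are covered exactly once with no overlap. The same arithmetic in a standalone sketch:

	#include <stdio.h>

	int main(void)
	{
		int n = 10, thread_nr = 4;	/* e.g. 10 dirents, 4 workers */
		int per = n / thread_nr;	/* 2 */
		int m = n % thread_nr;		/* first 2 workers get one extra */
		int start = 0;

		for (int i = 0; i < thread_nr; i++) {
			int num = per + (i < m ? 1 : 0);

			printf("worker %d: start=%d num=%d\n", i, start, num);
			start += num;
		}
		/* 3 + 3 + 2 + 2 = 10: same coverage the two loops above build. */
		return 0;
	}
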
 struct process_symbol_args {
        const char *name;
        u64        start;
@@ -899,327 +117,6 @@ int kallsyms__get_function_start(const char *kallsyms_filename,
        return 0;
 }
 
-int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
-                                             perf_event__handler_t process __maybe_unused,
-                                             struct machine *machine __maybe_unused)
-{
-       return 0;
-}
-
-static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-                                               perf_event__handler_t process,
-                                               struct machine *machine)
-{
-       size_t size;
-       struct map *map = machine__kernel_map(machine);
-       struct kmap *kmap;
-       int err;
-       union perf_event *event;
-
-       if (map == NULL)
-               return -1;
-
-       kmap = map__kmap(map);
-       if (!kmap->ref_reloc_sym)
-               return -1;
-
-       /*
-        * We should get this from /sys/kernel/sections/.text, but until that
-        * is available use this, and once it is available keep this as a
-        * fallback for older kernels.
-        */
-       event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
-       if (event == NULL) {
-               pr_debug("Not enough memory synthesizing mmap event "
-                        "for kernel modules\n");
-               return -1;
-       }
-
-       if (machine__is_host(machine)) {
-               /*
-                * The kernel uses PERF_RECORD_MISC_USER for user space maps;
-                * see kernel/perf_event.c __perf_event_mmap().
-                */
-               event->header.misc = PERF_RECORD_MISC_KERNEL;
-       } else {
-               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
-       }
-
-       size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
-                       "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
-       size = PERF_ALIGN(size, sizeof(u64));
-       event->mmap.header.type = PERF_RECORD_MMAP;
-       event->mmap.header.size = (sizeof(event->mmap) -
-                       (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
-       event->mmap.pgoff = kmap->ref_reloc_sym->addr;
-       event->mmap.start = map->start;
-       event->mmap.len   = map->end - event->mmap.start;
-       event->mmap.pid   = machine->pid;
-
-       err = perf_tool__process_synth_event(tool, event, machine, process);
-       free(event);
-
-       return err;
-}
-
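The header.size computation above trims the unused tail of the fixed filename buffer and rounds the string up to a u64 boundary. A toy version of the same arithmetic (struct layout simplified; ALIGN8 is a local copy of PERF_ALIGN(x, 8)):

	#include <stdio.h>
	#include <string.h>

	#define ALIGN8(x) (((x) + 7UL) & ~7UL)

	struct toy_mmap_event {
		unsigned short header_size;
		char filename[4096];
	};

	int main(void)
	{
		struct toy_mmap_event ev;
		size_t size = strlen("[kernel.kallsyms]_text") + 1;

		size = ALIGN8(size);	/* string rounded up to u64 */
		/* Shrink the struct by the unused part of the buffer. */
		ev.header_size = sizeof(ev) - (sizeof(ev.filename) - size);
		printf("wire size %u instead of %zu bytes\n",
		       (unsigned)ev.header_size, sizeof(ev));
		return 0;
	}
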
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-                                      perf_event__handler_t process,
-                                      struct machine *machine)
-{
-       int err;
-
-       err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
-       if (err < 0)
-               return err;
-
-       return perf_event__synthesize_extra_kmaps(tool, process, machine);
-}
-
-int perf_event__synthesize_thread_map2(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine)
-{
-       union perf_event *event;
-       int i, err, size;
-
-       size  = sizeof(event->thread_map);
-       size += threads->nr * sizeof(event->thread_map.entries[0]);
-
-       event = zalloc(size);
-       if (!event)
-               return -ENOMEM;
-
-       event->header.type = PERF_RECORD_THREAD_MAP;
-       event->header.size = size;
-       event->thread_map.nr = threads->nr;
-
-       for (i = 0; i < threads->nr; i++) {
-               struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i];
-               char *comm = perf_thread_map__comm(threads, i);
-
-               if (!comm)
-                       comm = (char *) "";
-
-               entry->pid = perf_thread_map__pid(threads, i);
-               strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
-       }
-
-       err = process(tool, event, NULL, machine);
-
-       free(event);
-       return err;
-}
-
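The event above is a fixed header plus threads->nr variable entries, sized and allocated in one block; zeroing it first (zalloc) matters because strncpy() does not terminate on truncation. A minimal sketch of that layout using a C99 flexible array member (toy types, not the perf ones):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct entry { long pid; char comm[16]; };
	struct toy_thread_map { int nr; struct entry entries[]; };

	int main(void)
	{
		int nr = 2;
		struct toy_thread_map *ev =
			calloc(1, sizeof(*ev) + nr * sizeof(ev->entries[0]));

		if (!ev)
			return 1;
		ev->nr = nr;
		/* Buffer is pre-zeroed, so truncation still leaves a NUL. */
		strncpy(ev->entries[0].comm, "sh", sizeof(ev->entries[0].comm) - 1);
		ev->entries[0].pid = 42;
		printf("event bytes: %zu\n",
		       sizeof(*ev) + nr * sizeof(ev->entries[0]));
		free(ev);
		return 0;
	}
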
-static void synthesize_cpus(struct cpu_map_entries *cpus,
-                           struct perf_cpu_map *map)
-{
-       int i;
-
-       cpus->nr = map->nr;
-
-       for (i = 0; i < map->nr; i++)
-               cpus->cpu[i] = map->map[i];
-}
-
-static void synthesize_mask(struct perf_record_record_cpu_map *mask,
-                           struct perf_cpu_map *map, int max)
-{
-       int i;
-
-       mask->nr = BITS_TO_LONGS(max);
-       mask->long_size = sizeof(long);
-
-       for (i = 0; i < map->nr; i++)
-               set_bit(map->map[i], mask->mask);
-}
-
-static size_t cpus_size(struct perf_cpu_map *map)
-{
-       return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
-}
-
-static size_t mask_size(struct perf_cpu_map *map, int *max)
-{
-       int i;
-
-       *max = 0;
-
-       for (i = 0; i < map->nr; i++) {
-               /* the bit position of the cpu is cpu + 1 */
-               int bit = map->map[i] + 1;
-
-               if (bit > *max)
-                       *max = bit;
-       }
-
-       return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long);
-}
-
-void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max)
-{
-       size_t size_cpus, size_mask;
-       bool is_dummy = perf_cpu_map__empty(map);
-
-       /*
-        * Both array and mask data have variable size based
-        * on the number of cpus and their actual values.
-        * The size of the 'struct perf_record_cpu_map_data' is:
-        *
-        *   array = size of 'struct cpu_map_entries' +
-        *           number of cpus * sizeof(u64)
-        *
-        *   mask  = size of 'struct perf_record_record_cpu_map' +
-        *           maximum cpu bit converted to size of longs
-        *
-        * and finally + the size of 'struct perf_record_cpu_map_data'.
-        */
-       size_cpus = cpus_size(map);
-       size_mask = mask_size(map, max);
-
-       if (is_dummy || (size_cpus < size_mask)) {
-               *size += size_cpus;
-               *type  = PERF_CPU_MAP__CPUS;
-       } else {
-               *size += size_mask;
-               *type  = PERF_CPU_MAP__MASK;
-       }
-
-       *size += sizeof(struct perf_record_cpu_map_data);
-       *size = PERF_ALIGN(*size, sizeof(u64));
-       return zalloc(*size);
-}
-
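Which encoding wins is a pure byte-count comparison: a few low-numbered CPUs are cheaper as a u16 array, a dense set of many CPUs is cheaper as a bitmask. A worked sketch of the decision, reduced to the variable-size parts only (the real structs add fixed headers):

	#include <stdio.h>

	#define BITS_PER_LONG	(8 * sizeof(long))
	#define LONGS(bits)	(((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

	int main(void)
	{
		int nr = 4, max_cpu = 255;	/* 4 CPUs, highest id 255 */
		size_t size_cpus = nr * sizeof(unsigned short);
		size_t size_mask = LONGS(max_cpu + 1) * sizeof(long);

		/* 8 vs 32 bytes here, so this sparse set would be sent as
		 * PERF_CPU_MAP__CPUS rather than PERF_CPU_MAP__MASK. */
		printf("array=%zu mask=%zu -> %s\n", size_cpus, size_mask,
		       size_cpus < size_mask ? "array" : "mask");
		return 0;
	}
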
-void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map,
-                             u16 type, int max)
-{
-       data->type = type;
-
-       switch (type) {
-       case PERF_CPU_MAP__CPUS:
-               synthesize_cpus((struct cpu_map_entries *) data->data, map);
-               break;
-       case PERF_CPU_MAP__MASK:
-               synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max);
-               break;
-       default:
-               break;
-       }
-}
-
-static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map)
-{
-       size_t size = sizeof(struct perf_record_cpu_map);
-       struct perf_record_cpu_map *event;
-       int max;
-       u16 type;
-
-       event = cpu_map_data__alloc(map, &size, &type, &max);
-       if (!event)
-               return NULL;
-
-       event->header.type = PERF_RECORD_CPU_MAP;
-       event->header.size = size;
-       event->data.type   = type;
-
-       cpu_map_data__synthesize(&event->data, map, type, max);
-       return event;
-}
-
-int perf_event__synthesize_cpu_map(struct perf_tool *tool,
-                                  struct perf_cpu_map *map,
-                                  perf_event__handler_t process,
-                                  struct machine *machine)
-{
-       struct perf_record_cpu_map *event;
-       int err;
-
-       event = cpu_map_event__new(map);
-       if (!event)
-               return -ENOMEM;
-
-       err = process(tool, (union perf_event *) event, NULL, machine);
-
-       free(event);
-       return err;
-}
-
-int perf_event__synthesize_stat_config(struct perf_tool *tool,
-                                      struct perf_stat_config *config,
-                                      perf_event__handler_t process,
-                                      struct machine *machine)
-{
-       struct perf_record_stat_config *event;
-       int size, i = 0, err;
-
-       size  = sizeof(*event);
-       size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
-
-       event = zalloc(size);
-       if (!event)
-               return -ENOMEM;
-
-       event->header.type = PERF_RECORD_STAT_CONFIG;
-       event->header.size = size;
-       event->nr          = PERF_STAT_CONFIG_TERM__MAX;
-
-#define ADD(__term, __val)                                     \
-       event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term;   \
-       event->data[i].val = __val;                             \
-       i++;
-
-       ADD(AGGR_MODE,  config->aggr_mode)
-       ADD(INTERVAL,   config->interval)
-       ADD(SCALE,      config->scale)
-
-       WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
-                 "stat config terms unbalanced\n");
-#undef ADD
-
-       err = process(tool, (union perf_event *) event, NULL, machine);
-
-       free(event);
-       return err;
-}
-
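The ADD() macro above keeps tag, value and running index in lock-step, and the WARN_ONCE catches a term added to the enum but forgotten in the list. The same pattern in a standalone sketch (toy enum and struct, not the perf ones):

	#include <assert.h>
	#include <stdio.h>

	enum { TERM_AGGR_MODE, TERM_INTERVAL, TERM_SCALE, TERM_MAX };
	struct term { int tag; unsigned long val; };

	int main(void)
	{
		struct term data[TERM_MAX];
		int i = 0;

	#define ADD(tag_, val_) do { \
			data[i].tag = (tag_); data[i].val = (val_); i++; \
		} while (0)
		ADD(TERM_AGGR_MODE, 1);
		ADD(TERM_INTERVAL, 1000);
		ADD(TERM_SCALE, 0);
	#undef ADD

		assert(i == TERM_MAX);	/* mirrors the "unbalanced" check */
		printf("packed %d config terms\n", i);
		return 0;
	}
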
-int perf_event__synthesize_stat(struct perf_tool *tool,
-                               u32 cpu, u32 thread, u64 id,
-                               struct perf_counts_values *count,
-                               perf_event__handler_t process,
-                               struct machine *machine)
-{
-       struct perf_record_stat event;
-
-       event.header.type = PERF_RECORD_STAT;
-       event.header.size = sizeof(event);
-       event.header.misc = 0;
-
-       event.id        = id;
-       event.cpu       = cpu;
-       event.thread    = thread;
-       event.val       = count->val;
-       event.ena       = count->ena;
-       event.run       = count->run;
-
-       return process(tool, (union perf_event *) &event, NULL, machine);
-}
-
-int perf_event__synthesize_stat_round(struct perf_tool *tool,
-                                     u64 evtime, u64 type,
-                                     perf_event__handler_t process,
-                                     struct machine *machine)
-{
-       struct perf_record_stat_round event;
-
-       event.header.type = PERF_RECORD_STAT_ROUND;
-       event.header.size = sizeof(event);
-       event.header.misc = 0;
-
-       event.time = evtime;
-       event.type = type;
-
-       return process(tool, (union perf_event *) &event, NULL, machine);
-}
-
 void perf_event__read_stat_config(struct perf_stat_config *config,
                                  struct perf_record_stat_config *event)
 {
index 47ad81d..a0a0c91 100644 (file)
@@ -279,54 +279,13 @@ enum {
 
 void perf_event__print_totals(void);
 
-struct perf_tool;
-struct perf_thread_map;
 struct perf_cpu_map;
+struct perf_record_stat_config;
 struct perf_stat_config;
-struct perf_counts_values;
-
-typedef int (*perf_event__handler_t)(struct perf_tool *tool,
-                                    union perf_event *event,
-                                    struct perf_sample *sample,
-                                    struct machine *machine);
+struct perf_tool;
 
-int perf_event__synthesize_thread_map(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine, bool mmap_data);
-int perf_event__synthesize_thread_map2(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine);
-int perf_event__synthesize_cpu_map(struct perf_tool *tool,
-                                  struct perf_cpu_map *cpus,
-                                  perf_event__handler_t process,
-                                  struct machine *machine);
-int perf_event__synthesize_threads(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine, bool mmap_data,
-                                  unsigned int nr_threads_synthesize);
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-                                      perf_event__handler_t process,
-                                      struct machine *machine);
-int perf_event__synthesize_stat_config(struct perf_tool *tool,
-                                      struct perf_stat_config *config,
-                                      perf_event__handler_t process,
-                                      struct machine *machine);
 void perf_event__read_stat_config(struct perf_stat_config *config,
                                  struct perf_record_stat_config *event);
-int perf_event__synthesize_stat(struct perf_tool *tool,
-                               u32 cpu, u32 thread, u64 id,
-                               struct perf_counts_values *count,
-                               perf_event__handler_t process,
-                               struct machine *machine);
-int perf_event__synthesize_stat_round(struct perf_tool *tool,
-                                     u64 time, u64 type,
-                                     perf_event__handler_t process,
-                                     struct machine *machine);
-int perf_event__synthesize_modules(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine);
 
 int perf_event__process_comm(struct perf_tool *tool,
                             union perf_event *event,
@@ -380,10 +339,6 @@ int perf_event__process_bpf(struct perf_tool *tool,
                            union perf_event *event,
                            struct perf_sample *sample,
                            struct machine *machine);
-int perf_tool__process_synth_event(struct perf_tool *tool,
-                                  union perf_event *event,
-                                  struct machine *machine,
-                                  perf_event__handler_t process);
 int perf_event__process(struct perf_tool *tool,
                        union perf_event *event,
                        struct perf_sample *sample,
@@ -405,34 +360,6 @@ void thread__resolve(struct thread *thread, struct addr_location *al,
 
 const char *perf_event__name(unsigned int id);
 
-size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
-                                    u64 read_format);
-int perf_event__synthesize_sample(union perf_event *event, u64 type,
-                                 u64 read_format,
-                                 const struct perf_sample *sample);
-
-pid_t perf_event__synthesize_comm(struct perf_tool *tool,
-                                 union perf_event *event, pid_t pid,
-                                 perf_event__handler_t process,
-                                 struct machine *machine);
-
-int perf_event__synthesize_namespaces(struct perf_tool *tool,
-                                     union perf_event *event,
-                                     pid_t pid, pid_t tgid,
-                                     perf_event__handler_t process,
-                                     struct machine *machine);
-
-int perf_event__synthesize_mmap_events(struct perf_tool *tool,
-                                      union perf_event *event,
-                                      pid_t pid, pid_t tgid,
-                                      perf_event__handler_t process,
-                                      struct machine *machine,
-                                      bool mmap_data);
-
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
-                                      perf_event__handler_t process,
-                                      struct machine *machine);
-
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
index 095924a..de79c73 100644 (file)
 #include <inttypes.h>
 #include <poll.h>
 #include "cpumap.h"
+#include "util/mmap.h"
 #include "thread_map.h"
 #include "target.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "debug.h"
 #include "units.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "../perf.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
@@ -49,18 +50,14 @@ int sigqueue(pid_t pid, int sig, const union sigval value);
 #endif
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
 
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
                  struct perf_thread_map *threads)
 {
-       int i;
-
-       for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
-               INIT_HLIST_HEAD(&evlist->heads[i]);
        perf_evlist__init(&evlist->core);
        perf_evlist__set_maps(&evlist->core, cpus, threads);
-       fdarray__init(&evlist->pollfd, 64);
+       fdarray__init(&evlist->core.pollfd, 64);
        evlist->workload.pid = -1;
        evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
 }
@@ -108,7 +105,7 @@ struct evlist *perf_evlist__new_dummy(void)
  */
 void perf_evlist__set_id_pos(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
 
        evlist->id_pos = first->id_pos;
        evlist->is_pos = first->is_pos;
@@ -124,7 +121,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist)
        perf_evlist__set_id_pos(evlist);
 }
 
-static void perf_evlist__purge(struct evlist *evlist)
+static void evlist__purge(struct evlist *evlist)
 {
        struct evsel *pos, *n;
 
@@ -137,11 +134,11 @@ static void perf_evlist__purge(struct evlist *evlist)
        evlist->core.nr_entries = 0;
 }
 
-void perf_evlist__exit(struct evlist *evlist)
+void evlist__exit(struct evlist *evlist)
 {
        zfree(&evlist->mmap);
        zfree(&evlist->overwrite_mmap);
-       fdarray__exit(&evlist->pollfd);
+       fdarray__exit(&evlist->core.pollfd);
 }
 
 void evlist__delete(struct evlist *evlist)
@@ -149,14 +146,14 @@ void evlist__delete(struct evlist *evlist)
        if (evlist == NULL)
                return;
 
-       perf_evlist__munmap(evlist);
+       evlist__munmap(evlist);
        evlist__close(evlist);
        perf_cpu_map__put(evlist->core.cpus);
        perf_thread_map__put(evlist->core.threads);
        evlist->core.cpus = NULL;
        evlist->core.threads = NULL;
-       perf_evlist__purge(evlist);
-       perf_evlist__exit(evlist);
+       evlist__purge(evlist);
+       evlist__exit(evlist);
        free(evlist);
 }
 
@@ -318,7 +315,7 @@ int perf_evlist__add_newtp(struct evlist *evlist,
 static int perf_evlist__nr_threads(struct evlist *evlist,
                                   struct evsel *evsel)
 {
-       if (evsel->system_wide)
+       if (evsel->core.system_wide)
                return 1;
        else
                return perf_thread_map__nr(evlist->core.threads);
@@ -401,128 +398,29 @@ int perf_evlist__enable_event_idx(struct evlist *evlist,
                return perf_evlist__enable_event_thread(evlist, evsel, idx);
 }
 
-int perf_evlist__alloc_pollfd(struct evlist *evlist)
+int evlist__add_pollfd(struct evlist *evlist, int fd)
 {
-       int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
-       int nr_threads = perf_thread_map__nr(evlist->core.threads);
-       int nfds = 0;
-       struct evsel *evsel;
-
-       evlist__for_each_entry(evlist, evsel) {
-               if (evsel->system_wide)
-                       nfds += nr_cpus;
-               else
-                       nfds += nr_cpus * nr_threads;
-       }
-
-       if (fdarray__available_entries(&evlist->pollfd) < nfds &&
-           fdarray__grow(&evlist->pollfd, nfds) < 0)
-               return -ENOMEM;
-
-       return 0;
-}
-
-static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
-                                    struct perf_mmap *map, short revent)
-{
-       int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
-       /*
-        * Save the idx so that when we filter out fds POLLHUP'ed we can
-        * close the associated evlist->mmap[] entry.
-        */
-       if (pos >= 0) {
-               evlist->pollfd.priv[pos].ptr = map;
-
-               fcntl(fd, F_SETFL, O_NONBLOCK);
-       }
-
-       return pos;
-}
-
-int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
-{
-       return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
+       return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
 }
 
 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
                                         void *arg __maybe_unused)
 {
-       struct perf_mmap *map = fda->priv[fd].ptr;
+       struct mmap *map = fda->priv[fd].ptr;
 
        if (map)
                perf_mmap__put(map);
 }
 
-int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
 {
-       return fdarray__filter(&evlist->pollfd, revents_and_mask,
+       return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
                               perf_evlist__munmap_filtered, NULL);
 }
 
-int perf_evlist__poll(struct evlist *evlist, int timeout)
+int evlist__poll(struct evlist *evlist, int timeout)
 {
-       return fdarray__poll(&evlist->pollfd, timeout);
-}
-
-static void perf_evlist__id_hash(struct evlist *evlist,
-                                struct evsel *evsel,
-                                int cpu, int thread, u64 id)
-{
-       int hash;
-       struct perf_sample_id *sid = SID(evsel, cpu, thread);
-
-       sid->id = id;
-       sid->evsel = evsel;
-       hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
-       hlist_add_head(&sid->node, &evlist->heads[hash]);
-}
-
-void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
-                        int cpu, int thread, u64 id)
-{
-       perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
-       evsel->id[evsel->ids++] = id;
-}
-
-int perf_evlist__id_add_fd(struct evlist *evlist,
-                          struct evsel *evsel,
-                          int cpu, int thread, int fd)
-{
-       u64 read_data[4] = { 0, };
-       int id_idx = 1; /* The first entry is the counter value */
-       u64 id;
-       int ret;
-
-       ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
-       if (!ret)
-               goto add;
-
-       if (errno != ENOTTY)
-               return -1;
-
-       /* Legacy way to get the event id... All hail to old kernels! */
-
-       /*
-        * This way does not work with group format read, so bail
-        * out in that case.
-        */
-       if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
-               return -1;
-
-       if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
-           read(fd, &read_data, sizeof(read_data)) == -1)
-               return -1;
-
-       if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               ++id_idx;
-       if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               ++id_idx;
-
-       id = read_data[id_idx];
-
- add:
-       perf_evlist__id_add(evlist, evsel, cpu, thread, id);
-       return 0;
+       return perf_evlist__poll(&evlist->core, timeout);
 }
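In the legacy fallback being removed here, the id's slot in the read() buffer shifts with the PERF_FORMAT_* bits. A standalone sketch of that index computation, using the real constants from linux/perf_event.h:

	#include <linux/perf_event.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t read_format = PERF_FORMAT_ID |
				       PERF_FORMAT_TOTAL_TIME_ENABLED;
		int id_idx = 1;	/* read_data[0] is the counter value */

		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
			id_idx++;
		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
			id_idx++;

		printf("event id lands in read_data[%d]\n", id_idx);
		return 0;
	}
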
 
 static void perf_evlist__set_sid_idx(struct evlist *evlist,
@@ -535,7 +433,7 @@ static void perf_evlist__set_sid_idx(struct evlist *evlist,
                sid->cpu = evlist->core.cpus->map[cpu];
        else
                sid->cpu = -1;
-       if (!evsel->system_wide && evlist->core.threads && thread >= 0)
+       if (!evsel->core.system_wide && evlist->core.threads && thread >= 0)
                sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
        else
                sid->tid = -1;
@@ -548,7 +446,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
        int hash;
 
        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
-       head = &evlist->heads[hash];
+       head = &evlist->core.heads[hash];
 
        hlist_for_each_entry(sid, head, node)
                if (sid->id == id)
@@ -562,14 +460,14 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
        struct perf_sample_id *sid;
 
        if (evlist->core.nr_entries == 1 || !id)
-               return perf_evlist__first(evlist);
+               return evlist__first(evlist);
 
        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
-               return sid->evsel;
+               return container_of(sid->evsel, struct evsel, core);
 
        if (!perf_evlist__sample_id_all(evlist))
-               return perf_evlist__first(evlist);
+               return evlist__first(evlist);
 
        return NULL;
 }
@@ -584,7 +482,7 @@ struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
 
        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
-               return sid->evsel;
+               return container_of(sid->evsel, struct evsel, core);
 
        return NULL;
 }
@@ -613,7 +511,7 @@ static int perf_evlist__event2id(struct evlist *evlist,
 struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
                                            union perf_event *event)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;
@@ -634,11 +532,11 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
                return first;
 
        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
-       head = &evlist->heads[hash];
+       head = &evlist->core.heads[hash];
 
        hlist_for_each_entry(sid, head, node) {
                if (sid->id == id)
-                       return sid->evsel;
+                       return container_of(sid->evsel, struct evsel, core);
        }
        return NULL;
 }
@@ -650,8 +548,8 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value)
        if (!evlist->overwrite_mmap)
                return 0;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               int fd = evlist->overwrite_mmap[i].fd;
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               int fd = evlist->overwrite_mmap[i].core.fd;
                int err;
 
                if (fd < 0)
@@ -673,42 +571,42 @@ static int perf_evlist__resume(struct evlist *evlist)
        return perf_evlist__set_paused(evlist, false);
 }
 
-static void perf_evlist__munmap_nofree(struct evlist *evlist)
+static void evlist__munmap_nofree(struct evlist *evlist)
 {
        int i;
 
        if (evlist->mmap)
-               for (i = 0; i < evlist->nr_mmaps; i++)
+               for (i = 0; i < evlist->core.nr_mmaps; i++)
                        perf_mmap__munmap(&evlist->mmap[i]);
 
        if (evlist->overwrite_mmap)
-               for (i = 0; i < evlist->nr_mmaps; i++)
+               for (i = 0; i < evlist->core.nr_mmaps; i++)
                        perf_mmap__munmap(&evlist->overwrite_mmap[i]);
 }
 
-void perf_evlist__munmap(struct evlist *evlist)
+void evlist__munmap(struct evlist *evlist)
 {
-       perf_evlist__munmap_nofree(evlist);
+       evlist__munmap_nofree(evlist);
        zfree(&evlist->mmap);
        zfree(&evlist->overwrite_mmap);
 }
 
-static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
-                                                bool overwrite)
+static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
+                                      bool overwrite)
 {
        int i;
-       struct perf_mmap *map;
+       struct mmap *map;
 
-       evlist->nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
+       evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
        if (perf_cpu_map__empty(evlist->core.cpus))
-               evlist->nr_mmaps = perf_thread_map__nr(evlist->core.threads);
-       map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+               evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
+       map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
        if (!map)
                return NULL;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               map[i].fd = -1;
-               map[i].overwrite = overwrite;
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               map[i].core.fd = -1;
+               map[i].core.overwrite = overwrite;
                /*
                 * When the perf_mmap() call is made we grab one refcount, plus
                 * one extra to let perf_mmap__consume() get the last
@@ -718,7 +616,7 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
                 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
                 * thus does perf_mmap__get() on it.
                 */
-               refcount_set(&map[i].refcnt, 0);
+               refcount_set(&map[i].core.refcnt, 0);
        }
        return map;
 }
@@ -732,7 +630,7 @@ perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
        return true;
 }
 
-static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
+static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                                       struct mmap_params *mp, int cpu_idx,
                                       int thread, int *_output, int *_output_overwrite)
 {
@@ -741,7 +639,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
        int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);
 
        evlist__for_each_entry(evlist, evsel) {
-               struct perf_mmap *maps = evlist->mmap;
+               struct mmap *maps = evlist->mmap;
                int *output = _output;
                int fd;
                int cpu;
@@ -752,7 +650,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                        maps = evlist->overwrite_mmap;
 
                        if (!maps) {
-                               maps = perf_evlist__alloc_mmap(evlist, true);
+                               maps = evlist__alloc_mmap(evlist, true);
                                if (!maps)
                                        return -1;
                                evlist->overwrite_mmap = maps;
@@ -762,7 +660,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                        mp->prot &= ~PROT_WRITE;
                }
 
-               if (evsel->system_wide && thread)
+               if (evsel->core.system_wide && thread)
                        continue;
 
                cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
@@ -792,14 +690,14 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                 * other events, so it should not need to be polled anyway.
                 * Therefore don't add it for polling.
                 */
-               if (!evsel->system_wide &&
-                   __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
+               if (!evsel->core.system_wide &&
+                    perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) {
                        perf_mmap__put(&maps[idx]);
                        return -1;
                }
 
                if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
-                       if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+                       if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread,
                                                   fd) < 0)
                                return -1;
                        perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
@@ -810,7 +708,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
        return 0;
 }
 
-static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
+static int evlist__mmap_per_cpu(struct evlist *evlist,
                                     struct mmap_params *mp)
 {
        int cpu, thread;
@@ -826,7 +724,7 @@ static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
                                              true);
 
                for (thread = 0; thread < nr_threads; thread++) {
-                       if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
+                       if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
                                                        thread, &output, &output_overwrite))
                                goto out_unmap;
                }
@@ -835,11 +733,11 @@ static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
        return 0;
 
 out_unmap:
-       perf_evlist__munmap_nofree(evlist);
+       evlist__munmap_nofree(evlist);
        return -1;
 }
 
-static int perf_evlist__mmap_per_thread(struct evlist *evlist,
+static int evlist__mmap_per_thread(struct evlist *evlist,
                                        struct mmap_params *mp)
 {
        int thread;
@@ -853,7 +751,7 @@ static int perf_evlist__mmap_per_thread(struct evlist *evlist,
                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
                                              false);
 
-               if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
+               if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
                                                &output, &output_overwrite))
                        goto out_unmap;
        }
@@ -861,7 +759,7 @@ static int perf_evlist__mmap_per_thread(struct evlist *evlist,
        return 0;
 
 out_unmap:
-       perf_evlist__munmap_nofree(evlist);
+       evlist__munmap_nofree(evlist);
        return -1;
 }
 
@@ -888,7 +786,7 @@ unsigned long perf_event_mlock_kb_in_pages(void)
        return pages;
 }
 
-size_t perf_evlist__mmap_size(unsigned long pages)
+size_t evlist__mmap_size(unsigned long pages)
 {
        if (pages == UINT_MAX)
                pages = perf_event_mlock_kb_in_pages();
@@ -971,7 +869,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
 }
 
 /**
- * perf_evlist__mmap_ex - Create mmaps to receive events.
+ * evlist__mmap_ex - Create mmaps to receive events.
  * @evlist: list of events
  * @pages: map length in pages
  * @overwrite: overwrite older events?
@@ -979,7 +877,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  * @auxtrace_overwrite - overwrite older auxtrace data?
  *
  * If @overwrite is %false the user needs to signal event consumption using
- * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
+ * perf_mmap__write_tail().  Using evlist__mmap_read() does this
  * automatically.
  *
  * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
@@ -987,7 +885,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  *
  * Return: %0 on success, negative error code otherwise.
  */
-int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
                         unsigned int auxtrace_pages,
                         bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
                         int comp_level)
@@ -1004,36 +902,36 @@ int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
                                  .comp_level = comp_level };
 
        if (!evlist->mmap)
-               evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
+               evlist->mmap = evlist__alloc_mmap(evlist, false);
        if (!evlist->mmap)
                return -ENOMEM;
 
-       if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+       if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0)
                return -ENOMEM;
 
-       evlist->mmap_len = perf_evlist__mmap_size(pages);
-       pr_debug("mmap size %zuB\n", evlist->mmap_len);
-       mp.mask = evlist->mmap_len - page_size - 1;
+       evlist->core.mmap_len = evlist__mmap_size(pages);
+       pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
+       mp.mask = evlist->core.mmap_len - page_size - 1;
 
-       auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
+       auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
                                   auxtrace_pages, auxtrace_overwrite);
 
        evlist__for_each_entry(evlist, evsel) {
                if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
-                   evsel->sample_id == NULL &&
-                   perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+                   evsel->core.sample_id == NULL &&
+                   perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0)
                        return -ENOMEM;
        }
 
        if (perf_cpu_map__empty(cpus))
-               return perf_evlist__mmap_per_thread(evlist, &mp);
+               return evlist__mmap_per_thread(evlist, &mp);
 
-       return perf_evlist__mmap_per_cpu(evlist, &mp);
+       return evlist__mmap_per_cpu(evlist, &mp);
 }
 
-int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
+int evlist__mmap(struct evlist *evlist, unsigned int pages)
 {
-       return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
+       return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
 }
 
 int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
@@ -1225,7 +1123,7 @@ u64 perf_evlist__combined_branch_type(struct evlist *evlist)
 
 bool perf_evlist__valid_read_format(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist), *pos = first;
+       struct evsel *first = evlist__first(evlist), *pos = first;
        u64 read_format = first->core.attr.read_format;
        u64 sample_type = first->core.attr.sample_type;
 
@@ -1243,15 +1141,9 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
        return true;
 }
 
-u64 perf_evlist__read_format(struct evlist *evlist)
-{
-       struct evsel *first = perf_evlist__first(evlist);
-       return first->core.attr.read_format;
-}
-
 u16 perf_evlist__id_hdr_size(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
        struct perf_sample *data;
        u64 sample_type;
        u16 size = 0;
@@ -1284,7 +1176,7 @@ out:
 
 bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist), *pos = first;
+       struct evsel *first = evlist__first(evlist), *pos = first;
 
        evlist__for_each_entry_continue(evlist, pos) {
                if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
@@ -1296,7 +1188,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
 
 bool perf_evlist__sample_id_all(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
        return first->core.attr.sample_id_all;
 }
 
@@ -1529,19 +1421,6 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
        return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
 }
 
-size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
-{
-       struct evsel *evsel;
-       size_t printed = 0;
-
-       evlist__for_each_entry(evlist, evsel) {
-               printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
-                                  perf_evsel__name(evsel));
-       }
-
-       return printed + fprintf(fp, "\n");
-}
-
 int perf_evlist__strerror_open(struct evlist *evlist,
                               int err, char *buf, size_t size)
 {
@@ -1571,7 +1450,7 @@ int perf_evlist__strerror_open(struct evlist *evlist,
                                    "Hint:\tThe current value is %d.", value);
                break;
        case EINVAL: {
-               struct evsel *first = perf_evlist__first(evlist);
+               struct evsel *first = evlist__first(evlist);
                int max_freq;
 
                if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
@@ -1599,7 +1478,7 @@ out_default:
 int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
 {
        char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
-       int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
+       int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
 
        switch (err) {
        case EPERM:
@@ -1633,7 +1512,7 @@ void perf_evlist__to_front(struct evlist *evlist,
        struct evsel *evsel, *n;
        LIST_HEAD(move);
 
-       if (move_evsel == perf_evlist__first(evlist))
+       if (move_evsel == evlist__first(evlist))
                return;
 
        evlist__for_each_entry_safe(evlist, n, evsel) {
@@ -1754,7 +1633,7 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist)
 void perf_evlist__force_leader(struct evlist *evlist)
 {
        if (!evlist->nr_groups) {
-               struct evsel *leader = perf_evlist__first(evlist);
+               struct evsel *leader = evlist__first(evlist);
 
                perf_evlist__set_leader(evlist);
                leader->forced_leader = true;
@@ -1780,7 +1659,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
                        is_open = false;
                if (c2->leader == leader) {
                        if (is_open)
-                               evsel__close(c2);
+                               perf_evsel__close(&c2->core);
                        c2->leader = c2;
                        c2->core.nr_members = 0;
                }
@@ -1844,10 +1723,10 @@ static void *perf_evlist__poll_thread(void *arg)
                        draining = true;
 
                if (!draining)
-                       perf_evlist__poll(evlist, 1000);
+                       evlist__poll(evlist, 1000);
 
-               for (i = 0; i < evlist->nr_mmaps; i++) {
-                       struct perf_mmap *map = &evlist->mmap[i];
+               for (i = 0; i < evlist->core.nr_mmaps; i++) {
+                       struct mmap *map = &evlist->mmap[i];
                        union perf_event *event;
 
                        if (perf_mmap__read_init(map))
@@ -1889,7 +1768,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist,
                        goto out_delete_evlist;
        }
 
-       if (perf_evlist__mmap(evlist, UINT_MAX))
+       if (evlist__mmap(evlist, UINT_MAX))
                goto out_delete_evlist;
 
        evlist__for_each_entry(evlist, counter) {
index a55f0f2..7cfe755 100644 (file)
@@ -7,11 +7,11 @@
 #include <linux/refcount.h>
 #include <linux/list.h>
 #include <api/fd/array.h>
-#include <stdio.h>
 #include <internal/evlist.h>
+#include <internal/evsel.h>
 #include "events_stats.h"
 #include "evsel.h"
-#include "mmap.h"
+#include <pthread.h>
 #include <signal.h>
 #include <unistd.h>
 
@@ -20,16 +20,38 @@ struct thread_map;
 struct perf_cpu_map;
 struct record_opts;
 
-#define PERF_EVLIST__HLIST_BITS 8
-#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+/*
+ * State machine of bkw_mmap_state:
+ *
+ *                     .________________(forbid)_____________.
+ *                     |                                     V
+ * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ *                     ^  ^              |   ^               |
+ *                     |  |__(forbid)____/   |___(forbid)___/|
+ *                     |                                     |
+ *                      \_________________(3)_______________/
+ *
+ * NOTREADY     : Backward ring buffers are not ready
+ * RUNNING      : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY        : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+       BKW_MMAP_NOTREADY,
+       BKW_MMAP_RUNNING,
+       BKW_MMAP_DATA_PENDING,
+       BKW_MMAP_EMPTY,
+};
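A toy walk along the legal path in the diagram above, steps (0) through (3); this only illustrates the transitions and is not the perf implementation:

	#include <stdio.h>

	enum bkw { NOTREADY, RUNNING, DATA_PENDING, EMPTY };

	int main(void)
	{
		static const char * const name[] = {
			"NOTREADY", "RUNNING", "DATA_PENDING", "EMPTY",
		};
		enum bkw s = NOTREADY;
		enum bkw path[] = { RUNNING, DATA_PENDING, EMPTY, RUNNING };

		for (unsigned long i = 0; i < sizeof(path) / sizeof(path[0]); i++) {
			printf("%s -> %s\n", name[s], name[path[i]]);
			s = path[i];
		}
		return 0;
	}
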
 
 struct evlist {
        struct perf_evlist core;
-       struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
        int              nr_groups;
-       int              nr_mmaps;
        bool             enabled;
-       size_t           mmap_len;
        int              id_pos;
        int              is_pos;
        u64              combined_sample_type;
@@ -38,9 +60,8 @@ struct evlist {
                int     cork_fd;
                pid_t   pid;
        } workload;
-       struct fdarray   pollfd;
-       struct perf_mmap *mmap;
-       struct perf_mmap *overwrite_mmap;
+       struct mmap *mmap;
+       struct mmap *overwrite_mmap;
        struct evsel *selected;
        struct events_stats stats;
        struct perf_env *env;
@@ -65,7 +86,7 @@ struct evlist *perf_evlist__new_default(void);
 struct evlist *perf_evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
                  struct perf_thread_map *threads);
-void perf_evlist__exit(struct evlist *evlist);
+void evlist__exit(struct evlist *evlist);
 void evlist__delete(struct evlist *evlist);
 
 void evlist__add(struct evlist *evlist, struct evsel *entry);
@@ -119,17 +140,10 @@ struct evsel *
 perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
                                     const char *name);
 
-void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
-                        int cpu, int thread, u64 id);
-int perf_evlist__id_add_fd(struct evlist *evlist,
-                          struct evsel *evsel,
-                          int cpu, int thread, int fd);
-
-int perf_evlist__add_pollfd(struct evlist *evlist, int fd);
-int perf_evlist__alloc_pollfd(struct evlist *evlist);
-int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
+int evlist__add_pollfd(struct evlist *evlist, int fd);
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
 
-int perf_evlist__poll(struct evlist *evlist, int timeout);
+int evlist__poll(struct evlist *evlist, int timeout);
 
 struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id);
 struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
@@ -139,7 +153,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id);
 
 void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
 
-void perf_evlist__mmap_consume(struct evlist *evlist, int idx);
+void evlist__mmap_consume(struct evlist *evlist, int idx);
 
 int evlist__open(struct evlist *evlist);
 void evlist__close(struct evlist *evlist);
@@ -170,14 +184,14 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,
 
 unsigned long perf_event_mlock_kb_in_pages(void);
 
-int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
                         unsigned int auxtrace_pages,
                         bool auxtrace_overwrite, int nr_cblocks,
                         int affinity, int flush, int comp_level);
-int perf_evlist__mmap(struct evlist *evlist, unsigned int pages);
-void perf_evlist__munmap(struct evlist *evlist);
+int evlist__mmap(struct evlist *evlist, unsigned int pages);
+void evlist__munmap(struct evlist *evlist);
 
-size_t perf_evlist__mmap_size(unsigned long pages);
+size_t evlist__mmap_size(unsigned long pages);
 
 void evlist__disable(struct evlist *evlist);
 void evlist__enable(struct evlist *evlist);
@@ -195,7 +209,6 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 void __perf_evlist__set_leader(struct list_head *list);
 void perf_evlist__set_leader(struct evlist *evlist);
 
-u64 perf_evlist__read_format(struct evlist *evlist);
 u64 __perf_evlist__combined_sample_type(struct evlist *evlist);
 u64 perf_evlist__combined_sample_type(struct evlist *evlist);
 u64 perf_evlist__combined_branch_type(struct evlist *evlist);
@@ -221,17 +234,19 @@ static inline bool perf_evlist__empty(struct evlist *evlist)
        return list_empty(&evlist->core.entries);
 }
 
-static inline struct evsel *perf_evlist__first(struct evlist *evlist)
+static inline struct evsel *evlist__first(struct evlist *evlist)
 {
-       return list_entry(evlist->core.entries.next, struct evsel, core.node);
+       struct perf_evsel *evsel = perf_evlist__first(&evlist->core);
+
+       return container_of(evsel, struct evsel, core);
 }
 
-static inline struct evsel *perf_evlist__last(struct evlist *evlist)
+static inline struct evsel *evlist__last(struct evlist *evlist)
 {
-       return list_entry(evlist->core.entries.prev, struct evsel, core.node);
-}
+       struct perf_evsel *evsel = perf_evlist__last(&evlist->core);
 
-size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp);
+       return container_of(evsel, struct evsel, core);
+}
 
 int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
 int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
index 8582538..abc7fda 100644 (file)
 #include "counts.h"
 #include "event.h"
 #include "evsel.h"
+#include "util/env.h"
+#include "util/evsel_config.h"
+#include "util/evsel_fprintf.h"
 #include "evlist.h"
-#include "cpumap.h"
+#include <perf/cpumap.h>
 #include "thread_map.h"
 #include "target.h"
 #include "perf_regs.h"
@@ -45,6 +48,7 @@
 #include "../perf-sys.h"
 #include "util/parse-branch-options.h"
 #include <internal/xyarray.h>
+#include <internal/lib.h>
 
 #include <linux/ctype.h>
 
@@ -1226,36 +1230,6 @@ int evsel__disable(struct evsel *evsel)
        return err;
 }
 
-int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads)
-{
-       if (ncpus == 0 || nthreads == 0)
-               return 0;
-
-       if (evsel->system_wide)
-               nthreads = 1;
-
-       evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
-       if (evsel->sample_id == NULL)
-               return -ENOMEM;
-
-       evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
-       if (evsel->id == NULL) {
-               xyarray__delete(evsel->sample_id);
-               evsel->sample_id = NULL;
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-static void perf_evsel__free_id(struct evsel *evsel)
-{
-       xyarray__delete(evsel->sample_id);
-       evsel->sample_id = NULL;
-       zfree(&evsel->id);
-       evsel->ids = 0;
-}
-
 static void perf_evsel__free_config_terms(struct evsel *evsel)
 {
        struct perf_evsel_config_term *term, *h;
@@ -1272,7 +1246,7 @@ void perf_evsel__exit(struct evsel *evsel)
        assert(evsel->evlist == NULL);
        perf_evsel__free_counts(evsel);
        perf_evsel__free_fd(&evsel->core);
-       perf_evsel__free_id(evsel);
+       perf_evsel__free_id(&evsel->core);
        perf_evsel__free_config_terms(evsel);
        cgroup__put(evsel->cgrp);
        perf_cpu_map__put(evsel->core.cpus);
@@ -1472,152 +1446,6 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
        return fd;
 }
 
-struct bit_names {
-       int bit;
-       const char *name;
-};
-
-static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
-{
-       bool first_bit = true;
-       int i = 0;
-
-       do {
-               if (value & bits[i].bit) {
-                       buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
-                       first_bit = false;
-               }
-       } while (bits[++i].name != NULL);
-}
-
-static void __p_sample_type(char *buf, size_t size, u64 value)
-{
-#define bit_name(n) { PERF_SAMPLE_##n, #n }
-       struct bit_names bits[] = {
-               bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
-               bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
-               bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
-               bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
-               bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
-               bit_name(WEIGHT), bit_name(PHYS_ADDR),
-               { .name = NULL, }
-       };
-#undef bit_name
-       __p_bits(buf, size, value, bits);
-}
-
-static void __p_branch_sample_type(char *buf, size_t size, u64 value)
-{
-#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
-       struct bit_names bits[] = {
-               bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
-               bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
-               bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
-               bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
-               bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
-               { .name = NULL, }
-       };
-#undef bit_name
-       __p_bits(buf, size, value, bits);
-}
-
-static void __p_read_format(char *buf, size_t size, u64 value)
-{
-#define bit_name(n) { PERF_FORMAT_##n, #n }
-       struct bit_names bits[] = {
-               bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
-               bit_name(ID), bit_name(GROUP),
-               { .name = NULL, }
-       };
-#undef bit_name
-       __p_bits(buf, size, value, bits);
-}
-
-#define BUF_SIZE               1024
-
-#define p_hex(val)             snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
-#define p_unsigned(val)                snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
-#define p_signed(val)          snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
-#define p_sample_type(val)     __p_sample_type(buf, BUF_SIZE, val)
-#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
-#define p_read_format(val)     __p_read_format(buf, BUF_SIZE, val)
-
-#define PRINT_ATTRn(_n, _f, _p)                                \
-do {                                                   \
-       if (attr->_f) {                                 \
-               _p(attr->_f);                           \
-               ret += attr__fprintf(fp, _n, buf, priv);\
-       }                                               \
-} while (0)
-
-#define PRINT_ATTRf(_f, _p)    PRINT_ATTRn(#_f, _f, _p)
-
-int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
-                            attr__fprintf_f attr__fprintf, void *priv)
-{
-       char buf[BUF_SIZE];
-       int ret = 0;
-
-       PRINT_ATTRf(type, p_unsigned);
-       PRINT_ATTRf(size, p_unsigned);
-       PRINT_ATTRf(config, p_hex);
-       PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
-       PRINT_ATTRf(sample_type, p_sample_type);
-       PRINT_ATTRf(read_format, p_read_format);
-
-       PRINT_ATTRf(disabled, p_unsigned);
-       PRINT_ATTRf(inherit, p_unsigned);
-       PRINT_ATTRf(pinned, p_unsigned);
-       PRINT_ATTRf(exclusive, p_unsigned);
-       PRINT_ATTRf(exclude_user, p_unsigned);
-       PRINT_ATTRf(exclude_kernel, p_unsigned);
-       PRINT_ATTRf(exclude_hv, p_unsigned);
-       PRINT_ATTRf(exclude_idle, p_unsigned);
-       PRINT_ATTRf(mmap, p_unsigned);
-       PRINT_ATTRf(comm, p_unsigned);
-       PRINT_ATTRf(freq, p_unsigned);
-       PRINT_ATTRf(inherit_stat, p_unsigned);
-       PRINT_ATTRf(enable_on_exec, p_unsigned);
-       PRINT_ATTRf(task, p_unsigned);
-       PRINT_ATTRf(watermark, p_unsigned);
-       PRINT_ATTRf(precise_ip, p_unsigned);
-       PRINT_ATTRf(mmap_data, p_unsigned);
-       PRINT_ATTRf(sample_id_all, p_unsigned);
-       PRINT_ATTRf(exclude_host, p_unsigned);
-       PRINT_ATTRf(exclude_guest, p_unsigned);
-       PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
-       PRINT_ATTRf(exclude_callchain_user, p_unsigned);
-       PRINT_ATTRf(mmap2, p_unsigned);
-       PRINT_ATTRf(comm_exec, p_unsigned);
-       PRINT_ATTRf(use_clockid, p_unsigned);
-       PRINT_ATTRf(context_switch, p_unsigned);
-       PRINT_ATTRf(write_backward, p_unsigned);
-       PRINT_ATTRf(namespaces, p_unsigned);
-       PRINT_ATTRf(ksymbol, p_unsigned);
-       PRINT_ATTRf(bpf_event, p_unsigned);
-       PRINT_ATTRf(aux_output, p_unsigned);
-
-       PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
-       PRINT_ATTRf(bp_type, p_unsigned);
-       PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
-       PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
-       PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
-       PRINT_ATTRf(sample_regs_user, p_hex);
-       PRINT_ATTRf(sample_stack_user, p_unsigned);
-       PRINT_ATTRf(clockid, p_signed);
-       PRINT_ATTRf(sample_regs_intr, p_hex);
-       PRINT_ATTRf(aux_watermark, p_unsigned);
-       PRINT_ATTRf(sample_max_stack, p_unsigned);
-
-       return ret;
-}
-
-static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
-                               void *priv __maybe_unused)
-{
-       return fprintf(fp, "  %-32s %s\n", name, val);
-}
-
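
The block deleted above is relocated, not lost: the fprintf machinery it
belongs to reappears under the new util/evsel_fprintf.h further down. Its
core trick is decoding a bitmask into a '|'-separated name list by walking a
{bit, name} table until a NULL-named sentinel. A condensed, self-contained
sketch of the technique:

    #include <stdint.h>
    #include <stdio.h>

    struct bit_names { uint64_t bit; const char *name; };

    static void print_bits(uint64_t value, const struct bit_names *bits)
    {
            int first = 1;

            for (int i = 0; bits[i].name != NULL; i++) {
                    if (value & bits[i].bit) {
                            printf("%s%s", first ? "" : "|", bits[i].name);
                            first = 0;
                    }
            }
    }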
 static void perf_evsel__remove_fd(struct evsel *pos,
                                  int nr_cpus, int nr_threads,
                                  int thread_idx)
@@ -1662,7 +1490,7 @@ static bool ignore_missing_thread(struct evsel *evsel,
                return false;
 
        /* The system wide setup does not work with threads. */
-       if (evsel->system_wide)
+       if (evsel->core.system_wide)
                return false;
 
        /* The -ESRCH is perf event syscall errno for pid's not found. */
@@ -1688,6 +1516,12 @@ static bool ignore_missing_thread(struct evsel *evsel,
        return true;
 }
 
+static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
+                               void *priv __maybe_unused)
+{
+       return fprintf(fp, "  %-32s %s\n", name, val);
+}
+
 static void display_attr(struct perf_event_attr *attr)
 {
        if (verbose >= 2) {
@@ -1771,7 +1605,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
                threads = empty_thread_map;
        }
 
-       if (evsel->system_wide)
+       if (evsel->core.system_wide)
                nthreads = 1;
        else
                nthreads = threads->nr;
@@ -1818,7 +1652,7 @@ retry_sample_id:
                for (thread = 0; thread < nthreads; thread++) {
                        int fd, group_fd;
 
-                       if (!evsel->cgrp && !evsel->system_wide)
+                       if (!evsel->cgrp && !evsel->core.system_wide)
                                pid = perf_thread_map__pid(threads, thread);
 
                        group_fd = get_group_fd(evsel, cpu, thread);
@@ -1991,7 +1825,7 @@ out_close:
 void evsel__close(struct evsel *evsel)
 {
        perf_evsel__close(&evsel->core);
-       perf_evsel__free_id(evsel);
+       perf_evsel__free_id(&evsel->core);
 }
 
 int perf_evsel__open_per_cpu(struct evsel *evsel,
@@ -2419,283 +2253,6 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
        return 0;
 }
 
-size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
-                                    u64 read_format)
-{
-       size_t sz, result = sizeof(struct perf_record_sample);
-
-       if (type & PERF_SAMPLE_IDENTIFIER)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_IP)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_TID)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_TIME)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_ADDR)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_ID)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_STREAM_ID)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_CPU)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_PERIOD)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_READ) {
-               result += sizeof(u64);
-               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-                       result += sizeof(u64);
-               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-                       result += sizeof(u64);
-               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
-               if (read_format & PERF_FORMAT_GROUP) {
-                       sz = sample->read.group.nr *
-                            sizeof(struct sample_read_value);
-                       result += sz;
-               } else {
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_CALLCHAIN) {
-               sz = (sample->callchain->nr + 1) * sizeof(u64);
-               result += sz;
-       }
-
-       if (type & PERF_SAMPLE_RAW) {
-               result += sizeof(u32);
-               result += sample->raw_size;
-       }
-
-       if (type & PERF_SAMPLE_BRANCH_STACK) {
-               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-               sz += sizeof(u64);
-               result += sz;
-       }
-
-       if (type & PERF_SAMPLE_REGS_USER) {
-               if (sample->user_regs.abi) {
-                       result += sizeof(u64);
-                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
-                       result += sz;
-               } else {
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_STACK_USER) {
-               sz = sample->user_stack.size;
-               result += sizeof(u64);
-               if (sz) {
-                       result += sz;
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_WEIGHT)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_DATA_SRC)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_TRANSACTION)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_REGS_INTR) {
-               if (sample->intr_regs.abi) {
-                       result += sizeof(u64);
-                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
-                       result += sz;
-               } else {
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_PHYS_ADDR)
-               result += sizeof(u64);
-
-       return result;
-}
-
-int perf_event__synthesize_sample(union perf_event *event, u64 type,
-                                 u64 read_format,
-                                 const struct perf_sample *sample)
-{
-       __u64 *array;
-       size_t sz;
-       /*
-        * used for cross-endian analysis. See git commit 65014ab3
-        * for why this goofiness is needed.
-        */
-       union u64_swap u;
-
-       array = event->sample.array;
-
-       if (type & PERF_SAMPLE_IDENTIFIER) {
-               *array = sample->id;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_IP) {
-               *array = sample->ip;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_TID) {
-               u.val32[0] = sample->pid;
-               u.val32[1] = sample->tid;
-               *array = u.val64;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_TIME) {
-               *array = sample->time;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_ADDR) {
-               *array = sample->addr;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_ID) {
-               *array = sample->id;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_STREAM_ID) {
-               *array = sample->stream_id;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_CPU) {
-               u.val32[0] = sample->cpu;
-               u.val32[1] = 0;
-               *array = u.val64;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_PERIOD) {
-               *array = sample->period;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_READ) {
-               if (read_format & PERF_FORMAT_GROUP)
-                       *array = sample->read.group.nr;
-               else
-                       *array = sample->read.one.value;
-               array++;
-
-               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-                       *array = sample->read.time_enabled;
-                       array++;
-               }
-
-               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-                       *array = sample->read.time_running;
-                       array++;
-               }
-
-               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
-               if (read_format & PERF_FORMAT_GROUP) {
-                       sz = sample->read.group.nr *
-                            sizeof(struct sample_read_value);
-                       memcpy(array, sample->read.group.values, sz);
-                       array = (void *)array + sz;
-               } else {
-                       *array = sample->read.one.id;
-                       array++;
-               }
-       }
-
-       if (type & PERF_SAMPLE_CALLCHAIN) {
-               sz = (sample->callchain->nr + 1) * sizeof(u64);
-               memcpy(array, sample->callchain, sz);
-               array = (void *)array + sz;
-       }
-
-       if (type & PERF_SAMPLE_RAW) {
-               u.val32[0] = sample->raw_size;
-               *array = u.val64;
-               array = (void *)array + sizeof(u32);
-
-               memcpy(array, sample->raw_data, sample->raw_size);
-               array = (void *)array + sample->raw_size;
-       }
-
-       if (type & PERF_SAMPLE_BRANCH_STACK) {
-               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-               sz += sizeof(u64);
-               memcpy(array, sample->branch_stack, sz);
-               array = (void *)array + sz;
-       }
-
-       if (type & PERF_SAMPLE_REGS_USER) {
-               if (sample->user_regs.abi) {
-                       *array++ = sample->user_regs.abi;
-                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
-                       memcpy(array, sample->user_regs.regs, sz);
-                       array = (void *)array + sz;
-               } else {
-                       *array++ = 0;
-               }
-       }
-
-       if (type & PERF_SAMPLE_STACK_USER) {
-               sz = sample->user_stack.size;
-               *array++ = sz;
-               if (sz) {
-                       memcpy(array, sample->user_stack.data, sz);
-                       array = (void *)array + sz;
-                       *array++ = sz;
-               }
-       }
-
-       if (type & PERF_SAMPLE_WEIGHT) {
-               *array = sample->weight;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_DATA_SRC) {
-               *array = sample->data_src;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_TRANSACTION) {
-               *array = sample->transaction;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_REGS_INTR) {
-               if (sample->intr_regs.abi) {
-                       *array++ = sample->intr_regs.abi;
-                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
-                       memcpy(array, sample->intr_regs.regs, sz);
-                       array = (void *)array + sz;
-               } else {
-                       *array++ = 0;
-               }
-       }
-
-       if (type & PERF_SAMPLE_PHYS_ADDR) {
-               *array = sample->phys_addr;
-               array++;
-       }
-
-       return 0;
-}
-
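
perf_event__sample_event_size() and perf_event__synthesize_sample() go away
here as well; the "util/synthetic-events.h" includes added to intel-bts.c and
intel-pt.c below indicate the synthesizing code now lives in its own file.
Both functions walk the same PERF_SAMPLE_* layout, one summing field sizes,
the other writing fields through an advancing u64 cursor. The cursor idiom in
isolation (uses GNU C void-pointer arithmetic, as the original does):

    #include <stdint.h>
    #include <string.h>

    /* Append one u64 and return the advanced cursor. */
    static uint64_t *push_u64(uint64_t *array, uint64_t val)
    {
            *array++ = val;
            return array;
    }

    /* Append a blob; the caller keeps sizes 8-byte aligned. */
    static uint64_t *push_blob(uint64_t *array, const void *data, size_t sz)
    {
            memcpy(array, data, sz);
            return (void *)array + sz;
    }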
 struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
 {
        return tep_find_field(evsel->tp_format, name);
@@ -2811,9 +2368,11 @@ bool perf_evsel__fallback(struct evsel *evsel, int err,
                if (evsel->name)
                        free(evsel->name);
                evsel->name = new_name;
-               scnprintf(msg, msgsize,
-"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid);
+               scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
+                         "to fall back to excluding kernel and hypervisor "
+                         "samples", paranoid);
                evsel->core.attr.exclude_kernel = 1;
+               evsel->core.attr.exclude_hv     = 1;
 
                return true;
        }
@@ -2954,7 +2513,7 @@ struct perf_env *perf_evsel__env(struct evsel *evsel)
 {
        if (evsel && evsel->evlist)
                return evsel->evlist->env;
-       return NULL;
+       return &perf_env;
 }
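
Returning the global perf_env instead of NULL turns this fallback into a
"null object": callers always receive a usable environment and need no NULL
checks. The pattern in general form (names illustrative):

    struct env { const char *arch; };

    static struct env default_env;          /* zero-initialized fallback */

    static struct env *obj__env(struct env *e)
    {
            return e ? e : &default_env;    /* never returns NULL */
    }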
 
 static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
@@ -2966,7 +2525,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
                     thread++) {
                        int fd = FD(evsel, cpu, thread);
 
-                       if (perf_evlist__id_add_fd(evlist, evsel,
+                       if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
                                                   cpu, thread, fd) < 0)
                                return -1;
                }
@@ -2980,7 +2539,7 @@ int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
        struct perf_cpu_map *cpus = evsel->core.cpus;
        struct perf_thread_map *threads = evsel->core.threads;
 
-       if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr))
+       if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
                return -ENOMEM;
 
        return store_evsel_ids(evsel, evlist);
index 68321d1..ddc5ee6 100644 (file)
@@ -4,7 +4,6 @@
 
 #include <linux/list.h>
 #include <stdbool.h>
-#include <stdio.h>
 #include <sys/types.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 #include "symbol_conf.h"
 #include <internal/cpumap.h>
 
-struct addr_location;
-struct evsel;
-union perf_event;
-
-/*
- * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
- * more than one entry in the evlist.
- */
-struct perf_sample_id {
-       struct hlist_node       node;
-       u64                     id;
-       struct evsel            *evsel;
-       /*
-       * 'idx' will be used for AUX area sampling. A sample will have AUX area
-       * data that will be queued for decoding, where there are separate
-       * queues for each CPU (per-cpu tracing) or task (per-thread tracing).
-       * The sample ID can be used to lookup 'idx' which is effectively the
-       * queue number.
-       */
-       int                     idx;
-       int                     cpu;
-       pid_t                   tid;
-
-       /* Holds total ID period value for PERF_SAMPLE_READ processing. */
-       u64                     period;
-};
-
+struct bpf_object;
 struct cgroup;
-
-/*
- * The 'struct perf_evsel_config_term' is used to pass event
- * specific configuration data to perf_evsel__config routine.
- * It is allocated within event parsing and attached to
- * perf_evsel::config_terms list head.
-*/
-enum term_type {
-       PERF_EVSEL__CONFIG_TERM_PERIOD,
-       PERF_EVSEL__CONFIG_TERM_FREQ,
-       PERF_EVSEL__CONFIG_TERM_TIME,
-       PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
-       PERF_EVSEL__CONFIG_TERM_STACK_USER,
-       PERF_EVSEL__CONFIG_TERM_INHERIT,
-       PERF_EVSEL__CONFIG_TERM_MAX_STACK,
-       PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
-       PERF_EVSEL__CONFIG_TERM_OVERWRITE,
-       PERF_EVSEL__CONFIG_TERM_DRV_CFG,
-       PERF_EVSEL__CONFIG_TERM_BRANCH,
-       PERF_EVSEL__CONFIG_TERM_PERCORE,
-       PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT,
-};
-
-struct perf_evsel_config_term {
-       struct list_head        list;
-       enum term_type  type;
-       union {
-               u64     period;
-               u64     freq;
-               bool    time;
-               char    *callgraph;
-               char    *drv_cfg;
-               u64     stack_user;
-               int     max_stack;
-               bool    inherit;
-               bool    overwrite;
-               char    *branch;
-               unsigned long max_events;
-               bool    percore;
-               bool    aux_output;
-       } val;
-       bool weak;
-};
-
+struct perf_counts;
 struct perf_stat_evsel;
+union perf_event;
 
 typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data);
 
@@ -94,10 +25,6 @@ enum perf_tool_event {
        PERF_TOOL_DURATION_TIME = 1,
 };
 
-struct bpf_object;
-struct perf_counts;
-struct xyarray;
-
 /** struct evsel - event selector
  *
  * @evlist - evlist this evsel is in, if it is in one.
@@ -117,12 +44,9 @@ struct evsel {
        struct perf_evsel       core;
        struct evlist   *evlist;
        char                    *filter;
-       struct xyarray          *sample_id;
-       u64                     *id;
        struct perf_counts      *counts;
        struct perf_counts      *prev_raw_counts;
        int                     idx;
-       u32                     ids;
        unsigned long           max_events;
        unsigned long           nr_events_printed;
        char                    *name;
@@ -146,7 +70,6 @@ struct evsel {
        bool                    disabled;
        bool                    no_aux_samples;
        bool                    immediate;
-       bool                    system_wide;
        bool                    tracking;
        bool                    per_pkg;
        bool                    precise_max;
@@ -179,11 +102,6 @@ struct evsel {
        } side_band;
 };
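
The fields dropped from struct evsel here (sample_id, id, ids, system_wide)
are not deleted outright: every call site in this diff now reaches them as
evsel->core.*, so they have moved into libperf's struct perf_evsel.
Schematically (the libperf side is assumed from those call sites; its actual
declaration is not part of this section):

    struct perf_evsel {                     /* libperf, internal headers */
            /* ... */
            struct xyarray *sample_id;
            u64            *id;
            u32             ids;
            bool            system_wide;
    };

    struct evsel {                          /* perf tool */
            struct perf_evsel core;         /* id bookkeeping lives here now */
            /* ... */
    };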
 
-union u64_swap {
-       u64 val64;
-       u32 val32[2];
-};
-
 struct perf_missing_features {
        bool sample_id_all;
        bool exclude_guest;
@@ -282,8 +200,6 @@ const char *perf_evsel__name(struct evsel *evsel);
 const char *perf_evsel__group_name(struct evsel *evsel);
 int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
 
-int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads);
-
 void __perf_evsel__set_sample_bit(struct evsel *evsel,
                                  enum perf_event_sample_format bit);
 void __perf_evsel__reset_sample_bit(struct evsel *evsel,
@@ -439,37 +355,6 @@ static inline bool perf_evsel__is_clock(struct evsel *evsel)
               perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
 }
 
-struct perf_attr_details {
-       bool freq;
-       bool verbose;
-       bool event_group;
-       bool force;
-       bool trace_fields;
-};
-
-int perf_evsel__fprintf(struct evsel *evsel,
-                       struct perf_attr_details *details, FILE *fp);
-
-#define EVSEL__PRINT_IP                        (1<<0)
-#define EVSEL__PRINT_SYM               (1<<1)
-#define EVSEL__PRINT_DSO               (1<<2)
-#define EVSEL__PRINT_SYMOFFSET         (1<<3)
-#define EVSEL__PRINT_ONELINE           (1<<4)
-#define EVSEL__PRINT_SRCLINE           (1<<5)
-#define EVSEL__PRINT_UNKNOWN_AS_ADDR   (1<<6)
-#define EVSEL__PRINT_CALLCHAIN_ARROW   (1<<7)
-#define EVSEL__PRINT_SKIP_IGNORED      (1<<8)
-
-struct callchain_cursor;
-
-int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
-                             unsigned int print_opts,
-                             struct callchain_cursor *cursor, FILE *fp);
-
-int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
-                       int left_alignment, unsigned int print_opts,
-                       struct callchain_cursor *cursor, FILE *fp);
-
 bool perf_evsel__fallback(struct evsel *evsel, int err,
                          char *msg, size_t msgsize);
 int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
@@ -502,11 +387,6 @@ static inline bool evsel__has_callchain(const struct evsel *evsel)
        return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
 }
 
-typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
-
-int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
-                            attr__fprintf_f attr__fprintf, void *priv);
-
 struct perf_env *perf_evsel__env(struct evsel *evsel);
 
 int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
new file mode 100644 (file)
index 0000000..8a76480
--- /dev/null
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_EVSEL_CONFIG_H
+#define __PERF_EVSEL_CONFIG_H 1
+
+#include <linux/types.h>
+#include <stdbool.h>
+
+/*
+ * The 'struct perf_evsel_config_term' is used to pass event-specific
+ * configuration data to the perf_evsel__config routine. It is allocated
+ * within event parsing and attached to the perf_evsel::config_terms
+ * list head.
+ */
+enum evsel_term_type {
+       PERF_EVSEL__CONFIG_TERM_PERIOD,
+       PERF_EVSEL__CONFIG_TERM_FREQ,
+       PERF_EVSEL__CONFIG_TERM_TIME,
+       PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+       PERF_EVSEL__CONFIG_TERM_STACK_USER,
+       PERF_EVSEL__CONFIG_TERM_INHERIT,
+       PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+       PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
+       PERF_EVSEL__CONFIG_TERM_OVERWRITE,
+       PERF_EVSEL__CONFIG_TERM_DRV_CFG,
+       PERF_EVSEL__CONFIG_TERM_BRANCH,
+       PERF_EVSEL__CONFIG_TERM_PERCORE,
+       PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT,
+};
+
+struct perf_evsel_config_term {
+       struct list_head      list;
+       enum evsel_term_type  type;
+       union {
+               u64           period;
+               u64           freq;
+               bool          time;
+               char          *callgraph;
+               char          *drv_cfg;
+               u64           stack_user;
+               int           max_stack;
+               bool          inherit;
+               bool          overwrite;
+               char          *branch;
+               unsigned long max_events;
+               bool          percore;
+               bool          aux_output;
+       } val;
+       bool weak;
+};
+#endif // __PERF_EVSEL_CONFIG_H
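
These terms are produced by event parsing and consumed when the attr is set
up. A hedged sketch of the consumer side, after the shape of evsel.c's
apply_config_terms() (simplified; only two term types handled):

    #include <linux/list.h>
    #include <linux/perf_event.h>

    static void apply_terms(struct perf_event_attr *attr,
                            struct list_head *config_terms)
    {
            struct perf_evsel_config_term *term;

            list_for_each_entry(term, config_terms, list) {
                    switch (term->type) {
                    case PERF_EVSEL__CONFIG_TERM_PERIOD:
                            attr->sample_period = term->val.period;
                            attr->freq = 0;
                            break;
                    case PERF_EVSEL__CONFIG_TERM_FREQ:
                            attr->sample_freq = term->val.freq;
                            attr->freq = 1;
                            break;
                    default:        /* other terms elided */
                            break;
                    }
            }
    }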
index 496fec0..028df7a 100644 (file)
@@ -4,6 +4,8 @@
 #include <stdbool.h>
 #include <traceevent/event-parse.h>
 #include "evsel.h"
+#include "util/evsel_fprintf.h"
+#include "util/event.h"
 #include "callchain.h"
 #include "map.h"
 #include "strlist.h"
@@ -101,7 +103,7 @@ out:
 
 int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
                              unsigned int print_opts, struct callchain_cursor *cursor,
-                             FILE *fp)
+                             struct strlist *bt_stop_list, FILE *fp)
 {
        int printed = 0;
        struct callchain_cursor_node *node;
@@ -174,10 +176,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
                                printed += fprintf(fp, "\n");
 
                        /* Add srccode here too? */
-                       if (symbol_conf.bt_stop_list &&
-                           node->sym &&
-                           strlist__has_entry(symbol_conf.bt_stop_list,
-                                              node->sym->name)) {
+                       if (bt_stop_list && node->sym &&
+                           strlist__has_entry(bt_stop_list, node->sym->name)) {
                                break;
                        }
 
@@ -192,7 +192,7 @@ next:
 
 int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
                        int left_alignment, unsigned int print_opts,
-                       struct callchain_cursor *cursor, FILE *fp)
+                       struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp)
 {
        int printed = 0;
        int print_ip = print_opts & EVSEL__PRINT_IP;
@@ -203,8 +203,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
        int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
 
        if (cursor != NULL) {
-               printed += sample__fprintf_callchain(sample, left_alignment,
-                                                    print_opts, cursor, fp);
+               printed += sample__fprintf_callchain(sample, left_alignment, print_opts,
+                                                    cursor, bt_stop_list, fp);
        } else {
                printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
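
Threading bt_stop_list through as a parameter decouples these printers from
the global symbol_conf. Caller-side sketch (sample, al and cursor are assumed
locals; the stop symbol is only an example):

    struct strlist *bt_stop = strlist__new("do_syscall_64", NULL);

    sample__fprintf_sym(sample, al, /*left_alignment=*/8,
                        EVSEL__PRINT_IP | EVSEL__PRINT_SYM,
                        cursor, bt_stop, stdout);
    strlist__delete(bt_stop);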
 
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
new file mode 100644 (file)
index 0000000..47e6c84
--- /dev/null
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_EVSEL_FPRINTF_H
+#define __PERF_EVSEL_FPRINTF_H 1
+
+#include <stdio.h>
+#include <stdbool.h>
+
+struct evsel;
+
+struct perf_attr_details {
+       bool freq;
+       bool verbose;
+       bool event_group;
+       bool force;
+       bool trace_fields;
+};
+
+int perf_evsel__fprintf(struct evsel *evsel,
+                       struct perf_attr_details *details, FILE *fp);
+
+#define EVSEL__PRINT_IP                        (1<<0)
+#define EVSEL__PRINT_SYM               (1<<1)
+#define EVSEL__PRINT_DSO               (1<<2)
+#define EVSEL__PRINT_SYMOFFSET         (1<<3)
+#define EVSEL__PRINT_ONELINE           (1<<4)
+#define EVSEL__PRINT_SRCLINE           (1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR   (1<<6)
+#define EVSEL__PRINT_CALLCHAIN_ARROW   (1<<7)
+#define EVSEL__PRINT_SKIP_IGNORED      (1<<8)
+
+struct addr_location;
+struct perf_event_attr;
+struct perf_sample;
+struct callchain_cursor;
+struct strlist;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+                             unsigned int print_opts, struct callchain_cursor *cursor,
+                             struct strlist *bt_stop_list, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+                       int left_alignment, unsigned int print_opts,
+                       struct callchain_cursor *cursor,
+                       struct strlist *bt_stop_list, FILE *fp);
+
+typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+                            attr__fprintf_f attr__fprintf, void *priv);
+#endif // __PERF_EVSEL_FPRINTF_H
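
perf_event_attr__fprintf() keeps its callback design: every set attr field is
rendered to a string and handed to the caller's attr__fprintf_f, so one
walker serves plain dumps, verbose headers, and so on. A minimal callback, in
the spirit of __open_attr__fprintf() re-added in evsel.c above:

    #include <stdio.h>

    static int attr_line(FILE *fp, const char *name, const char *val,
                         void *priv)
    {
            (void)priv;     /* unused in this trivial printer */
            return fprintf(fp, "  %-32s %s\n", name, val);
    }

    /* usage: perf_event_attr__fprintf(stdout, &evsel->core.attr, attr_line, NULL); */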
index b72440b..d413755 100644 (file)
@@ -35,6 +35,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__sparc__)
 #define GEN_ELF_ARCH   EM_SPARC
 #define GEN_ELF_CLASS  ELFCLASS32
+#elif defined(__s390x__)
+#define GEN_ELF_ARCH   EM_S390
+#define GEN_ELF_CLASS  ELFCLASS64
 #else
 #error "unsupported architecture"
 #endif
index b0c34dd..becc2d1 100644 (file)
@@ -25,6 +25,7 @@
 #include "dso.h"
 #include "evlist.h"
 #include "evsel.h"
+#include "util/evsel_fprintf.h"
 #include "header.h"
 #include "memswap.h"
 #include "trace-event.h"
 #include "tool.h"
 #include "time-utils.h"
 #include "units.h"
-#include "util.h"
+#include "util/util.h" // perf_exe()
 #include "cputopo.h"
 #include "bpf-event.h"
 
 #include <linux/ctype.h>
+#include <internal/lib.h>
 
 /*
  * magic2 = "PERFILE2"
@@ -70,15 +72,6 @@ struct perf_file_attr {
        struct perf_file_section        ids;
 };
 
-struct feat_fd {
-       struct perf_header      *ph;
-       int                     fd;
-       void                    *buf;   /* Either buf != NULL or fd >= 0 */
-       ssize_t                 offset;
-       size_t                  size;
-       struct evsel    *events;
-};
-
 void perf_header__set_feat(struct perf_header *header, int feat)
 {
        set_bit(feat, header->adds_features);
@@ -524,7 +517,7 @@ static int write_event_desc(struct feat_fd *ff,
                 * copy into an nri to be independent of the
                 * type of ids,
                 */
-               nri = evsel->ids;
+               nri = evsel->core.ids;
                ret = do_write(ff, &nri, sizeof(nri));
                if (ret < 0)
                        return ret;
@@ -538,7 +531,7 @@ static int write_event_desc(struct feat_fd *ff,
                /*
                 * write unique ids for this event
                 */
-               ret = do_write(ff, evsel->id, evsel->ids * sizeof(u64));
+               ret = do_write(ff, evsel->core.id, evsel->core.ids * sizeof(u64));
                if (ret < 0)
                        return ret;
        }
@@ -1081,7 +1074,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev
 
        scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
        if (sysfs__read_str(file, &cache->map, &len)) {
-               zfree(&cache->map);
+               zfree(&cache->size);
                zfree(&cache->type);
                return -1;
        }
@@ -1303,8 +1296,10 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
                        continue;
 
                if (WARN_ONCE(cnt >= size,
-                             "failed to write MEM_TOPOLOGY, way too many nodes\n"))
+                       "failed to write MEM_TOPOLOGY, way too many nodes\n")) {
+                       closedir(dir);
                        return -1;
+               }
 
                ret = memory_node__read(&nodes[cnt++], idx);
        }
@@ -1598,7 +1593,7 @@ static void free_event_desc(struct evsel *events)
 
        for (evsel = events; evsel->core.attr.size; evsel++) {
                zfree(&evsel->name);
-               zfree(&evsel->id);
+               zfree(&evsel->core.id);
        }
 
        free(events);
@@ -1664,8 +1659,8 @@ static struct evsel *read_event_desc(struct feat_fd *ff)
                id = calloc(nr, sizeof(*id));
                if (!id)
                        goto error;
-               evsel->ids = nr;
-               evsel->id = id;
+               evsel->core.ids = nr;
+               evsel->core.id = id;
 
                for (j = 0 ; j < nr; j++) {
                        if (do_read_u64(ff, id))
@@ -1707,9 +1702,9 @@ static void print_event_desc(struct feat_fd *ff, FILE *fp)
        for (evsel = events; evsel->core.attr.size; evsel++) {
                fprintf(fp, "# event : name = %s, ", evsel->name);
 
-               if (evsel->ids) {
+               if (evsel->core.ids) {
                        fprintf(fp, ", id = {");
-                       for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
+                       for (j = 0, id = evsel->core.id; j < evsel->core.ids; j++, id++) {
                                if (j)
                                        fputc(',', fp);
                                fprintf(fp, " %"PRIu64, *id);
@@ -2823,15 +2818,6 @@ static int process_compressed(struct feat_fd *ff,
        return 0;
 }
 
-struct feature_ops {
-       int (*write)(struct feat_fd *ff, struct evlist *evlist);
-       void (*print)(struct feat_fd *ff, FILE *fp);
-       int (*process)(struct feat_fd *ff, void *data);
-       const char *name;
-       bool full_only;
-       bool synthesize;
-};
-
 #define FEAT_OPR(n, func, __full_only) \
        [HEADER_##n] = {                                        \
                .name       = __stringify(n),                   \
@@ -2858,8 +2844,10 @@ struct feature_ops {
 #define process_branch_stack   NULL
 #define process_stat           NULL
 
+// Only used in util/synthetic-events.c
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
 
-static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPN(TRACING_DATA,  tracing_data,   false),
        FEAT_OPN(BUILD_ID,      build_id,       false),
        FEAT_OPR(HOSTNAME,      hostname,       false),
@@ -3083,7 +3071,7 @@ int perf_session__write_header(struct perf_session *session,
 
        evlist__for_each_entry(session->evlist, evsel) {
                evsel->id_offset = lseek(fd, 0, SEEK_CUR);
-               err = do_write(&ff, evsel->id, evsel->ids * sizeof(u64));
+               err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
                if (err < 0) {
                        pr_debug("failed to write perf header\n");
                        return err;
@@ -3097,7 +3085,7 @@ int perf_session__write_header(struct perf_session *session,
                        .attr = evsel->core.attr,
                        .ids  = {
                                .offset = evsel->id_offset,
-                               .size   = evsel->ids * sizeof(u64),
+                               .size   = evsel->core.ids * sizeof(u64),
                        }
                };
                err = do_write(&ff, &f_attr, sizeof(f_attr));
@@ -3624,7 +3612,7 @@ int perf_session__read_header(struct perf_session *session)
                 * for allocating the perf_sample_id table we fake 1 cpu and
                 * hattr->ids threads.
                 */
-               if (perf_evsel__alloc_id(evsel, 1, nr_ids))
+               if (perf_evsel__alloc_id(&evsel->core, 1, nr_ids))
                        goto out_delete_evlist;
 
                lseek(fd, f_attr.ids.offset, SEEK_SET);
@@ -3633,7 +3621,7 @@ int perf_session__read_header(struct perf_session *session)
                        if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
                                goto out_errno;
 
-                       perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
+                       perf_evlist__id_add(&session->evlist->core, &evsel->core, 0, j, f_id);
                }
 
                lseek(fd, tmp, SEEK_SET);
@@ -3656,105 +3644,6 @@ out_delete_evlist:
        return -ENOMEM;
 }
 
-int perf_event__synthesize_attr(struct perf_tool *tool,
-                               struct perf_event_attr *attr, u32 ids, u64 *id,
-                               perf_event__handler_t process)
-{
-       union perf_event *ev;
-       size_t size;
-       int err;
-
-       size = sizeof(struct perf_event_attr);
-       size = PERF_ALIGN(size, sizeof(u64));
-       size += sizeof(struct perf_event_header);
-       size += ids * sizeof(u64);
-
-       ev = zalloc(size);
-
-       if (ev == NULL)
-               return -ENOMEM;
-
-       ev->attr.attr = *attr;
-       memcpy(ev->attr.id, id, ids * sizeof(u64));
-
-       ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
-       ev->attr.header.size = (u16)size;
-
-       if (ev->attr.header.size == size)
-               err = process(tool, ev, NULL, NULL);
-       else
-               err = -E2BIG;
-
-       free(ev);
-
-       return err;
-}
-
-int perf_event__synthesize_features(struct perf_tool *tool,
-                                   struct perf_session *session,
-                                   struct evlist *evlist,
-                                   perf_event__handler_t process)
-{
-       struct perf_header *header = &session->header;
-       struct feat_fd ff;
-       struct perf_record_header_feature *fe;
-       size_t sz, sz_hdr;
-       int feat, ret;
-
-       sz_hdr = sizeof(fe->header);
-       sz = sizeof(union perf_event);
-       /* get a nice alignment */
-       sz = PERF_ALIGN(sz, page_size);
-
-       memset(&ff, 0, sizeof(ff));
-
-       ff.buf = malloc(sz);
-       if (!ff.buf)
-               return -ENOMEM;
-
-       ff.size = sz - sz_hdr;
-       ff.ph = &session->header;
-
-       for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
-               if (!feat_ops[feat].synthesize) {
-                       pr_debug("No record header feature for header :%d\n", feat);
-                       continue;
-               }
-
-               ff.offset = sizeof(*fe);
-
-               ret = feat_ops[feat].write(&ff, evlist);
-               if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
-                       pr_debug("Error writing feature\n");
-                       continue;
-               }
-               /* ff.buf may have changed due to realloc in do_write() */
-               fe = ff.buf;
-               memset(fe, 0, sizeof(*fe));
-
-               fe->feat_id = feat;
-               fe->header.type = PERF_RECORD_HEADER_FEATURE;
-               fe->header.size = ff.offset;
-
-               ret = process(tool, ff.buf, NULL, NULL);
-               if (ret) {
-                       free(ff.buf);
-                       return ret;
-               }
-       }
-
-       /* Send HEADER_LAST_FEATURE mark. */
-       fe = ff.buf;
-       fe->feat_id     = HEADER_LAST_FEATURE;
-       fe->header.type = PERF_RECORD_HEADER_FEATURE;
-       fe->header.size = sizeof(*fe);
-
-       ret = process(tool, ff.buf, NULL, NULL);
-
-       free(ff.buf);
-       return ret;
-}
-
 int perf_event__process_feature(struct perf_session *session,
                                union perf_event *event)
 {
@@ -3797,113 +3686,6 @@ int perf_event__process_feature(struct perf_session *session,
        return 0;
 }
 
-static struct perf_record_event_update *
-event_update_event__new(size_t size, u64 type, u64 id)
-{
-       struct perf_record_event_update *ev;
-
-       size += sizeof(*ev);
-       size  = PERF_ALIGN(size, sizeof(u64));
-
-       ev = zalloc(size);
-       if (ev) {
-               ev->header.type = PERF_RECORD_EVENT_UPDATE;
-               ev->header.size = (u16)size;
-               ev->type = type;
-               ev->id = id;
-       }
-       return ev;
-}
-
-int
-perf_event__synthesize_event_update_unit(struct perf_tool *tool,
-                                        struct evsel *evsel,
-                                        perf_event__handler_t process)
-{
-       struct perf_record_event_update *ev;
-       size_t size = strlen(evsel->unit);
-       int err;
-
-       ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]);
-       if (ev == NULL)
-               return -ENOMEM;
-
-       strlcpy(ev->data, evsel->unit, size + 1);
-       err = process(tool, (union perf_event *)ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
-int
-perf_event__synthesize_event_update_scale(struct perf_tool *tool,
-                                         struct evsel *evsel,
-                                         perf_event__handler_t process)
-{
-       struct perf_record_event_update *ev;
-       struct perf_record_event_update_scale *ev_data;
-       int err;
-
-       ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]);
-       if (ev == NULL)
-               return -ENOMEM;
-
-       ev_data = (struct perf_record_event_update_scale *)ev->data;
-       ev_data->scale = evsel->scale;
-       err = process(tool, (union perf_event*) ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
-int
-perf_event__synthesize_event_update_name(struct perf_tool *tool,
-                                        struct evsel *evsel,
-                                        perf_event__handler_t process)
-{
-       struct perf_record_event_update *ev;
-       size_t len = strlen(evsel->name);
-       int err;
-
-       ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]);
-       if (ev == NULL)
-               return -ENOMEM;
-
-       strlcpy(ev->data, evsel->name, len + 1);
-       err = process(tool, (union perf_event*) ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
-int
-perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
-                                       struct evsel *evsel,
-                                       perf_event__handler_t process)
-{
-       size_t size = sizeof(struct perf_record_event_update);
-       struct perf_record_event_update *ev;
-       int max, err;
-       u16 type;
-
-       if (!evsel->core.own_cpus)
-               return 0;
-
-       ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max);
-       if (!ev)
-               return -ENOMEM;
-
-       ev->header.type = PERF_RECORD_EVENT_UPDATE;
-       ev->header.size = (u16)size;
-       ev->type = PERF_EVENT_UPDATE__CPUS;
-       ev->id   = evsel->id[0];
-
-       cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data,
-                                evsel->core.own_cpus,
-                                type, max);
-
-       err = process(tool, (union perf_event*) ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
 {
        struct perf_record_event_update *ev = &event->event_update;
@@ -3943,93 +3725,6 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
        return ret;
 }
 
-int perf_event__synthesize_attrs(struct perf_tool *tool,
-                                struct evlist *evlist,
-                                perf_event__handler_t process)
-{
-       struct evsel *evsel;
-       int err = 0;
-
-       evlist__for_each_entry(evlist, evsel) {
-               err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->ids,
-                                                 evsel->id, process);
-               if (err) {
-                       pr_debug("failed to create perf header attribute\n");
-                       return err;
-               }
-       }
-
-       return err;
-}
-
-static bool has_unit(struct evsel *counter)
-{
-       return counter->unit && *counter->unit;
-}
-
-static bool has_scale(struct evsel *counter)
-{
-       return counter->scale != 1;
-}
-
-int perf_event__synthesize_extra_attr(struct perf_tool *tool,
-                                     struct evlist *evsel_list,
-                                     perf_event__handler_t process,
-                                     bool is_pipe)
-{
-       struct evsel *counter;
-       int err;
-
-       /*
-        * Synthesize other events stuff not carried within
-        * attr event - unit, scale, name
-        */
-       evlist__for_each_entry(evsel_list, counter) {
-               if (!counter->supported)
-                       continue;
-
-               /*
-                * Synthesize unit and scale only if it's defined.
-                */
-               if (has_unit(counter)) {
-                       err = perf_event__synthesize_event_update_unit(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel unit.\n");
-                               return err;
-                       }
-               }
-
-               if (has_scale(counter)) {
-                       err = perf_event__synthesize_event_update_scale(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel counter.\n");
-                               return err;
-                       }
-               }
-
-               if (counter->core.own_cpus) {
-                       err = perf_event__synthesize_event_update_cpus(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel cpus.\n");
-                               return err;
-                       }
-               }
-
-               /*
-                * Name is needed only for pipe output,
-                * perf.data carries event names.
-                */
-               if (is_pipe) {
-                       err = perf_event__synthesize_event_update_name(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel name.\n");
-                               return err;
-                       }
-               }
-       }
-       return 0;
-}
-
 int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
                             union perf_event *event,
                             struct evlist **pevlist)
@@ -4058,11 +3753,11 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
         * for allocating the perf_sample_id table we fake 1 cpu and
         * hattr->ids threads.
         */
-       if (perf_evsel__alloc_id(evsel, 1, n_ids))
+       if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
                return -ENOMEM;
 
        for (i = 0; i < n_ids; i++) {
-               perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
+               perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]);
        }
 
        return 0;
@@ -4114,55 +3809,6 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
-                                       struct evlist *evlist,
-                                       perf_event__handler_t process)
-{
-       union perf_event ev;
-       struct tracing_data *tdata;
-       ssize_t size = 0, aligned_size = 0, padding;
-       struct feat_fd ff;
-       int err __maybe_unused = 0;
-
-       /*
-        * We are going to store the size of the data followed
-        * by the data contents. Since the fd descriptor is a pipe,
-        * we cannot seek back to store the size of the data once
-        * we know it. Instead we:
-        *
-        * - write the tracing data to the temp file
-        * - get/write the data size to pipe
-        * - write the tracing data from the temp file
-        *   to the pipe
-        */
-       tdata = tracing_data_get(&evlist->core.entries, fd, true);
-       if (!tdata)
-               return -1;
-
-       memset(&ev, 0, sizeof(ev));
-
-       ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
-       size = tdata->size;
-       aligned_size = PERF_ALIGN(size, sizeof(u64));
-       padding = aligned_size - size;
-       ev.tracing_data.header.size = sizeof(ev.tracing_data);
-       ev.tracing_data.size = aligned_size;
-
-       process(tool, &ev, NULL, NULL);
-
-       /*
-        * The put function will copy all the tracing data
-        * stored in temp file to the pipe.
-        */
-       tracing_data_put(tdata);
-
-       ff = (struct feat_fd){ .fd = fd };
-       if (write_padded(&ff, NULL, 0, padding))
-               return -1;
-
-       return aligned_size;
-}
-
 int perf_event__process_tracing_data(struct perf_session *session,
                                     union perf_event *event)
 {
@@ -4202,34 +3848,6 @@ int perf_event__process_tracing_data(struct perf_session *session,
        return size_read + padding;
 }
 
-int perf_event__synthesize_build_id(struct perf_tool *tool,
-                                   struct dso *pos, u16 misc,
-                                   perf_event__handler_t process,
-                                   struct machine *machine)
-{
-       union perf_event ev;
-       size_t len;
-       int err = 0;
-
-       if (!pos->hit)
-               return err;
-
-       memset(&ev, 0, sizeof(ev));
-
-       len = pos->long_name_len + 1;
-       len = PERF_ALIGN(len, NAME_ALIGN);
-       memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
-       ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
-       ev.build_id.header.misc = misc;
-       ev.build_id.pid = machine->pid;
-       ev.build_id.header.size = sizeof(ev.build_id) + len;
-       memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
-
-       err = process(tool, &ev, NULL, machine);
-
-       return err;
-}
-
 int perf_event__process_build_id(struct perf_session *session,
                                 union perf_event *event)
 {
index 3e48ae3..ca53a92 100644 (file)
@@ -5,10 +5,10 @@
 #include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <sys/types.h>
+#include <stdio.h> // FILE
 #include <stdbool.h>
 #include <linux/bitmap.h>
 #include <linux/types.h>
-#include "event.h"
 #include "env.h"
 #include "pmu.h"
 
@@ -92,8 +92,28 @@ struct perf_header {
        struct perf_env         env;
 };
 
+struct feat_fd {
+       struct perf_header *ph;
+       int                fd;
+       void               *buf;        /* Either buf != NULL or fd >= 0 */
+       ssize_t            offset;
+       size_t             size;
+       struct evsel       *events;
+};
+
+struct perf_header_feature_ops {
+       int        (*write)(struct feat_fd *ff, struct evlist *evlist);
+       void       (*print)(struct feat_fd *ff, FILE *fp);
+       int        (*process)(struct feat_fd *ff, void *data);
+       const char *name;
+       bool       full_only;
+       bool       synthesize;
+};
+
 struct evlist;
 struct perf_session;
+struct perf_tool;
+union perf_event;
 
 int perf_session__read_header(struct perf_session *session);
 int perf_session__write_header(struct perf_session *session,
@@ -115,54 +135,16 @@ int perf_header__process_sections(struct perf_header *header, int fd,
 
 int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
 
-int perf_event__synthesize_features(struct perf_tool *tool,
-                                   struct perf_session *session,
-                                   struct evlist *evlist,
-                                   perf_event__handler_t process);
-
-int perf_event__synthesize_extra_attr(struct perf_tool *tool,
-                                     struct evlist *evsel_list,
-                                     perf_event__handler_t process,
-                                     bool is_pipe);
-
 int perf_event__process_feature(struct perf_session *session,
                                union perf_event *event);
-
-int perf_event__synthesize_attr(struct perf_tool *tool,
-                               struct perf_event_attr *attr, u32 ids, u64 *id,
-                               perf_event__handler_t process);
-int perf_event__synthesize_attrs(struct perf_tool *tool,
-                                struct evlist *evlist,
-                                perf_event__handler_t process);
-int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
-                                            struct evsel *evsel,
-                                            perf_event__handler_t process);
-int perf_event__synthesize_event_update_scale(struct perf_tool *tool,
-                                             struct evsel *evsel,
-                                             perf_event__handler_t process);
-int perf_event__synthesize_event_update_name(struct perf_tool *tool,
-                                            struct evsel *evsel,
-                                            perf_event__handler_t process);
-int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
-                                            struct evsel *evsel,
-                                            perf_event__handler_t process);
 int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
                             struct evlist **pevlist);
 int perf_event__process_event_update(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct evlist **pevlist);
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
-
-int perf_event__synthesize_tracing_data(struct perf_tool *tool,
-                                       int fd, struct evlist *evlist,
-                                       perf_event__handler_t process);
 int perf_event__process_tracing_data(struct perf_session *session,
                                     union perf_event *event);
-
-int perf_event__synthesize_build_id(struct perf_tool *tool,
-                                   struct dso *pos, u16 misc,
-                                   perf_event__handler_t process,
-                                   struct machine *machine);
 int perf_event__process_build_id(struct perf_session *session,
                                 union perf_event *event);
 bool is_perf_magic(u64 magic);
index 34803e3..6a186b6 100644 (file)
@@ -15,6 +15,7 @@ struct addr_location;
 struct map_symbol;
 struct mem_info;
 struct branch_info;
+struct branch_stack;
 struct block_info;
 struct symbol;
 struct ui_progress;
index aacffa2..34cb380 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/log2.h>
 #include <linux/zalloc.h>
 
-#include "cpumap.h"
 #include "color.h"
 #include "evsel.h"
 #include "evlist.h"
@@ -29,6 +28,7 @@
 #include "auxtrace.h"
 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
 #include "intel-bts.h"
+#include "util/synthetic-events.h"
 
 #define MAX_TIMESTAMP (~0ULL)
 
@@ -768,7 +768,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
        int err;
 
        evlist__for_each_entry(evlist, evsel) {
-               if (evsel->core.attr.type == bts->pmu_type && evsel->ids) {
+               if (evsel->core.attr.type == bts->pmu_type && evsel->core.ids) {
                        found = true;
                        break;
                }
@@ -795,7 +795,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
        attr.sample_id_all = evsel->core.attr.sample_id_all;
        attr.read_format = evsel->core.attr.read_format;
 
-       id = evsel->id[0] + 1000000000;
+       id = evsel->core.id[0] + 1000000000;
        if (!id)
                id = 1;
 
index 9b56fb7..a1c9eb6 100644 (file)
@@ -33,6 +33,7 @@
 #include "tsc.h"
 #include "intel-pt.h"
 #include "config.h"
+#include "util/synthetic-events.h"
 #include "time-utils.h"
 
 #include "../arch/x86/include/uapi/asm/perf_regs.h"
@@ -1704,7 +1705,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
        struct intel_pt *pt = ptq->pt;
        struct evsel *evsel = pt->pebs_evsel;
        u64 sample_type = evsel->core.attr.sample_type;
-       u64 id = evsel->id[0];
+       u64 id = evsel->core.id[0];
        u8 cpumode;
 
        if (intel_pt_skip_event(pt))
@@ -2719,7 +2720,7 @@ static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel) {
-               if (evsel->id && evsel->id[0] == id) {
+               if (evsel->core.id && evsel->core.id[0] == id) {
                        if (evsel->name)
                                zfree(&evsel->name);
                        evsel->name = strdup(name);
@@ -2734,7 +2735,7 @@ static struct evsel *intel_pt_evsel(struct intel_pt *pt,
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel) {
-               if (evsel->core.attr.type == pt->pmu_type && evsel->ids)
+               if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
                        return evsel;
        }
 
@@ -2775,7 +2776,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
        attr.sample_id_all = evsel->core.attr.sample_id_all;
        attr.read_format = evsel->core.attr.read_format;
 
-       id = evsel->id[0] + 1000000000;
+       id = evsel->core.id[0] + 1000000000;
        if (!id)
                id = 1;
 
@@ -2902,7 +2903,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
                return;
 
        evlist__for_each_entry(pt->session->evlist, evsel) {
-               if (evsel->core.attr.aux_output && evsel->id) {
+               if (evsel->core.attr.aux_output && evsel->core.id) {
                        pt->sample_pebs = true;
                        pt->pebs_evsel = evsel;
                        return;
index b80f29b..e3ccb0c 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/stringify.h>
 
 #include "build-id.h"
-#include "util.h"
 #include "event.h"
 #include "debug.h"
 #include "evlist.h"
@@ -27,7 +26,6 @@
 #include "jit.h"
 #include "jitdump.h"
 #include "genelf.h"
-#include "../builtin.h"
 
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
@@ -397,7 +395,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
        size_t size;
        u16 idr_size;
        const char *sym;
-       uint32_t count;
+       uint64_t count;
        int ret, csize, usize;
        pid_t pid, tid;
        struct {
@@ -420,7 +418,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
                return -1;
 
        filename = event->mmap2.filename;
-       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so",
+       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
                        jd->dir,
                        pid,
                        count);
@@ -531,7 +529,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
                return -1;
 
        filename = event->mmap2.filename;
-       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64,
+       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
                 jd->dir,
                 pid,
                 jr->move.code_index);
@@ -779,7 +777,7 @@ jit_process(struct perf_session *session,
         * track sample_type to compute id_all layout
         * perf sets the same sample type to all events as of now
         */
-       first = perf_evlist__first(session->evlist);
+       first = evlist__first(session->evlist);
        jd.sample_type = first->core.attr.sample_type;
 
        *nbytes = 0;
index 4691363..6f0fa05 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef __PERF_KVM_STAT_H
 #define __PERF_KVM_STAT_H
 
+#ifdef HAVE_KVM_STAT_SUPPORT
+
 #include "tool.h"
 #include "stat.h"
 #include "record.h"
@@ -144,5 +146,7 @@ extern const int decode_str_len;
 extern const char *kvm_exit_reason;
 extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
+#endif /* HAVE_KVM_STAT_SUPPORT */
 
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
 #endif /* __PERF_KVM_STAT_H */
index 66756e6..6b4e5a0 100644 (file)
@@ -22,7 +22,6 @@
 #define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP
 
 #include "unwind.h"
-#include "debug.h"
 #include "libunwind-aarch64.h"
 #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
 #include "../../arch/arm64/util/unwind-libunwind.c"
index c5e5681..21c216c 100644 (file)
@@ -22,7 +22,6 @@
 #define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP
 
 #include "unwind.h"
-#include "debug.h"
 #include "libunwind-x86.h"
 #include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
 
index 55fb4b3..8b14e4a 100644 (file)
@@ -8,6 +8,7 @@
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/zalloc.h>
@@ -232,14 +233,14 @@ static int detect_kbuild_dir(char **kbuild_dir)
        const char *prefix_dir = "";
        const char *suffix_dir = "";
 
+       /* _UTSNAME_LENGTH is 65 */
+       char release[128];
+
        char *autoconf_path;
 
        int err;
 
        if (!test_dir) {
-               /* _UTSNAME_LENGTH is 65 */
-               char release[128];
-
                err = fetch_kernel_version(NULL, release,
                                           sizeof(release));
                if (err)
index 3974470..39062df 100644 (file)
@@ -7,10 +7,10 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include "compress.h"
-#include "util.h"
 #include "debug.h"
 #include <string.h>
 #include <unistd.h>
+#include <internal/lib.h>
 
 #define BUFSIZE 8192
 
index b4749d3..70a9f87 100644 (file)
@@ -32,6 +32,7 @@
 #include "linux/hash.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
+#include <internal/lib.h> // page_size
 
 #include <linux/ctype.h>
 #include <symbol/kallsyms.h>
@@ -2609,21 +2610,6 @@ int machines__for_each_thread(struct machines *machines,
        return rc;
 }
 
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
-                                 struct target *target, struct perf_thread_map *threads,
-                                 perf_event__handler_t process, bool data_mmap,
-                                 unsigned int nr_threads_synthesize)
-{
-       if (target__has_task(target))
-               return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
-       else if (target__has_cpu(target))
-               return perf_event__synthesize_threads(tool, process,
-                                                     machine, data_mmap,
-                                                     nr_threads_synthesize);
-       /* command specified */
-       return 0;
-}
-
 pid_t machine__get_current_tid(struct machine *machine, int cpu)
 {
        int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
index ffd391a..18e13c0 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/rbtree.h>
 #include "map_groups.h"
 #include "dsos.h"
-#include "event.h"
 #include "rwsem.h"
 
 struct addr_location;
@@ -252,20 +251,6 @@ int machines__for_each_thread(struct machines *machines,
                              int (*fn)(struct thread *thread, void *p),
                              void *priv);
 
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
-                                 struct target *target, struct perf_thread_map *threads,
-                                 perf_event__handler_t process, bool data_mmap,
-                                 unsigned int nr_threads_synthesize);
-static inline
-int machine__synthesize_threads(struct machine *machine, struct target *target,
-                               struct perf_thread_map *threads, bool data_mmap,
-                               unsigned int nr_threads_synthesize)
-{
-       return __machine__synthesize_threads(machine, NULL, target, threads,
-                                            perf_event__process, data_mmap,
-                                            nr_threads_synthesize);
-}
-
 pid_t machine__get_current_tid(struct machine *machine, int cpu);
 int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
                             pid_t tid);
index 5b83ed1..eec9b28 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "symbol.h"
+#include <assert.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <limits.h>
@@ -850,6 +851,8 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
                        }
 
                        after->start = map->end;
+                       after->pgoff += map->end - pos->start;
+                       assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
                        __map_groups__insert(pos->groups, after);
                        if (verbose >= 2 && !use_browser)
                                map__fprintf(after, fp);
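The pgoff fixup above is what the new assert verifies: after the split, the
surviving tail must translate an address to the same file offset the original
map did. A worked example with assumed numbers (taking map_ip as
ip - start + pgoff):

	/* pos covers [0x1000, 0x5000) with pgoff 0; a new map claims
	 * [0x2000, 0x3000). The tail 'after' now starts at map->end (0x3000),
	 * so its pgoff must grow by map->end - pos->start = 0x2000:
	 *   pos->map_ip(pos, 0x3000)     = 0x3000 - 0x1000 + 0x0000 = 0x2000
	 *   after->map_ip(after, 0x3000) = 0x3000 - 0x3000 + 0x2000 = 0x2000
	 */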
index 1e29ff9..2c38e8c 100644 (file)
@@ -2,6 +2,13 @@
 #ifndef PERF_MEMSWAP_H_
 #define PERF_MEMSWAP_H_
 
+#include <linux/types.h>
+
+union u64_swap {
+       u64 val64;
+       u32 val32[2];
+};
+
 void mem_bswap_64(void *src, int byte_size);
 void mem_bswap_32(void *src, int byte_size);
 
index 33c5b54..a35dc57 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/zalloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h> // sysconf()
 #ifdef HAVE_LIBNUMA_SUPPORT
 #include <numaif.h>
 #endif
 #include "event.h"
 #include "mmap.h"
 #include "../perf.h"
-#include "util.h" /* page_size */
+#include <internal/lib.h> /* page_size */
 
-size_t perf_mmap__mmap_len(struct perf_mmap *map)
+size_t perf_mmap__mmap_len(struct mmap *map)
 {
-       return map->mask + 1 + page_size;
+       return map->core.mask + 1 + page_size;
 }
 
 /* When check_messup is true, 'end' must point to a good entry */
-static union perf_event *perf_mmap__read(struct perf_mmap *map,
+static union perf_event *perf_mmap__read(struct mmap *map,
                                         u64 *startp, u64 end)
 {
-       unsigned char *data = map->base + page_size;
+       unsigned char *data = map->core.base + page_size;
        union perf_event *event = NULL;
        int diff = end - *startp;
 
        if (diff >= (int)sizeof(event->header)) {
                size_t size;
 
-               event = (union perf_event *)&data[*startp & map->mask];
+               event = (union perf_event *)&data[*startp & map->core.mask];
                size = event->header.size;
 
                if (size < sizeof(event->header) || diff < (int)size)
@@ -48,20 +49,20 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
-               if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+               if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) {
                        unsigned int offset = *startp;
                        unsigned int len = min(sizeof(*event), size), cpy;
-                       void *dst = map->event_copy;
+                       void *dst = map->core.event_copy;
 
                        do {
-                               cpy = min(map->mask + 1 - (offset & map->mask), len);
-                               memcpy(dst, &data[offset & map->mask], cpy);
+                               cpy = min(map->core.mask + 1 - (offset & map->core.mask), len);
+                               memcpy(dst, &data[offset & map->core.mask], cpy);
                                offset += cpy;
                                dst += cpy;
                                len -= cpy;
                        } while (len);
 
-                       event = (union perf_event *)map->event_copy;
+                       event = (union perf_event *)map->core.event_copy;
                }
 
                *startp += size;
@@ -82,55 +83,55 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
  * }
  * perf_mmap__read_done()
  */
-union perf_event *perf_mmap__read_event(struct perf_mmap *map)
+union perf_event *perf_mmap__read_event(struct mmap *map)
 {
        union perf_event *event;
 
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
-       if (!refcount_read(&map->refcnt))
+       if (!refcount_read(&map->core.refcnt))
                return NULL;
 
        /* non-overwrite doesn't pause the ringbuffer */
-       if (!map->overwrite)
-               map->end = perf_mmap__read_head(map);
+       if (!map->core.overwrite)
+               map->core.end = perf_mmap__read_head(map);
 
-       event = perf_mmap__read(map, &map->start, map->end);
+       event = perf_mmap__read(map, &map->core.start, map->core.end);
 
-       if (!map->overwrite)
-               map->prev = map->start;
+       if (!map->core.overwrite)
+               map->core.prev = map->core.start;
 
        return event;
 }
 
-static bool perf_mmap__empty(struct perf_mmap *map)
+static bool perf_mmap__empty(struct mmap *map)
 {
-       return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
+       return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base;
 }
 
-void perf_mmap__get(struct perf_mmap *map)
+void perf_mmap__get(struct mmap *map)
 {
-       refcount_inc(&map->refcnt);
+       refcount_inc(&map->core.refcnt);
 }
 
-void perf_mmap__put(struct perf_mmap *map)
+void perf_mmap__put(struct mmap *map)
 {
-       BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+       BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0);
 
-       if (refcount_dec_and_test(&map->refcnt))
+       if (refcount_dec_and_test(&map->core.refcnt))
                perf_mmap__munmap(map);
 }
 
-void perf_mmap__consume(struct perf_mmap *map)
+void perf_mmap__consume(struct mmap *map)
 {
-       if (!map->overwrite) {
-               u64 old = map->prev;
+       if (!map->core.overwrite) {
+               u64 old = map->core.prev;
 
                perf_mmap__write_tail(map, old);
        }
 
-       if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
+       if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map))
                perf_mmap__put(map);
 }
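
The usage protocol quoted in the comment above carries over to the new
'struct mmap' API unchanged. A minimal consumer sketch using only functions
from this file (deliver() is a hypothetical callback):

	static void drain_mmap(struct mmap *map,
			       void (*deliver)(union perf_event *event))
	{
		union perf_event *event;

		if (perf_mmap__read_init(map) < 0)	/* e.g. -EAGAIN: no data */
			return;
		while ((event = perf_mmap__read_event(map)) != NULL) {
			deliver(event);
			perf_mmap__consume(map);
		}
		perf_mmap__read_done(map);	/* mandatory in overwrite mode */
	}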
 
@@ -161,13 +162,13 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
 }
 
 #ifdef HAVE_AIO_SUPPORT
-static int perf_mmap__aio_enabled(struct perf_mmap *map)
+static int perf_mmap__aio_enabled(struct mmap *map)
 {
        return map->aio.nr_cblocks > 0;
 }
 
 #ifdef HAVE_LIBNUMA_SUPPORT
-static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
 {
        map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
                                  MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
@@ -179,7 +180,7 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
        return 0;
 }
 
-static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+static void perf_mmap__aio_free(struct mmap *map, int idx)
 {
        if (map->aio.data[idx]) {
                munmap(map->aio.data[idx], perf_mmap__mmap_len(map));
@@ -187,7 +188,7 @@ static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
        }
 }
 
-static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity)
+static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
 {
        void *data;
        size_t mmap_len;
@@ -207,7 +208,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi
        return 0;
 }
 #else /* !HAVE_LIBNUMA_SUPPORT */
-static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
 {
        map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
        if (map->aio.data[idx] == NULL)
@@ -216,19 +217,19 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
        return 0;
 }
 
-static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+static void perf_mmap__aio_free(struct mmap *map, int idx)
 {
        zfree(&(map->aio.data[idx]));
 }
 
-static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused,
+static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused,
                int cpu __maybe_unused, int affinity __maybe_unused)
 {
        return 0;
 }
 #endif
 
-static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
+static int perf_mmap__aio_mmap(struct mmap *map, struct mmap_params *mp)
 {
        int delta_max, i, prio, ret;
 
@@ -256,7 +257,7 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
                                pr_debug2("failed to allocate data buffer area, error %m");
                                return -1;
                        }
-                       ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity);
+                       ret = perf_mmap__aio_bind(map, i, map->core.cpu, mp->affinity);
                        if (ret == -1)
                                return -1;
                        /*
@@ -282,7 +283,7 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
        return 0;
 }
 
-static void perf_mmap__aio_munmap(struct perf_mmap *map)
+static void perf_mmap__aio_munmap(struct mmap *map)
 {
        int i;
 
@@ -294,34 +295,34 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
        zfree(&map->aio.aiocb);
 }
 #else /* !HAVE_AIO_SUPPORT */
-static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
+static int perf_mmap__aio_enabled(struct mmap *map __maybe_unused)
 {
        return 0;
 }
 
-static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
+static int perf_mmap__aio_mmap(struct mmap *map __maybe_unused,
                               struct mmap_params *mp __maybe_unused)
 {
        return 0;
 }
 
-static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
+static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
 {
 }
 #endif
 
-void perf_mmap__munmap(struct perf_mmap *map)
+void perf_mmap__munmap(struct mmap *map)
 {
        perf_mmap__aio_munmap(map);
        if (map->data != NULL) {
                munmap(map->data, perf_mmap__mmap_len(map));
                map->data = NULL;
        }
-       if (map->base != NULL) {
-               munmap(map->base, perf_mmap__mmap_len(map));
-               map->base = NULL;
-               map->fd = -1;
-               refcount_set(&map->refcnt, 0);
+       if (map->core.base != NULL) {
+               munmap(map->core.base, perf_mmap__mmap_len(map));
+               map->core.base = NULL;
+               map->core.fd = -1;
+               refcount_set(&map->core.refcnt, 0);
        }
        auxtrace_mmap__munmap(&map->auxtrace_mmap);
 }
@@ -343,16 +344,16 @@ static void build_node_mask(int node, cpu_set_t *mask)
        }
 }
 
-static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp)
+static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
 {
        CPU_ZERO(&map->affinity_mask);
        if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
-               build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask);
+               build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
        else if (mp->affinity == PERF_AFFINITY_CPU)
-               CPU_SET(map->cpu, &map->affinity_mask);
+               CPU_SET(map->core.cpu, &map->affinity_mask);
 }
 
-int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
+int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
 {
        /*
         * The last one will be done at perf_mmap__consume(), so that we
@@ -367,23 +368,23 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
         * evlist layer can't just drop it when filtering events in
         * perf_evlist__filter_pollfd().
         */
-       refcount_set(&map->refcnt, 2);
-       map->prev = 0;
-       map->mask = mp->mask;
-       map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+       refcount_set(&map->core.refcnt, 2);
+       map->core.prev = 0;
+       map->core.mask = mp->mask;
+       map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
                         MAP_SHARED, fd, 0);
-       if (map->base == MAP_FAILED) {
+       if (map->core.base == MAP_FAILED) {
                pr_debug2("failed to mmap perf event ring buffer, error %d\n",
                          errno);
-               map->base = NULL;
+               map->core.base = NULL;
                return -1;
        }
-       map->fd = fd;
-       map->cpu = cpu;
+       map->core.fd = fd;
+       map->core.cpu = cpu;
 
        perf_mmap__setup_affinity_mask(map, mp);
 
-       map->flush = mp->flush;
+       map->core.flush = mp->flush;
 
        map->comp_level = mp->comp_level;
 
@@ -399,7 +400,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
        }
 
        if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
-                               &mp->auxtrace_mp, map->base, fd))
+                               &mp->auxtrace_mp, map->core.base, fd))
                return -1;
 
        return perf_mmap__aio_mmap(map, mp);
@@ -440,25 +441,25 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
 /*
  * Report the start and end of the available data in ringbuffer
  */
-static int __perf_mmap__read_init(struct perf_mmap *md)
+static int __perf_mmap__read_init(struct mmap *md)
 {
        u64 head = perf_mmap__read_head(md);
-       u64 old = md->prev;
-       unsigned char *data = md->base + page_size;
+       u64 old = md->core.prev;
+       unsigned char *data = md->core.base + page_size;
        unsigned long size;
 
-       md->start = md->overwrite ? head : old;
-       md->end = md->overwrite ? old : head;
+       md->core.start = md->core.overwrite ? head : old;
+       md->core.end = md->core.overwrite ? old : head;
 
-       if ((md->end - md->start) < md->flush)
+       if ((md->core.end - md->core.start) < md->core.flush)
                return -EAGAIN;
 
-       size = md->end - md->start;
-       if (size > (unsigned long)(md->mask) + 1) {
-               if (!md->overwrite) {
+       size = md->core.end - md->core.start;
+       if (size > (unsigned long)(md->core.mask) + 1) {
+               if (!md->core.overwrite) {
                        WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
 
-                       md->prev = head;
+                       md->core.prev = head;
                        perf_mmap__consume(md);
                        return -EAGAIN;
                }
@@ -467,29 +468,29 @@ static int __perf_mmap__read_init(struct perf_mmap *md)
                 * Backward ring buffer is full. We still have a chance to read
                 * most of data from it.
                 */
-               if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
+               if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end))
                        return -EINVAL;
        }
 
        return 0;
 }
 
-int perf_mmap__read_init(struct perf_mmap *map)
+int perf_mmap__read_init(struct mmap *map)
 {
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
-       if (!refcount_read(&map->refcnt))
+       if (!refcount_read(&map->core.refcnt))
                return -ENOENT;
 
        return __perf_mmap__read_init(map);
 }
 
-int perf_mmap__push(struct perf_mmap *md, void *to,
-                   int push(struct perf_mmap *map, void *to, void *buf, size_t size))
+int perf_mmap__push(struct mmap *md, void *to,
+                   int push(struct mmap *map, void *to, void *buf, size_t size))
 {
        u64 head = perf_mmap__read_head(md);
-       unsigned char *data = md->base + page_size;
+       unsigned char *data = md->core.base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;
@@ -498,12 +499,12 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
        if (rc < 0)
                return (rc == -EAGAIN) ? 1 : -1;
 
-       size = md->end - md->start;
+       size = md->core.end - md->core.start;
 
-       if ((md->start & md->mask) + size != (md->end & md->mask)) {
-               buf = &data[md->start & md->mask];
-               size = md->mask + 1 - (md->start & md->mask);
-               md->start += size;
+       if ((md->core.start & md->core.mask) + size != (md->core.end & md->core.mask)) {
+               buf = &data[md->core.start & md->core.mask];
+               size = md->core.mask + 1 - (md->core.start & md->core.mask);
+               md->core.start += size;
 
                if (push(md, to, buf, size) < 0) {
                        rc = -1;
@@ -511,16 +512,16 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
                }
        }
 
-       buf = &data[md->start & md->mask];
-       size = md->end - md->start;
-       md->start += size;
+       buf = &data[md->core.start & md->core.mask];
+       size = md->core.end - md->core.start;
+       md->core.start += size;
 
        if (push(md, to, buf, size) < 0) {
                rc = -1;
                goto out;
        }
 
-       md->prev = head;
+       md->core.prev = head;
        perf_mmap__consume(md);
 out:
        return rc;
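The split copy above handles the ring wrap-around. A worked example with
assumed numbers: for an 8 KiB ring, core.mask is 0x1fff; with start = 0x1f00
and end = 0x2100 the 0x200 available bytes straddle the boundary, so push()
is called twice:

	/* chunk 1: mask + 1 - (start & mask) = 0x2000 - 0x1f00 = 0x100 bytes
	 *          from &data[0x1f00]; start advances to 0x2000
	 * chunk 2: end - start = 0x100 bytes from &data[0x2000 & 0x1fff] =
	 *          &data[0], draining the rest
	 */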
@@ -529,16 +530,16 @@ out:
 /*
  * Mandatory for overwrite mode
  * The direction of overwrite mode is backward.
- * The last perf_mmap__read() will set tail to map->prev.
- * Need to correct the map->prev to head which is the end of next read.
+ * The last perf_mmap__read() will set tail to map->core.prev.
+ * We need to correct map->core.prev to head, which is the end of the next read.
  */
-void perf_mmap__read_done(struct perf_mmap *map)
+void perf_mmap__read_done(struct mmap *map)
 {
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
-       if (!refcount_read(&map->refcnt))
+       if (!refcount_read(&map->core.refcnt))
                return;
 
-       map->prev = perf_mmap__read_head(map);
+       map->core.prev = perf_mmap__read_head(map);
 }
index 3857a49..e567c1c 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __PERF_MMAP_H
 #define __PERF_MMAP_H 1
 
+#include <internal/mmap.h>
 #include <linux/compiler.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
 
 struct aiocb;
 /**
- * struct perf_mmap - perf's ring buffer mmap details
+ * struct mmap - perf's ring buffer mmap details
  *
  * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
  */
-struct perf_mmap {
-       void             *base;
-       int              mask;
-       int              fd;
-       int              cpu;
-       refcount_t       refcnt;
-       u64              prev;
-       u64              start;
-       u64              end;
-       bool             overwrite;
+struct mmap {
+       struct perf_mmap        core;
        struct auxtrace_mmap auxtrace_mmap;
-       char             event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
 #ifdef HAVE_AIO_SUPPORT
        struct {
                void             **data;
@@ -40,71 +32,42 @@ struct perf_mmap {
        } aio;
 #endif
        cpu_set_t       affinity_mask;
-       u64             flush;
        void            *data;
        int             comp_level;
 };
 
-/*
- * State machine of bkw_mmap_state:
- *
- *                     .________________(forbid)_____________.
- *                     |                                     V
- * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
- *                     ^  ^              |   ^               |
- *                     |  |__(forbid)____/   |___(forbid)___/|
- *                     |                                     |
- *                      \_________________(3)_______________/
- *
- * NOTREADY     : Backward ring buffers are not ready
- * RUNNING      : Backward ring buffers are recording
- * DATA_PENDING : We are required to collect data from backward ring buffers
- * EMPTY        : We have collected data from backward ring buffers.
- *
- * (0): Setup backward ring buffer
- * (1): Pause ring buffers for reading
- * (2): Read from ring buffers
- * (3): Resume ring buffers for recording
- */
-enum bkw_mmap_state {
-       BKW_MMAP_NOTREADY,
-       BKW_MMAP_RUNNING,
-       BKW_MMAP_DATA_PENDING,
-       BKW_MMAP_EMPTY,
-};
-
 struct mmap_params {
        int prot, mask, nr_cblocks, affinity, flush, comp_level;
        struct auxtrace_mmap_params auxtrace_mp;
 };
 
-int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu);
-void perf_mmap__munmap(struct perf_mmap *map);
+int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
+void perf_mmap__munmap(struct mmap *map);
 
-void perf_mmap__get(struct perf_mmap *map);
-void perf_mmap__put(struct perf_mmap *map);
+void perf_mmap__get(struct mmap *map);
+void perf_mmap__put(struct mmap *map);
 
-void perf_mmap__consume(struct perf_mmap *map);
+void perf_mmap__consume(struct mmap *map);
 
-static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
+static inline u64 perf_mmap__read_head(struct mmap *mm)
 {
-       return ring_buffer_read_head(mm->base);
+       return ring_buffer_read_head(mm->core.base);
 }
 
-static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+static inline void perf_mmap__write_tail(struct mmap *md, u64 tail)
 {
-       ring_buffer_write_tail(md->base, tail);
+       ring_buffer_write_tail(md->core.base, tail);
 }
 
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
+union perf_event *perf_mmap__read_forward(struct mmap *map);
 
-union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+union perf_event *perf_mmap__read_event(struct mmap *map);
 
-int perf_mmap__push(struct perf_mmap *md, void *to,
-                   int push(struct perf_mmap *map, void *to, void *buf, size_t size));
+int perf_mmap__push(struct mmap *md, void *to,
+                   int push(struct mmap *map, void *to, void *buf, size_t size));
 
-size_t perf_mmap__mmap_len(struct perf_mmap *map);
+size_t perf_mmap__mmap_len(struct mmap *map);
 
-int perf_mmap__read_init(struct perf_mmap *md);
-void perf_mmap__read_done(struct perf_mmap *map);
+int perf_mmap__read_init(struct mmap *md);
+void perf_mmap__read_done(struct mmap *map);
 #endif /*__PERF_MMAP_H */
index 99be15d..285d6f3 100644 (file)
 #include <string.h>
 #include <unistd.h>
 #include <asm/bug.h>
+#include <linux/kernel.h>
 #include <linux/zalloc.h>
 
+static const char *perf_ns__names[] = {
+       [NET_NS_INDEX]          = "net",
+       [UTS_NS_INDEX]          = "uts",
+       [IPC_NS_INDEX]          = "ipc",
+       [PID_NS_INDEX]          = "pid",
+       [USER_NS_INDEX]         = "user",
+       [MNT_NS_INDEX]          = "mnt",
+       [CGROUP_NS_INDEX]       = "cgroup",
+};
+
+const char *perf_ns__name(unsigned int id)
+{
+       if (id >= ARRAY_SIZE(perf_ns__names))
+               return "UNKNOWN";
+       return perf_ns__names[id];
+}
+
 struct namespaces *namespaces__new(struct perf_record_namespaces *event)
 {
        struct namespaces *namespaces;
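
A usage sketch for the new helper: indices inside the table resolve to their
short names, anything out of range falls back to "UNKNOWN":

	printf("%s\n", perf_ns__name(NET_NS_INDEX));	/* "net" */
	printf("%s\n", perf_ns__name(1000));		/* "UNKNOWN" */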
index 40edef5..4b33f68 100644 (file)
@@ -66,4 +66,6 @@ static inline void __nsinfo__zput(struct nsinfo **nsip)
 
 #define nsinfo__zput(nsi) __nsinfo__zput(&nsi)
 
+const char *perf_ns__name(unsigned int id);
+
 #endif  /* __PERF_NAMESPACES_H */
index 5ec21d2..b5e2ade 100644 (file)
 #include "parse-events-flex.h"
 #include "pmu.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "probe-file.h"
 #include "asm/bug.h"
 #include "util/parse-branch-options.h"
 #include "metricgroup.h"
+#include "util/evsel_config.h"
+#include "util/event.h"
 
 #define MAX_NAME_LEN 100
 
@@ -335,7 +336,7 @@ __add_event(struct list_head *list, int *idx,
        (*idx)++;
        evsel->core.cpus   = perf_cpu_map__get(cpus);
        evsel->core.own_cpus = perf_cpu_map__get(cpus);
-       evsel->system_wide = pmu ? pmu->is_uncore : false;
+       evsel->core.system_wide = pmu ? pmu->is_uncore : false;
        evsel->auto_merge_stats = auto_merge_stats;
 
        if (name)
@@ -1936,7 +1937,7 @@ int parse_events(struct evlist *evlist, const char *str,
 
                perf_evlist__splice_list_tail(evlist, &parse_state.list);
                evlist->nr_groups += parse_state.nr_groups;
-               last = perf_evlist__last(evlist);
+               last = evlist__last(evlist);
                last->cmdline_group_boundary = true;
 
                return 0;
@@ -2050,7 +2051,7 @@ foreach_evsel_in_last_glob(struct evlist *evlist,
         * So no need to WARN here, let *func do this.
         */
        if (evlist->core.nr_entries > 0)
-               last = perf_evlist__last(evlist);
+               last = evlist__last(evlist);
 
        do {
                err = (*func)(last, arg);
index f1c36ed..48126ae 100644 (file)
@@ -9,13 +9,11 @@
 #define YYDEBUG 1
 
 #include <fnmatch.h>
+#include <stdio.h>
 #include <linux/compiler.h>
-#include <linux/list.h>
 #include <linux/types.h>
-#include "util.h"
 #include "pmu.h"
 #include "evsel.h"
-#include "debug.h"
 #include "parse-events.h"
 #include "parse-events-bison.h"
 
index e635c59..7a0ab35 100644 (file)
@@ -12,7 +12,6 @@
 #include <setjmp.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
-#include "util/util.h"
 #include "util/debug.h"
 #include "util/perf-hooks.h"
 
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
new file mode 100644 (file)
index 0000000..d4ad3f0
--- /dev/null
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include "util/evsel_fprintf.h"
+
+struct bit_names {
+       int bit;
+       const char *name;
+};
+
+static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
+{
+       bool first_bit = true;
+       int i = 0;
+
+       do {
+               if (value & bits[i].bit) {
+                       buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
+                       first_bit = false;
+               }
+       } while (bits[++i].name != NULL);
+}
+
+static void __p_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_##n, #n }
+       struct bit_names bits[] = {
+               bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
+               bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
+               bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
+               bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+               bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
+               bit_name(WEIGHT), bit_name(PHYS_ADDR),
+               { .name = NULL, }
+       };
+#undef bit_name
+       __p_bits(buf, size, value, bits);
+}
+
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+       struct bit_names bits[] = {
+               bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
+               bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
+               bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
+               bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
+               bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+               { .name = NULL, }
+       };
+#undef bit_name
+       __p_bits(buf, size, value, bits);
+}
+
+static void __p_read_format(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_FORMAT_##n, #n }
+       struct bit_names bits[] = {
+               bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
+               bit_name(ID), bit_name(GROUP),
+               { .name = NULL, }
+       };
+#undef bit_name
+       __p_bits(buf, size, value, bits);
+}
+
+#define BUF_SIZE               1024
+
+#define p_hex(val)             snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
+#define p_unsigned(val)                snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
+#define p_signed(val)          snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
+#define p_sample_type(val)     __p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
+#define p_read_format(val)     __p_read_format(buf, BUF_SIZE, val)
+
+#define PRINT_ATTRn(_n, _f, _p)                                \
+do {                                                   \
+       if (attr->_f) {                                 \
+               _p(attr->_f);                           \
+               ret += attr__fprintf(fp, _n, buf, priv);\
+       }                                               \
+} while (0)
+
+#define PRINT_ATTRf(_f, _p)    PRINT_ATTRn(#_f, _f, _p)
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+                            attr__fprintf_f attr__fprintf, void *priv)
+{
+       char buf[BUF_SIZE];
+       int ret = 0;
+
+       PRINT_ATTRf(type, p_unsigned);
+       PRINT_ATTRf(size, p_unsigned);
+       PRINT_ATTRf(config, p_hex);
+       PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
+       PRINT_ATTRf(sample_type, p_sample_type);
+       PRINT_ATTRf(read_format, p_read_format);
+
+       PRINT_ATTRf(disabled, p_unsigned);
+       PRINT_ATTRf(inherit, p_unsigned);
+       PRINT_ATTRf(pinned, p_unsigned);
+       PRINT_ATTRf(exclusive, p_unsigned);
+       PRINT_ATTRf(exclude_user, p_unsigned);
+       PRINT_ATTRf(exclude_kernel, p_unsigned);
+       PRINT_ATTRf(exclude_hv, p_unsigned);
+       PRINT_ATTRf(exclude_idle, p_unsigned);
+       PRINT_ATTRf(mmap, p_unsigned);
+       PRINT_ATTRf(comm, p_unsigned);
+       PRINT_ATTRf(freq, p_unsigned);
+       PRINT_ATTRf(inherit_stat, p_unsigned);
+       PRINT_ATTRf(enable_on_exec, p_unsigned);
+       PRINT_ATTRf(task, p_unsigned);
+       PRINT_ATTRf(watermark, p_unsigned);
+       PRINT_ATTRf(precise_ip, p_unsigned);
+       PRINT_ATTRf(mmap_data, p_unsigned);
+       PRINT_ATTRf(sample_id_all, p_unsigned);
+       PRINT_ATTRf(exclude_host, p_unsigned);
+       PRINT_ATTRf(exclude_guest, p_unsigned);
+       PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
+       PRINT_ATTRf(exclude_callchain_user, p_unsigned);
+       PRINT_ATTRf(mmap2, p_unsigned);
+       PRINT_ATTRf(comm_exec, p_unsigned);
+       PRINT_ATTRf(use_clockid, p_unsigned);
+       PRINT_ATTRf(context_switch, p_unsigned);
+       PRINT_ATTRf(write_backward, p_unsigned);
+       PRINT_ATTRf(namespaces, p_unsigned);
+       PRINT_ATTRf(ksymbol, p_unsigned);
+       PRINT_ATTRf(bpf_event, p_unsigned);
+       PRINT_ATTRf(aux_output, p_unsigned);
+
+       PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
+       PRINT_ATTRf(bp_type, p_unsigned);
+       PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
+       PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
+       PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
+       PRINT_ATTRf(sample_regs_user, p_hex);
+       PRINT_ATTRf(sample_stack_user, p_unsigned);
+       PRINT_ATTRf(clockid, p_signed);
+       PRINT_ATTRf(sample_regs_intr, p_hex);
+       PRINT_ATTRf(aux_watermark, p_unsigned);
+       PRINT_ATTRf(sample_max_stack, p_unsigned);
+
+       return ret;
+}
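
perf_event_attr__fprintf() delegates all formatting of the "name value"
pairs to the attr__fprintf callback it is handed. A minimal sketch of a
matching callback (print_attr_field is a hypothetical name):

	static int print_attr_field(FILE *fp, const char *name,
				    const char *val, void *priv __maybe_unused)
	{
		/* one aligned "name value" line per set attribute field */
		return fprintf(fp, "  %-26s %s\n", name, val);
	}

	/* ... perf_event_attr__fprintf(stdout, &attr, print_attr_field, NULL); */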
index fb597fa..5608da8 100644 (file)
@@ -20,7 +20,6 @@
 #include "debug.h"
 #include "pmu.h"
 #include "parse-events.h"
-#include "cpumap.h"
 #include "header.h"
 #include "pmu-events/pmu-events.h"
 #include "string2.h"
index b8e0967..91cab5f 100644 (file)
@@ -2331,6 +2331,7 @@ void clear_probe_trace_event(struct probe_trace_event *tev)
                }
        }
        zfree(&tev->args);
+       tev->nargs = 0;
 }
 
 struct kprobe_blacklist_node {
index d13db55..b659466 100644 (file)
@@ -16,6 +16,7 @@
 #include "strlist.h"
 #include "strfilter.h"
 #include "debug.h"
+#include "build-id.h"
 #include "dso.h"
 #include "color.h"
 #include "symbol.h"
index 505905f..cd9f95e 100644 (file)
@@ -1245,6 +1245,17 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
        return n;
 }
 
+static bool trace_event_finder_overlap(struct trace_event_finder *tf)
+{
+       int i;
+
+       for (i = 0; i < tf->ntevs; i++) {
+               if (tf->pf.addr == tf->tevs[i].point.address)
+                       return true;
+       }
+       return false;
+}
+
 /* Add a found probe point into trace event list */
 static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
 {
@@ -1255,6 +1266,14 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
        struct perf_probe_arg *args = NULL;
        int ret, i;
 
+       /*
+        * For some reason (e.g. a different column assigned to the same
+        * address), this callback can be called again with an address that
+        * has already been handled. Ignore such duplicates.
+        */
+       if (trace_event_finder_overlap(tf))
+               return 0;
+
        /* Check number of tevs */
        if (tf->ntevs == tf->max_tevs) {
                pr_warning("Too many (> %d) probe points found.\n",
index c6dd478..9af1838 100644 (file)
@@ -10,6 +10,7 @@ util/python.c
 util/cap.c
 util/evlist.c
 util/evsel.c
+util/perf_event_attr_fprintf.c
 util/cpumap.c
 util/memswap.c
 util/mmap.c
index 07ca453..0246036 100644 (file)
@@ -6,17 +6,16 @@
 #include <linux/err.h>
 #include <perf/cpumap.h>
 #include <traceevent/event-parse.h>
-#include "debug.h"
 #include "evlist.h"
 #include "callchain.h"
 #include "evsel.h"
 #include "event.h"
-#include "cpumap.h"
 #include "print_binary.h"
 #include "thread_map.h"
 #include "trace-event.h"
 #include "mmap.h"
-#include "util.h"
+#include "util/env.h"
+#include <internal/lib.h>
 #include "../perf-sys.h"
 
 #if PY_MAJOR_VERSION < 3
@@ -55,12 +54,19 @@ int parse_callchain_record(const char *arg __maybe_unused,
        return 0;
 }
 
+/*
+ * Define this here to avoid dragging in util/env.c
+ */
+struct perf_env perf_env;
+
 /*
  * Support debug printing even though util/debug.c is not linked.  That means
  * implementing 'verbose' and 'eprintf'.
  */
 int verbose;
 
+int eprintf(int level, int var, const char *fmt, ...);
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
        va_list args;
@@ -884,7 +890,7 @@ static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
 
 static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
 {
-       perf_evlist__exit(&pevlist->evlist);
+       evlist__exit(&pevlist->evlist);
        Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
 }
 
@@ -899,7 +905,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
                                         &pages, &overwrite))
                return NULL;
 
-       if (perf_evlist__mmap(evlist, pages) < 0) {
+       if (evlist__mmap(evlist, pages) < 0) {
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
        }
@@ -918,7 +924,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
                return NULL;
 
-       n = perf_evlist__poll(evlist, timeout);
+       n = evlist__poll(evlist, timeout);
        if (n < 0) {
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
@@ -935,17 +941,17 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
         PyObject *list = PyList_New(0);
        int i;
 
-       for (i = 0; i < evlist->pollfd.nr; ++i) {
+       for (i = 0; i < evlist->core.pollfd.nr; ++i) {
                PyObject *file;
 #if PY_MAJOR_VERSION < 3
-               FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r");
+               FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r");
 
                if (fp == NULL)
                        goto free_list;
 
                file = PyFile_FromFile(fp, "perf", "r", NULL);
 #else
-               file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
+               file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1,
                                     NULL, NULL, NULL, 0);
 #endif
                if (file == NULL)
@@ -984,14 +990,14 @@ static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
        return Py_BuildValue("i", evlist->core.nr_entries);
 }
 
-static struct perf_mmap *get_md(struct evlist *evlist, int cpu)
+static struct mmap *get_md(struct evlist *evlist, int cpu)
 {
        int i;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               struct perf_mmap *md = &evlist->mmap[i];
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               struct mmap *md = &evlist->mmap[i];
 
-               if (md->cpu == cpu)
+               if (md->core.cpu == cpu)
                        return md;
        }
 
@@ -1005,7 +1011,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
        union perf_event *event;
        int sample_id_all = 1, cpu;
        static char *kwlist[] = { "cpu", "sample_id_all", NULL };
-       struct perf_mmap *md;
+       struct mmap *md;
        int err;
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
index 286fe81..8579505 100644 (file)
@@ -2,7 +2,6 @@
 #include "debug.h"
 #include "evlist.h"
 #include "evsel.h"
-#include "cpumap.h"
 #include "parse-events.h"
 #include <errno.h>
 #include <limits.h>
@@ -10,7 +9,6 @@
 #include <api/fs/fs.h>
 #include <subcmd/parse-options.h>
 #include <perf/cpumap.h>
-#include "util.h"
 #include "cloexec.h"
 #include "record.h"
 #include "../perf-sys.h"
@@ -32,7 +30,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
        if (parse_events(evlist, str, NULL))
                goto out_delete;
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        while (1) {
                fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
@@ -173,7 +171,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
                use_sample_identifier = perf_can_sample_identifier();
                sample_id = true;
        } else if (evlist->core.nr_entries > 1) {
-               struct evsel *first = perf_evlist__first(evlist);
+               struct evsel *first = evlist__first(evlist);
 
                evlist__for_each_entry(evlist, evsel) {
                        if (evsel->core.attr.sample_type == first->core.attr.sample_type)
@@ -278,7 +276,7 @@ bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
        if (err)
                goto out_delete;
 
-       evsel = perf_evlist__last(temp_evlist);
+       evsel = evlist__last(temp_evlist);
 
        if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
                struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
index 5e52e7b..f3d29d8 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "util.h"
 #include "rwsem.h"
 
index 24a9990..6785cd8 100644 (file)
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include "cpumap.h"
 #include "color.h"
 #include "evsel.h"
 #include "evlist.h"
index 4d9593e..05b43ab 100644 (file)
@@ -22,7 +22,6 @@
 #include <asm/byteorder.h>
 
 #include "debug.h"
-#include "util.h"
 #include "session.h"
 #include "evlist.h"
 #include "color.h"
index 666a56e..5d341ef 100644 (file)
@@ -37,7 +37,6 @@
 #include "../dso.h"
 #include "../callchain.h"
 #include "../evsel.h"
-#include "../util.h"
 #include "../event.h"
 #include "../thread.h"
 #include "../comm.h"
@@ -49,7 +48,6 @@
 #include "map.h"
 #include "symbol.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "print_binary.h"
 #include "stat.h"
 #include "mem-events.h"
index e9e4a04..061bb4d 100644 (file)
@@ -22,7 +22,6 @@
 #include "symbol.h"
 #include "session.h"
 #include "tool.h"
-#include "cpumap.h"
 #include "perf_regs.h"
 #include "asm/bug.h"
 #include "auxtrace.h"
 #include "thread-stack.h"
 #include "sample-raw.h"
 #include "stat.h"
-#include "util.h"
 #include "ui/progress.h"
 #include "../perf.h"
 #include "arch/common.h"
+#include <internal/lib.h>
+#include <linux/err.h>
 
 #ifdef HAVE_ZSTD_SUPPORT
 static int perf_session__process_compressed_event(struct perf_session *session,
@@ -187,6 +187,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
 struct perf_session *perf_session__new(struct perf_data *data,
                                       bool repipe, struct perf_tool *tool)
 {
+       int ret = -ENOMEM;
        struct perf_session *session = zalloc(sizeof(*session));
 
        if (!session)
@@ -201,13 +202,15 @@ struct perf_session *perf_session__new(struct perf_data *data,
 
        perf_env__init(&session->header.env);
        if (data) {
-               if (perf_data__open(data))
+               ret = perf_data__open(data);
+               if (ret < 0)
                        goto out_delete;
 
                session->data = data;
 
                if (perf_data__is_read(data)) {
-                       if (perf_session__open(session) < 0)
+                       ret = perf_session__open(session);
+                       if (ret < 0)
                                goto out_delete;
 
                        /*
@@ -222,8 +225,11 @@ struct perf_session *perf_session__new(struct perf_data *data,
                        perf_evlist__init_trace_event_sample_raw(session->evlist);
 
                        /* Open the directory data. */
-                       if (data->is_dir && perf_data__open_dir(data))
+                       if (data->is_dir) {
+                               ret = perf_data__open_dir(data);
+                               if (ret)
+                                       goto out_delete;
+                       }
                }
        } else  {
                session->machines.host.env = &perf_env;
@@ -256,7 +262,7 @@ struct perf_session *perf_session__new(struct perf_data *data,
  out_delete:
        perf_session__delete(session);
  out:
-       return NULL;
+       return ERR_PTR(ret);
 }
 
 static void perf_session__delete_threads(struct perf_session *session)
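
With perf_session__new() now returning ERR_PTR(ret) rather than NULL,
callers switch from a NULL check to the <linux/err.h> helpers; a sketch of
the assumed calling pattern:

	session = perf_session__new(data, false, tool);
	if (IS_ERR(session))
		return PTR_ERR(session);	/* propagates -ENOMEM, open errors */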
@@ -1317,6 +1323,7 @@ static int deliver_sample_value(struct evlist *evlist,
                                struct machine *machine)
 {
        struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
+       struct evsel *evsel;
 
        if (sid) {
                sample->id     = v->id;
@@ -1336,7 +1343,8 @@ static int deliver_sample_value(struct evlist *evlist,
        if (!sample->period)
                return 0;
 
-       return tool->sample(tool, event, sample, sid->evsel, machine);
+       evsel = container_of(sid->evsel, struct evsel, core);
+       return tool->sample(tool, event, sample, evsel, machine);
 }
 
 static int deliver_sample_group(struct evlist *evlist,
@@ -2412,73 +2420,3 @@ int perf_event__process_id_index(struct perf_session *session,
        }
        return 0;
 }
-
-int perf_event__synthesize_id_index(struct perf_tool *tool,
-                                   perf_event__handler_t process,
-                                   struct evlist *evlist,
-                                   struct machine *machine)
-{
-       union perf_event *ev;
-       struct evsel *evsel;
-       size_t nr = 0, i = 0, sz, max_nr, n;
-       int err;
-
-       pr_debug2("Synthesizing id index\n");
-
-       max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) /
-                sizeof(struct id_index_entry);
-
-       evlist__for_each_entry(evlist, evsel)
-               nr += evsel->ids;
-
-       n = nr > max_nr ? max_nr : nr;
-       sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry);
-       ev = zalloc(sz);
-       if (!ev)
-               return -ENOMEM;
-
-       ev->id_index.header.type = PERF_RECORD_ID_INDEX;
-       ev->id_index.header.size = sz;
-       ev->id_index.nr = n;
-
-       evlist__for_each_entry(evlist, evsel) {
-               u32 j;
-
-               for (j = 0; j < evsel->ids; j++) {
-                       struct id_index_entry *e;
-                       struct perf_sample_id *sid;
-
-                       if (i >= n) {
-                               err = process(tool, ev, NULL, machine);
-                               if (err)
-                                       goto out_err;
-                               nr -= n;
-                               i = 0;
-                       }
-
-                       e = &ev->id_index.entries[i++];
-
-                       e->id = evsel->id[j];
-
-                       sid = perf_evlist__id2sid(evlist, e->id);
-                       if (!sid) {
-                               free(ev);
-                               return -ENOENT;
-                       }
-
-                       e->idx = sid->idx;
-                       e->cpu = sid->cpu;
-                       e->tid = sid->tid;
-               }
-       }
-
-       sz = sizeof(struct perf_record_id_index) + nr * sizeof(struct id_index_entry);
-       ev->id_index.header.size = sz;
-       ev->id_index.nr = nr;
-
-       err = process(tool, ev, NULL, machine);
-out_err:
-       free(ev);
-
-       return err;
-}
index b7aa076..b4c9428 100644 (file)
@@ -138,9 +138,4 @@ int perf_session__deliver_synth_event(struct perf_session *session,
 int perf_event__process_id_index(struct perf_session *session,
                                 union perf_event *event);
 
-int perf_event__synthesize_id_index(struct perf_tool *tool,
-                                   perf_event__handler_t process,
-                                   struct evlist *evlist,
-                                   struct machine *machine);
-
 #endif /* __PERF_SESSION_H */
index a2308eb..43d1d41 100644 (file)
@@ -2329,7 +2329,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
                if (nr > evlist->core.nr_entries)
                        return NULL;
 
-               evsel = perf_evlist__first(evlist);
+               evsel = evlist__first(evlist);
                while (--nr > 0)
                        evsel = perf_evsel__next(evsel);
 
index adfcf1f..d84ed8b 100644 (file)
@@ -15,7 +15,7 @@
 #include <string.h>
 #include "srccode.h"
 #include "debug.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 
 #define MAXSRCCACHE (32*1024*1024)
 #define MAXSRCFILES     64
index 70c87fd..2c41d47 100644 (file)
@@ -738,6 +738,8 @@ static void generic_metric(struct perf_stat_config *config,
        char *n, *pn;
 
        expr__ctx_init(&pctx);
+       /* Must be first id entry */
+       expr__add_id(&pctx, name, avg);
        for (i = 0; metric_events[i]; i++) {
                struct saved_value *v;
                struct stats *stats;
@@ -776,8 +778,6 @@ static void generic_metric(struct perf_stat_config *config,
                        expr__add_id(&pctx, n, avg_stats(stats)*scale);
        }
 
-       expr__add_id(&pctx, name, avg);
-
        if (!metric_events[i]) {
                const char *p = metric_expr;
 
index 8f1ea27..ebdd130 100644 (file)
@@ -4,6 +4,7 @@
 #include <math.h>
 #include <string.h>
 #include "counts.h"
+#include "cpumap.h"
 #include "debug.h"
 #include "header.h"
 #include "stat.h"
@@ -161,6 +162,15 @@ static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
        evsel->prev_raw_counts = NULL;
 }
 
+static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel)
+{
+       if (evsel->prev_raw_counts) {
+               evsel->prev_raw_counts->aggr.val = 0;
+               evsel->prev_raw_counts->aggr.ena = 0;
+               evsel->prev_raw_counts->aggr.run = 0;
+       }
+}
+
 static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
 {
        int ncpus = perf_evsel__nr_cpus(evsel);
@@ -211,6 +221,14 @@ void perf_evlist__reset_stats(struct evlist *evlist)
        }
 }
 
+void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
+{
+       struct evsel *evsel;
+
+       evlist__for_each_entry(evlist, evsel)
+               perf_evsel__reset_prev_raw_counts(evsel);
+}
+
 static void zero_per_pkg(struct evsel *counter)
 {
        if (counter->per_pkg_mask)
@@ -318,7 +336,7 @@ static int process_counter_maps(struct perf_stat_config *config,
        int ncpus = perf_evsel__nr_cpus(counter);
        int cpu, thread;
 
-       if (counter->system_wide)
+       if (counter->core.system_wide)
                nthreads = 1;
 
        for (thread = 0; thread < nthreads; thread++) {
@@ -493,45 +511,3 @@ int create_perf_stat_counter(struct evsel *evsel,
 
        return perf_evsel__open_per_thread(evsel, evsel->core.threads);
 }
-
-int perf_stat_synthesize_config(struct perf_stat_config *config,
-                               struct perf_tool *tool,
-                               struct evlist *evlist,
-                               perf_event__handler_t process,
-                               bool attrs)
-{
-       int err;
-
-       if (attrs) {
-               err = perf_event__synthesize_attrs(tool, evlist, process);
-               if (err < 0) {
-                       pr_err("Couldn't synthesize attrs.\n");
-                       return err;
-               }
-       }
-
-       err = perf_event__synthesize_extra_attr(tool, evlist, process,
-                                               attrs);
-
-       err = perf_event__synthesize_thread_map2(tool, evlist->core.threads,
-                                                process, NULL);
-       if (err < 0) {
-               pr_err("Couldn't synthesize thread map.\n");
-               return err;
-       }
-
-       err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus,
-                                            process, NULL);
-       if (err < 0) {
-               pr_err("Couldn't synthesize thread map.\n");
-               return err;
-       }
-
-       err = perf_event__synthesize_stat_config(tool, config, process, NULL);
-       if (err < 0) {
-               pr_err("Couldn't synthesize config.\n");
-               return err;
-       }
-
-       return 0;
-}
index 14fe3e5..edbeb2f 100644 (file)
@@ -7,8 +7,9 @@
 #include <sys/types.h>
 #include <sys/resource.h>
 #include "rblist.h"
-#include "event.h"
 
+struct perf_cpu_map;
+struct perf_stat_config;
 struct timespec;
 
 struct stats {
@@ -192,6 +193,7 @@ void perf_stat__collect_metric_expr(struct evlist *);
 int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct evlist *evlist);
 void perf_evlist__reset_stats(struct evlist *evlist);
+void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter);
@@ -210,11 +212,6 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
 int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target);
-int perf_stat_synthesize_config(struct perf_stat_config *config,
-                               struct perf_tool *tool,
-                               struct evlist *evlist,
-                               perf_event__handler_t process,
-                               bool attrs);
 void
 perf_evlist__print_counters(struct evlist *evlist,
                            struct perf_stat_config *config,
index 582f4a6..96f941e 100644 (file)
 #include <linux/string.h>
 #include <linux/time64.h>
 #include <linux/zalloc.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "env.h"
 #include "svghelper.h"
-#include "cpumap.h"
 
 static u64 first_time, last_time;
 static u64 turbo_frequency, max_freq;
index 9428639..66f4be1 100644 (file)
@@ -7,6 +7,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 
+#include "dso.h"
 #include "map.h"
 #include "map_groups.h"
 #include "symbol.h"
 #include "machine.h"
 #include "vdso.h"
 #include "debug.h"
-#include "util.h"
+#include "util/copyfile.h"
 #include <linux/ctype.h>
+#include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <symbol/kallsyms.h>
+#include <internal/lib.h>
 
 #ifndef EM_AARCH64
 #define EM_AARCH64     183  /* ARM 64 bit */
index 7e2813e..d6e99af 100644 (file)
@@ -1,8 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
 #include "dso.h"
 #include "symbol.h"
 #include "symsrc.h"
-#include "util.h"
 
 #include <errno.h>
 #include <unistd.h>
@@ -13,6 +11,7 @@
 #include <byteswap.h>
 #include <sys/stat.h>
 #include <linux/zalloc.h>
+#include <internal/lib.h>
 
 static bool check_need_swap(int file_endian)
 {
index 765c75d..a8f80e4 100644 (file)
@@ -19,7 +19,7 @@
 #include "build-id.h"
 #include "cap.h"
 #include "dso.h"
-#include "util.h"
+#include "util.h" // lsdir()
 #include "debug.h"
 #include "event.h"
 #include "machine.h"
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
new file mode 100644 (file)
index 0000000..807cbca
--- /dev/null
@@ -0,0 +1,1884 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/map.h"
+#include "util/map_symbol.h"
+#include "util/branch.h"
+#include "util/memswap.h"
+#include "util/namespaces.h"
+#include "util/session.h"
+#include "util/stat.h"
+#include "util/symbol.h"
+#include "util/synthetic-events.h"
+#include "util/target.h"
+#include "util/time-utils.h"
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/perf_event.h>
+#include <asm/bug.h>
+#include <perf/evsel.h>
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
+#include <internal/lib.h> // page_size
+#include <internal/threadmap.h>
+#include <perf/threadmap.h>
+#include <symbol/kallsyms.h>
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
+
+unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
+
+int perf_tool__process_synth_event(struct perf_tool *tool,
+                                  union perf_event *event,
+                                  struct machine *machine,
+                                  perf_event__handler_t process)
+{
+       struct perf_sample synth_sample = {
+               .pid       = -1,
+               .tid       = -1,
+               .time      = -1,
+               .stream_id = -1,
+               .cpu       = -1,
+               .period    = 1,
+               .cpumode   = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+       };
+
+       return process(tool, event, &synth_sample, machine);
+}
+
+/*
+ * Assumes that the first 4095 bytes of /proc/pid/status contain
+ * the comm, tgid and ppid.
+ */
+static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
+                                   pid_t *tgid, pid_t *ppid)
+{
+       char filename[PATH_MAX];
+       char bf[4096];
+       int fd;
+       size_t size = 0;
+       ssize_t n;
+       char *name, *tgids, *ppids;
+
+       *tgid = -1;
+       *ppid = -1;
+
+       snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0) {
+               pr_debug("couldn't open %s\n", filename);
+               return -1;
+       }
+
+       n = read(fd, bf, sizeof(bf) - 1);
+       close(fd);
+       if (n <= 0) {
+               pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
+                          pid);
+               return -1;
+       }
+       bf[n] = '\0';
+
+       name = strstr(bf, "Name:");
+       tgids = strstr(bf, "Tgid:");
+       ppids = strstr(bf, "PPid:");
+
+       if (name) {
+               char *nl;
+
+               name = skip_spaces(name + 5);  /* strlen("Name:") */
+               nl = strchr(name, '\n');
+               if (nl)
+                       *nl = '\0';
+
+               size = strlen(name);
+               if (size >= len)
+                       size = len - 1;
+               memcpy(comm, name, size);
+               comm[size] = '\0';
+       } else {
+               pr_debug("Name: string not found for pid %d\n", pid);
+       }
+
+       if (tgids) {
+               tgids += 5;  /* strlen("Tgid:") */
+               *tgid = atoi(tgids);
+       } else {
+               pr_debug("Tgid: string not found for pid %d\n", pid);
+       }
+
+       if (ppids) {
+               ppids += 5;  /* strlen("PPid:") */
+               *ppid = atoi(ppids);
+       } else {
+               pr_debug("PPid: string not found for pid %d\n", pid);
+       }
+
+       return 0;
+}
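
For reference, the parser above only needs three of the keys that the kernel
emits near the top of /proc/<pid>/status; a trimmed, illustrative excerpt:

	Name:	cat
	Tgid:	41038
	Pid:	41038
	PPid:	1

The constant 5 skipped after each strstr() hit is strlen("Name:"),
strlen("Tgid:") and strlen("PPid:") respectively.
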
+
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
+                                   struct machine *machine,
+                                   pid_t *tgid, pid_t *ppid)
+{
+       size_t size;
+
+       *ppid = -1;
+
+       memset(&event->comm, 0, sizeof(event->comm));
+
+       if (machine__is_host(machine)) {
+               if (perf_event__get_comm_ids(pid, event->comm.comm,
+                                            sizeof(event->comm.comm),
+                                            tgid, ppid) != 0) {
+                       return -1;
+               }
+       } else {
+               *tgid = machine->pid;
+       }
+
+       if (*tgid < 0)
+               return -1;
+
+       event->comm.pid = *tgid;
+       event->comm.header.type = PERF_RECORD_COMM;
+
+       size = strlen(event->comm.comm) + 1;
+       size = PERF_ALIGN(size, sizeof(u64));
+       memset(event->comm.comm + size, 0, machine->id_hdr_size);
+       event->comm.header.size = (sizeof(event->comm) -
+                               (sizeof(event->comm.comm) - size) +
+                               machine->id_hdr_size);
+       event->comm.tid = pid;
+
+       return 0;
+}
+
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+                                        union perf_event *event, pid_t pid,
+                                        perf_event__handler_t process,
+                                        struct machine *machine)
+{
+       pid_t tgid, ppid;
+
+       if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
+               return -1;
+
+       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+               return -1;
+
+       return tgid;
+}
+
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+                                        struct perf_ns_link_info *ns_link_info)
+{
+       struct stat64 st;
+       char proc_ns[128];
+
+       sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
+       if (stat64(proc_ns, &st) == 0) {
+               ns_link_info->dev = st.st_dev;
+               ns_link_info->ino = st.st_ino;
+       }
+}
+
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+                                     union perf_event *event,
+                                     pid_t pid, pid_t tgid,
+                                     perf_event__handler_t process,
+                                     struct machine *machine)
+{
+       u32 idx;
+       struct perf_ns_link_info *ns_link_info;
+
+       if (!tool || !tool->namespace_events)
+               return 0;
+
+       memset(&event->namespaces, 0, (sizeof(event->namespaces) +
+              (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+              machine->id_hdr_size));
+
+       event->namespaces.pid = tgid;
+       event->namespaces.tid = pid;
+
+       event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+       ns_link_info = event->namespaces.link_info;
+
+       for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
+               perf_event__get_ns_link_info(pid, perf_ns__name(idx),
+                                            &ns_link_info[idx]);
+
+       event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+
+       event->namespaces.header.size = (sizeof(event->namespaces) +
+                       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+                       machine->id_hdr_size);
+
+       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+               return -1;
+
+       return 0;
+}
+
+static int perf_event__synthesize_fork(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      pid_t pid, pid_t tgid, pid_t ppid,
+                                      perf_event__handler_t process,
+                                      struct machine *machine)
+{
+       memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
+
+       /*
+        * For the main thread set the parent to the ppid from the status
+        * file. For other threads set the parent pid to the main thread,
+        * i.e. assume the main thread spawns all threads in a process.
+        */
+       if (tgid == pid) {
+               event->fork.ppid = ppid;
+               event->fork.ptid = ppid;
+       } else {
+               event->fork.ppid = tgid;
+               event->fork.ptid = tgid;
+       }
+       event->fork.pid  = tgid;
+       event->fork.tid  = pid;
+       event->fork.header.type = PERF_RECORD_FORK;
+       event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
+
+       event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
+
+       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+               return -1;
+
+       return 0;
+}
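
As a worked example of the parenting rule in the comment above, take a
process whose main thread is pid 100 (with ppid 1 from its status file) and
which has spawned a worker thread 101; the synthesized fork events carry:

	pid 100 (main thread):   fork.ppid = fork.ptid = 1
	pid 101 (worker thread): fork.ppid = fork.ptid = 100
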
+
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      pid_t pid, pid_t tgid,
+                                      perf_event__handler_t process,
+                                      struct machine *machine,
+                                      bool mmap_data)
+{
+       char filename[PATH_MAX];
+       FILE *fp;
+       unsigned long long t;
+       bool truncation = false;
+       unsigned long long timeout = proc_map_timeout * 1000000ULL;
+       int rc = 0;
+       const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+       int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
+
+       if (machine__is_default_guest(machine))
+               return 0;
+
+       snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
+                machine->root_dir, pid, pid);
+
+       fp = fopen(filename, "r");
+       if (fp == NULL) {
+               /*
+                * We raced with a task exiting - just return:
+                */
+               pr_debug("couldn't open %s\n", filename);
+               return -1;
+       }
+
+       event->header.type = PERF_RECORD_MMAP2;
+       t = rdclock();
+
+       while (1) {
+               char bf[BUFSIZ];
+               char prot[5];
+               char execname[PATH_MAX];
+               char anonstr[] = "//anon";
+               unsigned int ino;
+               size_t size;
+               ssize_t n;
+
+               if (fgets(bf, sizeof(bf), fp) == NULL)
+                       break;
+
+               if ((rdclock() - t) > timeout) {
+                       pr_warning("Reading %s time out. "
+                                  "You may want to increase "
+                                  "the time limit by --proc-map-timeout\n",
+                                  filename);
+                       truncation = true;
+                       goto out;
+               }
+
+               /* ensure null termination since stack will be reused. */
+               strcpy(execname, "");
+
+               /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+               n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
+                      &event->mmap2.start, &event->mmap2.len, prot,
+                      &event->mmap2.pgoff, &event->mmap2.maj,
+                      &event->mmap2.min,
+                      &ino, execname);
+
+               /*
+                * Anon maps don't have the execname.
+                */
+               if (n < 7)
+                       continue;
+
+               event->mmap2.ino = (u64)ino;
+
+               /*
+                * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+                */
+               if (machine__is_host(machine))
+                       event->header.misc = PERF_RECORD_MISC_USER;
+               else
+                       event->header.misc = PERF_RECORD_MISC_GUEST_USER;
+
+               /* map protection and flags bits */
+               event->mmap2.prot = 0;
+               event->mmap2.flags = 0;
+               if (prot[0] == 'r')
+                       event->mmap2.prot |= PROT_READ;
+               if (prot[1] == 'w')
+                       event->mmap2.prot |= PROT_WRITE;
+               if (prot[2] == 'x')
+                       event->mmap2.prot |= PROT_EXEC;
+
+               if (prot[3] == 's')
+                       event->mmap2.flags |= MAP_SHARED;
+               else
+                       event->mmap2.flags |= MAP_PRIVATE;
+
+               if (prot[2] != 'x') {
+                       if (!mmap_data || prot[0] != 'r')
+                               continue;
+
+                       event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+               }
+
+out:
+               if (truncation)
+                       event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
+               if (!strcmp(execname, ""))
+                       strcpy(execname, anonstr);
+
+               if (hugetlbfs_mnt_len &&
+                   !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
+                       strcpy(execname, anonstr);
+                       event->mmap2.flags |= MAP_HUGETLB;
+               }
+
+               size = strlen(execname) + 1;
+               memcpy(event->mmap2.filename, execname, size);
+               size = PERF_ALIGN(size, sizeof(u64));
+               event->mmap2.len -= event->mmap.start;
+               event->mmap2.header.size = (sizeof(event->mmap2) -
+                                       (sizeof(event->mmap2.filename) - size));
+               memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+               event->mmap2.header.size += machine->id_hdr_size;
+               event->mmap2.pid = tgid;
+               event->mmap2.tid = pid;
+
+               if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+                       rc = -1;
+                       break;
+               }
+
+               if (truncation)
+                       break;
+       }
+
+       fclose(fp);
+       return rc;
+}
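
To make the prot/flags decoding above concrete, here is how two example
permission fields from /proc/<pid>/maps end up in the event; the second,
non-executable mapping is only emitted when mmap_data is true, and then with
PERF_RECORD_MISC_MMAP_DATA set in header.misc:

	"r-xp" -> mmap2.prot = PROT_READ | PROT_EXEC,  mmap2.flags = MAP_PRIVATE
	"rw-s" -> mmap2.prot = PROT_READ | PROT_WRITE, mmap2.flags = MAP_SHARED
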
+
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
+                                  struct machine *machine)
+{
+       int rc = 0;
+       struct map *pos;
+       struct maps *maps = machine__kernel_maps(machine);
+       union perf_event *event = zalloc((sizeof(event->mmap) +
+                                         machine->id_hdr_size));
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
+
+       event->header.type = PERF_RECORD_MMAP;
+
+       /*
+        * kernel uses 0 for user space maps, see kernel/perf_event.c
+        * __perf_event_mmap
+        */
+       if (machine__is_host(machine))
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
+       else
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+       for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+               size_t size;
+
+               if (!__map__is_kmodule(pos))
+                       continue;
+
+               size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+               event->mmap.header.type = PERF_RECORD_MMAP;
+               event->mmap.header.size = (sizeof(event->mmap) -
+                                       (sizeof(event->mmap.filename) - size));
+               memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+               event->mmap.header.size += machine->id_hdr_size;
+               event->mmap.start = pos->start;
+               event->mmap.len   = pos->end - pos->start;
+               event->mmap.pid   = machine->pid;
+
+               memcpy(event->mmap.filename, pos->dso->long_name,
+                      pos->dso->long_name_len + 1);
+               if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+                       rc = -1;
+                       break;
+               }
+       }
+
+       free(event);
+       return rc;
+}
+
+static int __event__synthesize_thread(union perf_event *comm_event,
+                                     union perf_event *mmap_event,
+                                     union perf_event *fork_event,
+                                     union perf_event *namespaces_event,
+                                     pid_t pid, int full, perf_event__handler_t process,
+                                     struct perf_tool *tool, struct machine *machine, bool mmap_data)
+{
+       char filename[PATH_MAX];
+       DIR *tasks;
+       struct dirent *dirent;
+       pid_t tgid, ppid;
+       int rc = 0;
+
+       /* special case: only send one comm event using the passed-in pid */
+       if (!full) {
+               tgid = perf_event__synthesize_comm(tool, comm_event, pid,
+                                                  process, machine);
+
+               if (tgid == -1)
+                       return -1;
+
+               if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+                                                     tgid, process, machine) < 0)
+                       return -1;
+
+               /*
+                * Send mmap only for the thread group leader,
+                * see thread__init_map_groups.
+                */
+               if (pid == tgid &&
+                   perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+                                                      process, machine, mmap_data))
+                       return -1;
+
+               return 0;
+       }
+
+       if (machine__is_default_guest(machine))
+               return 0;
+
+       snprintf(filename, sizeof(filename), "%s/proc/%d/task",
+                machine->root_dir, pid);
+
+       tasks = opendir(filename);
+       if (tasks == NULL) {
+               pr_debug("couldn't open %s\n", filename);
+               return 0;
+       }
+
+       while ((dirent = readdir(tasks)) != NULL) {
+               char *end;
+               pid_t _pid;
+
+               _pid = strtol(dirent->d_name, &end, 10);
+               if (*end)
+                       continue;
+
+               rc = -1;
+               if (perf_event__prepare_comm(comm_event, _pid, machine,
+                                            &tgid, &ppid) != 0)
+                       break;
+
+               if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
+                                               ppid, process, machine) < 0)
+                       break;
+
+               if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+                                                     tgid, process, machine) < 0)
+                       break;
+
+               /*
+                * Send the prepared comm event
+                */
+               if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
+                       break;
+
+               rc = 0;
+               if (_pid == pid) {
+                       /* process the parent's maps too */
+                       rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+                                               process, machine, mmap_data);
+                       if (rc)
+                               break;
+               }
+       }
+
+       closedir(tasks);
+       return rc;
+}
+
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+                                     struct perf_thread_map *threads,
+                                     perf_event__handler_t process,
+                                     struct machine *machine,
+                                     bool mmap_data)
+{
+       union perf_event *comm_event, *mmap_event, *fork_event;
+       union perf_event *namespaces_event;
+       int err = -1, thread, j;
+
+       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
+
+       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+       if (fork_event == NULL)
+               goto out_free_mmap;
+
+       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+                                 machine->id_hdr_size);
+       if (namespaces_event == NULL)
+               goto out_free_fork;
+
+       err = 0;
+       for (thread = 0; thread < threads->nr; ++thread) {
+               if (__event__synthesize_thread(comm_event, mmap_event,
+                                              fork_event, namespaces_event,
+                                              perf_thread_map__pid(threads, thread), 0,
+                                              process, tool, machine,
+                                              mmap_data)) {
+                       err = -1;
+                       break;
+               }
+
+               /*
+                * comm.pid is set to thread group id by
+                * perf_event__synthesize_comm
+                */
+               if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) {
+                       bool need_leader = true;
+
+                       /* is thread group leader in thread_map? */
+                       for (j = 0; j < threads->nr; ++j) {
+                               if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) {
+                                       need_leader = false;
+                                       break;
+                               }
+                       }
+
+                       /* if not, generate events for it */
+                       if (need_leader &&
+                           __event__synthesize_thread(comm_event, mmap_event,
+                                                      fork_event, namespaces_event,
+                                                      comm_event->comm.pid, 0,
+                                                      process, tool, machine,
+                                                      mmap_data)) {
+                               err = -1;
+                               break;
+                       }
+               }
+       }
+       free(namespaces_event);
+out_free_fork:
+       free(fork_event);
+out_free_mmap:
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
+}
+
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+                                           perf_event__handler_t process,
+                                           struct machine *machine,
+                                           bool mmap_data,
+                                           struct dirent **dirent,
+                                           int start,
+                                           int num)
+{
+       union perf_event *comm_event, *mmap_event, *fork_event;
+       union perf_event *namespaces_event;
+       int err = -1;
+       char *end;
+       pid_t pid;
+       int i;
+
+       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
+
+       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+       if (fork_event == NULL)
+               goto out_free_mmap;
+
+       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+                                 machine->id_hdr_size);
+       if (namespaces_event == NULL)
+               goto out_free_fork;
+
+       for (i = start; i < start + num; i++) {
+               if (!isdigit(dirent[i]->d_name[0]))
+                       continue;
+
+               pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
+               /* only interested in proper numerical dirents */
+               if (*end)
+                       continue;
+               /*
+                * We may race with an exiting thread, so don't stop just because
+                * one thread couldn't be synthesized.
+                */
+               __event__synthesize_thread(comm_event, mmap_event, fork_event,
+                                          namespaces_event, pid, 1, process,
+                                          tool, machine, mmap_data);
+       }
+       err = 0;
+
+       free(namespaces_event);
+out_free_fork:
+       free(fork_event);
+out_free_mmap:
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
+}
+
+struct synthesize_threads_arg {
+       struct perf_tool *tool;
+       perf_event__handler_t process;
+       struct machine *machine;
+       bool mmap_data;
+       struct dirent **dirent;
+       int num;
+       int start;
+};
+
+static void *synthesize_threads_worker(void *arg)
+{
+       struct synthesize_threads_arg *args = arg;
+
+       __perf_event__synthesize_threads(args->tool, args->process,
+                                        args->machine, args->mmap_data,
+                                        args->dirent,
+                                        args->start, args->num);
+       return NULL;
+}
+
+int perf_event__synthesize_threads(struct perf_tool *tool,
+                                  perf_event__handler_t process,
+                                  struct machine *machine,
+                                  bool mmap_data,
+                                  unsigned int nr_threads_synthesize)
+{
+       struct synthesize_threads_arg *args = NULL;
+       pthread_t *synthesize_threads = NULL;
+       char proc_path[PATH_MAX];
+       struct dirent **dirent;
+       int num_per_thread;
+       int m, n, i, j;
+       int thread_nr;
+       int base = 0;
+       int err = -1;
+
+       if (machine__is_default_guest(machine))
+               return 0;
+
+       snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+       n = scandir(proc_path, &dirent, 0, alphasort);
+       if (n < 0)
+               return err;
+
+       if (nr_threads_synthesize == UINT_MAX)
+               thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+       else
+               thread_nr = nr_threads_synthesize;
+
+       if (thread_nr <= 1) {
+               err = __perf_event__synthesize_threads(tool, process,
+                                                      machine, mmap_data,
+                                                      dirent, base, n);
+               goto free_dirent;
+       }
+       if (thread_nr > n)
+               thread_nr = n;
+
+       synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+       if (synthesize_threads == NULL)
+               goto free_dirent;
+
+       args = calloc(sizeof(*args), thread_nr);
+       if (args == NULL)
+               goto free_threads;
+
+       num_per_thread = n / thread_nr;
+       m = n % thread_nr;
+       for (i = 0; i < thread_nr; i++) {
+               args[i].tool = tool;
+               args[i].process = process;
+               args[i].machine = machine;
+               args[i].mmap_data = mmap_data;
+               args[i].dirent = dirent;
+       }
+       for (i = 0; i < m; i++) {
+               args[i].num = num_per_thread + 1;
+               args[i].start = i * args[i].num;
+       }
+       if (i != 0)
+               base = args[i-1].start + args[i-1].num;
+       for (j = i; j < thread_nr; j++) {
+               args[j].num = num_per_thread;
+               args[j].start = base + (j - i) * args[i].num;
+       }
+
+       for (i = 0; i < thread_nr; i++) {
+               if (pthread_create(&synthesize_threads[i], NULL,
+                                  synthesize_threads_worker, &args[i]))
+                       goto out_join;
+       }
+       err = 0;
+out_join:
+       for (i = 0; i < thread_nr; i++)
+               pthread_join(synthesize_threads[i], NULL);
+       free(args);
+free_threads:
+       free(synthesize_threads);
+free_dirent:
+       for (i = 0; i < n; i++)
+               zfree(&dirent[i]);
+       free(dirent);
+
+       return err;
+}
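
A worked example of the partitioning arithmetic above: with n = 10 /proc
entries and thread_nr = 4, num_per_thread = 2 and m = 2, so the first two
workers take one extra entry each and all ten entries are covered exactly
once:

	worker 0: start 0, num 3        worker 1: start 3, num 3
	worker 2: start 6, num 2        worker 3: start 8, num 2
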
+
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+                                             perf_event__handler_t process __maybe_unused,
+                                             struct machine *machine __maybe_unused)
+{
+       return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+                                               perf_event__handler_t process,
+                                               struct machine *machine)
+{
+       size_t size;
+       struct map *map = machine__kernel_map(machine);
+       struct kmap *kmap;
+       int err;
+       union perf_event *event;
+
+       if (map == NULL)
+               return -1;
+
+       kmap = map__kmap(map);
+       if (!kmap->ref_reloc_sym)
+               return -1;
+
+       /*
+        * We should get this from /sys/kernel/sections/.text, but until that
+        * is available use this, and afterwards keep it as a fallback for
+        * older kernels.
+        */
+       event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
+
+       if (machine__is_host(machine)) {
+               /*
+                * kernel uses PERF_RECORD_MISC_USER for user space maps,
+                * see kernel/perf_event.c __perf_event_mmap
+                */
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
+       } else {
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+       }
+
+       size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
+                       "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+       size = PERF_ALIGN(size, sizeof(u64));
+       event->mmap.header.type = PERF_RECORD_MMAP;
+       event->mmap.header.size = (sizeof(event->mmap) -
+                       (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
+       event->mmap.pgoff = kmap->ref_reloc_sym->addr;
+       event->mmap.start = map->start;
+       event->mmap.len   = map->end - event->mmap.start;
+       event->mmap.pid   = machine->pid;
+
+       err = perf_tool__process_synth_event(tool, event, machine, process);
+       free(event);
+
+       return err;
+}
+
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+                                      perf_event__handler_t process,
+                                      struct machine *machine)
+{
+       int err;
+
+       err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+       if (err < 0)
+               return err;
+
+       return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+                                     struct perf_thread_map *threads,
+                                     perf_event__handler_t process,
+                                     struct machine *machine)
+{
+       union perf_event *event;
+       int i, err, size;
+
+       size  = sizeof(event->thread_map);
+       size += threads->nr * sizeof(event->thread_map.entries[0]);
+
+       event = zalloc(size);
+       if (!event)
+               return -ENOMEM;
+
+       event->header.type = PERF_RECORD_THREAD_MAP;
+       event->header.size = size;
+       event->thread_map.nr = threads->nr;
+
+       for (i = 0; i < threads->nr; i++) {
+               struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i];
+               char *comm = perf_thread_map__comm(threads, i);
+
+               if (!comm)
+                       comm = (char *) "";
+
+               entry->pid = perf_thread_map__pid(threads, i);
+               strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
+       }
+
+       err = process(tool, event, NULL, machine);
+
+       free(event);
+       return err;
+}
+
+static void synthesize_cpus(struct cpu_map_entries *cpus,
+                           struct perf_cpu_map *map)
+{
+       int i;
+
+       cpus->nr = map->nr;
+
+       for (i = 0; i < map->nr; i++)
+               cpus->cpu[i] = map->map[i];
+}
+
+static void synthesize_mask(struct perf_record_record_cpu_map *mask,
+                           struct perf_cpu_map *map, int max)
+{
+       int i;
+
+       mask->nr = BITS_TO_LONGS(max);
+       mask->long_size = sizeof(long);
+
+       for (i = 0; i < map->nr; i++)
+               set_bit(map->map[i], mask->mask);
+}
+
+static size_t cpus_size(struct perf_cpu_map *map)
+{
+       return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+}
+
+static size_t mask_size(struct perf_cpu_map *map, int *max)
+{
+       int i;
+
+       *max = 0;
+
+       for (i = 0; i < map->nr; i++) {
+               /* the bit position of the cpu is cpu + 1 */
+               int bit = map->map[i] + 1;
+
+               if (bit > *max)
+                       *max = bit;
+       }
+
+       return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long);
+}
+
+void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max)
+{
+       size_t size_cpus, size_mask;
+       bool is_dummy = perf_cpu_map__empty(map);
+
+       /*
+        * Both the array and the mask data have a variable size, based
+        * on the number of cpus and their actual values.
+        * The size of the 'struct perf_record_cpu_map_data' is:
+        *
+        *   array = size of 'struct cpu_map_entries' +
+        *           number of cpus * sizeof(u16)
+        *
+        *   mask  = size of 'struct perf_record_record_cpu_map' +
+        *           maximum cpu bit converted to size of longs
+        *
+        * and finally + the size of 'struct perf_record_cpu_map_data'.
+        */
+       size_cpus = cpus_size(map);
+       size_mask = mask_size(map, max);
+
+       if (is_dummy || (size_cpus < size_mask)) {
+               *size += size_cpus;
+               *type  = PERF_CPU_MAP__CPUS;
+       } else {
+               *size += size_mask;
+               *type  = PERF_CPU_MAP__MASK;
+       }
+
+       *size += sizeof(struct perf_record_cpu_map_data);
+       *size = PERF_ALIGN(*size, sizeof(u64));
+       return zalloc(*size);
+}
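
A worked sizing example for the array-vs-mask choice above, assuming the
usual layouts (u16 header fields, 64-bit longs), so that
sizeof(struct cpu_map_entries) == 2 and
sizeof(struct perf_record_record_cpu_map) == 4:

	map = {0,1,2,3}: size_cpus = 2 + 4*2   = 10
	                 size_mask = 4 + 1*8   = 12    -> PERF_CPU_MAP__CPUS
	map = {0..511}:  size_cpus = 2 + 512*2 = 1026
	                 size_mask = 4 + 8*8   = 68    -> PERF_CPU_MAP__MASK
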
+
+void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map,
+                             u16 type, int max)
+{
+       data->type = type;
+
+       switch (type) {
+       case PERF_CPU_MAP__CPUS:
+               synthesize_cpus((struct cpu_map_entries *) data->data, map);
+               break;
+       case PERF_CPU_MAP__MASK:
+               synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max);
+               break;
+       default:
+               break;
+       }
+}
+
+static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map)
+{
+       size_t size = sizeof(struct perf_record_cpu_map);
+       struct perf_record_cpu_map *event;
+       int max;
+       u16 type;
+
+       event = cpu_map_data__alloc(map, &size, &type, &max);
+       if (!event)
+               return NULL;
+
+       event->header.type = PERF_RECORD_CPU_MAP;
+       event->header.size = size;
+       event->data.type   = type;
+
+       cpu_map_data__synthesize(&event->data, map, type, max);
+       return event;
+}
+
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+                                  struct perf_cpu_map *map,
+                                  perf_event__handler_t process,
+                                  struct machine *machine)
+{
+       struct perf_record_cpu_map *event;
+       int err;
+
+       event = cpu_map_event__new(map);
+       if (!event)
+               return -ENOMEM;
+
+       err = process(tool, (union perf_event *) event, NULL, machine);
+
+       free(event);
+       return err;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+                                      struct perf_stat_config *config,
+                                      perf_event__handler_t process,
+                                      struct machine *machine)
+{
+       struct perf_record_stat_config *event;
+       int size, i = 0, err;
+
+       size  = sizeof(*event);
+       size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+       event = zalloc(size);
+       if (!event)
+               return -ENOMEM;
+
+       event->header.type = PERF_RECORD_STAT_CONFIG;
+       event->header.size = size;
+       event->nr          = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val)                                     \
+       event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term;   \
+       event->data[i].val = __val;                             \
+       i++;
+
+       ADD(AGGR_MODE,  config->aggr_mode)
+       ADD(INTERVAL,   config->interval)
+       ADD(SCALE,      config->scale)
+
+       WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+                 "stat config terms unbalanced\n");
+#undef ADD
+
+       err = process(tool, (union perf_event *) event, NULL, machine);
+
+       free(event);
+       return err;
+}
+
+int perf_event__synthesize_stat(struct perf_tool *tool,
+                               u32 cpu, u32 thread, u64 id,
+                               struct perf_counts_values *count,
+                               perf_event__handler_t process,
+                               struct machine *machine)
+{
+       struct perf_record_stat event;
+
+       event.header.type = PERF_RECORD_STAT;
+       event.header.size = sizeof(event);
+       event.header.misc = 0;
+
+       event.id        = id;
+       event.cpu       = cpu;
+       event.thread    = thread;
+       event.val       = count->val;
+       event.ena       = count->ena;
+       event.run       = count->run;
+
+       return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+                                     u64 evtime, u64 type,
+                                     perf_event__handler_t process,
+                                     struct machine *machine)
+{
+       struct perf_record_stat_round event;
+
+       event.header.type = PERF_RECORD_STAT_ROUND;
+       event.header.size = sizeof(event);
+       event.header.misc = 0;
+
+       event.time = evtime;
+       event.type = type;
+
+       return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format)
+{
+       size_t sz, result = sizeof(struct perf_record_sample);
+
+       if (type & PERF_SAMPLE_IDENTIFIER)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_IP)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_TID)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_TIME)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_ADDR)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_ID)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_STREAM_ID)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_CPU)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_PERIOD)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_READ) {
+               result += sizeof(u64);
+               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+                       result += sizeof(u64);
+               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+                       result += sizeof(u64);
+               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+               if (read_format & PERF_FORMAT_GROUP) {
+                       sz = sample->read.group.nr *
+                            sizeof(struct sample_read_value);
+                       result += sz;
+               } else {
+                       result += sizeof(u64);
+               }
+       }
+
+       if (type & PERF_SAMPLE_CALLCHAIN) {
+               sz = (sample->callchain->nr + 1) * sizeof(u64);
+               result += sz;
+       }
+
+       if (type & PERF_SAMPLE_RAW) {
+               result += sizeof(u32);
+               result += sample->raw_size;
+       }
+
+       if (type & PERF_SAMPLE_BRANCH_STACK) {
+               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+               sz += sizeof(u64);
+               result += sz;
+       }
+
+       if (type & PERF_SAMPLE_REGS_USER) {
+               if (sample->user_regs.abi) {
+                       result += sizeof(u64);
+                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+                       result += sz;
+               } else {
+                       result += sizeof(u64);
+               }
+       }
+
+       if (type & PERF_SAMPLE_STACK_USER) {
+               sz = sample->user_stack.size;
+               result += sizeof(u64);
+               if (sz) {
+                       result += sz;
+                       result += sizeof(u64);
+               }
+       }
+
+       if (type & PERF_SAMPLE_WEIGHT)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_DATA_SRC)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_TRANSACTION)
+               result += sizeof(u64);
+
+       if (type & PERF_SAMPLE_REGS_INTR) {
+               if (sample->intr_regs.abi) {
+                       result += sizeof(u64);
+                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+                       result += sz;
+               } else {
+                       result += sizeof(u64);
+               }
+       }
+
+       if (type & PERF_SAMPLE_PHYS_ADDR)
+               result += sizeof(u64);
+
+       return result;
+}
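
For instance, with type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME
and none of the variable-length parts (read, callchain, raw, branch or stack
data), this returns sizeof(struct perf_record_sample) + 3 * sizeof(u64). The
result is exactly the number of bytes perf_event__synthesize_sample() below
writes for the same type and read_format, so the two must be kept in sync.
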
+
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format,
+                                 const struct perf_sample *sample)
+{
+       __u64 *array;
+       size_t sz;
+       /*
+        * used for cross-endian analysis. See git commit 65014ab3
+        * for why this goofiness is needed.
+        */
+       union u64_swap u;
+
+       array = event->sample.array;
+
+       if (type & PERF_SAMPLE_IDENTIFIER) {
+               *array = sample->id;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_IP) {
+               *array = sample->ip;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_TID) {
+               u.val32[0] = sample->pid;
+               u.val32[1] = sample->tid;
+               *array = u.val64;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_TIME) {
+               *array = sample->time;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_ADDR) {
+               *array = sample->addr;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_ID) {
+               *array = sample->id;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_STREAM_ID) {
+               *array = sample->stream_id;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_CPU) {
+               u.val32[0] = sample->cpu;
+               u.val32[1] = 0;
+               *array = u.val64;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_PERIOD) {
+               *array = sample->period;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_READ) {
+               if (read_format & PERF_FORMAT_GROUP)
+                       *array = sample->read.group.nr;
+               else
+                       *array = sample->read.one.value;
+               array++;
+
+               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+                       *array = sample->read.time_enabled;
+                       array++;
+               }
+
+               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+                       *array = sample->read.time_running;
+                       array++;
+               }
+
+               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+               if (read_format & PERF_FORMAT_GROUP) {
+                       sz = sample->read.group.nr *
+                            sizeof(struct sample_read_value);
+                       memcpy(array, sample->read.group.values, sz);
+                       array = (void *)array + sz;
+               } else {
+                       *array = sample->read.one.id;
+                       array++;
+               }
+       }
+
+       if (type & PERF_SAMPLE_CALLCHAIN) {
+               sz = (sample->callchain->nr + 1) * sizeof(u64);
+               memcpy(array, sample->callchain, sz);
+               array = (void *)array + sz;
+       }
+
+       if (type & PERF_SAMPLE_RAW) {
+               u.val32[0] = sample->raw_size;
+               *array = u.val64;
+               array = (void *)array + sizeof(u32);
+
+               memcpy(array, sample->raw_data, sample->raw_size);
+               array = (void *)array + sample->raw_size;
+       }
+
+       if (type & PERF_SAMPLE_BRANCH_STACK) {
+               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+               sz += sizeof(u64);
+               memcpy(array, sample->branch_stack, sz);
+               array = (void *)array + sz;
+       }
+
+       if (type & PERF_SAMPLE_REGS_USER) {
+               if (sample->user_regs.abi) {
+                       *array++ = sample->user_regs.abi;
+                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+                       memcpy(array, sample->user_regs.regs, sz);
+                       array = (void *)array + sz;
+               } else {
+                       *array++ = 0;
+               }
+       }
+
+       if (type & PERF_SAMPLE_STACK_USER) {
+               sz = sample->user_stack.size;
+               *array++ = sz;
+               if (sz) {
+                       memcpy(array, sample->user_stack.data, sz);
+                       array = (void *)array + sz;
+                       *array++ = sz;
+               }
+       }
+
+       if (type & PERF_SAMPLE_WEIGHT) {
+               *array = sample->weight;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_DATA_SRC) {
+               *array = sample->data_src;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_TRANSACTION) {
+               *array = sample->transaction;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_REGS_INTR) {
+               if (sample->intr_regs.abi) {
+                       *array++ = sample->intr_regs.abi;
+                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+                       memcpy(array, sample->intr_regs.regs, sz);
+                       array = (void *)array + sz;
+               } else {
+                       *array++ = 0;
+               }
+       }
+
+       if (type & PERF_SAMPLE_PHYS_ADDR) {
+               *array = sample->phys_addr;
+               array++;
+       }
+
+       return 0;
+}
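
A minimal sketch of how the two helpers pair up, with the buffer sized by
perf_event__sample_event_size() and then filled in place; the header setup
and the process callback shown here are illustrative, and error handling is
elided:

	size_t sz = perf_event__sample_event_size(sample, type, read_format);
	union perf_event *event = zalloc(sz);

	if (event) {
		event->header.type = PERF_RECORD_SAMPLE;
		event->header.size = (u16)sz;
		perf_event__synthesize_sample(event, type, read_format, sample);
		process(tool, event, sample, machine);
		free(event);
	}
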
+
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+                                   struct evlist *evlist, struct machine *machine)
+{
+       union perf_event *ev;
+       struct evsel *evsel;
+       size_t nr = 0, i = 0, sz, max_nr, n;
+       int err;
+
+       pr_debug2("Synthesizing id index\n");
+
+       max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) /
+                sizeof(struct id_index_entry);
+
+       evlist__for_each_entry(evlist, evsel)
+               nr += evsel->core.ids;
+
+       n = nr > max_nr ? max_nr : nr;
+       sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry);
+       ev = zalloc(sz);
+       if (!ev)
+               return -ENOMEM;
+
+       ev->id_index.header.type = PERF_RECORD_ID_INDEX;
+       ev->id_index.header.size = sz;
+       ev->id_index.nr = n;
+
+       evlist__for_each_entry(evlist, evsel) {
+               u32 j;
+
+               for (j = 0; j < evsel->core.ids; j++) {
+                       struct id_index_entry *e;
+                       struct perf_sample_id *sid;
+
+                       if (i >= n) {
+                               err = process(tool, ev, NULL, machine);
+                               if (err)
+                                       goto out_err;
+                               nr -= n;
+                               i = 0;
+                       }
+
+                       e = &ev->id_index.entries[i++];
+
+                       e->id = evsel->core.id[j];
+
+                       sid = perf_evlist__id2sid(evlist, e->id);
+                       if (!sid) {
+                               free(ev);
+                               return -ENOENT;
+                       }
+
+                       e->idx = sid->idx;
+                       e->cpu = sid->cpu;
+                       e->tid = sid->tid;
+               }
+       }
+
+       sz = sizeof(struct perf_record_id_index) + nr * sizeof(struct id_index_entry);
+       ev->id_index.header.size = sz;
+       ev->id_index.nr = nr;
+
+       err = process(tool, ev, NULL, machine);
+out_err:
+       free(ev);
+
+       return err;
+}
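
To see how the chunking above plays out, assume the usual 8-byte fields, so
sizeof(struct perf_record_id_index) == 16 and
sizeof(struct id_index_entry) == 32; then
max_nr = (65535 - 16) / 32 = 2047 entries per event, and an evlist carrying
5000 ids is flushed as two full 2047-entry events from inside the loop plus
a final 906-entry event at the end.
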
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+                                 struct target *target, struct perf_thread_map *threads,
+                                 perf_event__handler_t process, bool data_mmap,
+                                 unsigned int nr_threads_synthesize)
+{
+       if (target__has_task(target))
+               return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
+       else if (target__has_cpu(target))
+               return perf_event__synthesize_threads(tool, process,
+                                                     machine, data_mmap,
+                                                     nr_threads_synthesize);
+       /* command specified */
+       return 0;
+}
+
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+                               struct perf_thread_map *threads, bool data_mmap,
+                               unsigned int nr_threads_synthesize)
+{
+       return __machine__synthesize_threads(machine, NULL, target, threads,
+                                            perf_event__process, data_mmap,
+                                            nr_threads_synthesize);
+}
+
+static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id)
+{
+       struct perf_record_event_update *ev;
+
+       size += sizeof(*ev);
+       size  = PERF_ALIGN(size, sizeof(u64));
+
+       ev = zalloc(size);
+       if (ev) {
+               ev->header.type = PERF_RECORD_EVENT_UPDATE;
+               ev->header.size = (u16)size;
+               ev->type        = type;
+               ev->id          = id;
+       }
+       return ev;
+}
+
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel,
+                                            perf_event__handler_t process)
+{
+       size_t size = strlen(evsel->unit);
+       struct perf_record_event_update *ev;
+       int err;
+
+       ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->core.id[0]);
+       if (ev == NULL)
+               return -ENOMEM;
+
+       strlcpy(ev->data, evsel->unit, size + 1);
+       err = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+       return err;
+}
+
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel,
+                                             perf_event__handler_t process)
+{
+       struct perf_record_event_update *ev;
+       struct perf_record_event_update_scale *ev_data;
+       int err;
+
+       ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->core.id[0]);
+       if (ev == NULL)
+               return -ENOMEM;
+
+       ev_data = (struct perf_record_event_update_scale *)ev->data;
+       ev_data->scale = evsel->scale;
+       err = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+       return err;
+}
+
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel,
+                                            perf_event__handler_t process)
+{
+       struct perf_record_event_update *ev;
+       size_t len = strlen(evsel->name);
+       int err;
+
+       ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->core.id[0]);
+       if (ev == NULL)
+               return -ENOMEM;
+
+       strlcpy(ev->data, evsel->name, len + 1);
+       err = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+       return err;
+}
+
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
+                                            perf_event__handler_t process)
+{
+       size_t size = sizeof(struct perf_record_event_update);
+       struct perf_record_event_update *ev;
+       int max, err;
+       u16 type;
+
+       if (!evsel->core.own_cpus)
+               return 0;
+
+       ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max);
+       if (!ev)
+               return -ENOMEM;
+
+       ev->header.type = PERF_RECORD_EVENT_UPDATE;
+       ev->header.size = (u16)size;
+       ev->type        = PERF_EVENT_UPDATE__CPUS;
+       ev->id          = evsel->core.id[0];
+
+       cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data,
+                                evsel->core.own_cpus, type, max);
+
+       err = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+       return err;
+}
+
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist,
+                                perf_event__handler_t process)
+{
+       struct evsel *evsel;
+       int err = 0;
+
+       evlist__for_each_entry(evlist, evsel) {
+               err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->core.ids,
+                                                 evsel->core.id, process);
+               if (err) {
+                       pr_debug("failed to create perf header attribute\n");
+                       return err;
+               }
+       }
+
+       return err;
+}
+
+static bool has_unit(struct evsel *evsel)
+{
+       return evsel->unit && *evsel->unit;
+}
+
+static bool has_scale(struct evsel *evsel)
+{
+       return evsel->scale != 1;
+}
+
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list,
+                                     perf_event__handler_t process, bool is_pipe)
+{
+       struct evsel *evsel;
+       int err;
+
+       /*
+        * Synthesize other event details not carried within the
+        * attr event: unit, scale, name.
+        */
+       evlist__for_each_entry(evsel_list, evsel) {
+               if (!evsel->supported)
+                       continue;
+
+               /*
+        * Synthesize unit and scale only if they are defined.
+                */
+               if (has_unit(evsel)) {
+                       err = perf_event__synthesize_event_update_unit(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel unit.\n");
+                               return err;
+                       }
+               }
+
+               if (has_scale(evsel)) {
+                       err = perf_event__synthesize_event_update_scale(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel scale.\n");
+                               return err;
+                       }
+               }
+
+               if (evsel->core.own_cpus) {
+                       err = perf_event__synthesize_event_update_cpus(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel cpus.\n");
+                               return err;
+                       }
+               }
+
+               /*
+                * Name is needed only for pipe output;
+                * perf.data already carries event names.
+                */
+               if (is_pipe) {
+                       err = perf_event__synthesize_event_update_name(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel name.\n");
+                               return err;
+                       }
+               }
+       }
+       return 0;
+}
+
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr,
+                               u32 ids, u64 *id, perf_event__handler_t process)
+{
+       union perf_event *ev;
+       size_t size;
+       int err;
+
+       size = sizeof(struct perf_event_attr);
+       size = PERF_ALIGN(size, sizeof(u64));
+       size += sizeof(struct perf_event_header);
+       size += ids * sizeof(u64);
+
+       ev = zalloc(size);
+
+       if (ev == NULL)
+               return -ENOMEM;
+
+       ev->attr.attr = *attr;
+       memcpy(ev->attr.id, id, ids * sizeof(u64));
+
+       ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
+       ev->attr.header.size = (u16)size;
+
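+       /* header.size is only u16: if the cast above truncated, report
+        * -E2BIG rather than emitting a corrupt record. */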
+       if (ev->attr.header.size == size)
+               err = process(tool, ev, NULL, NULL);
+       else
+               err = -E2BIG;
+
+       free(ev);
+
+       return err;
+}
+
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist,
+                                       perf_event__handler_t process)
+{
+       union perf_event ev;
+       struct tracing_data *tdata;
+       ssize_t size = 0, aligned_size = 0, padding;
+       struct feat_fd ff;
+
+       /*
+        * We are going to store the size of the data followed
+        * by the data contents. Since fd refers to a pipe,
+        * we cannot seek back to store the size of the data once
+        * we know it. Instead we:
+        *
+        * - write the tracing data to the temp file
+        * - get/write the data size to pipe
+        * - write the tracing data from the temp file
+        *   to the pipe
+        */
+       tdata = tracing_data_get(&evlist->core.entries, fd, true);
+       if (!tdata)
+               return -1;
+
+       memset(&ev, 0, sizeof(ev));
+
+       ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
+       size = tdata->size;
+       aligned_size = PERF_ALIGN(size, sizeof(u64));
+       padding = aligned_size - size;
+       ev.tracing_data.header.size = sizeof(ev.tracing_data);
+       ev.tracing_data.size = aligned_size;
+
+       process(tool, &ev, NULL, NULL);
+
+       /*
+        * The put function will copy all the tracing data
+        * stored in temp file to the pipe.
+        */
+       tracing_data_put(tdata);
+
+       ff = (struct feat_fd){ .fd = fd };
+       if (write_padded(&ff, NULL, 0, padding))
+               return -1;
+
+       return aligned_size;
+}
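+
For context, the consumer mirrors this: it parses the fixed-size
PERF_RECORD_HEADER_TRACING_DATA record, then reads `size` (already
u64-aligned) bytes of tracing data straight off the pipe. A hedged
reader-side sketch, assuming perf's readn() helper that loops over read():

	/* Illustrative consumer, not part of this patch. */
	static int example_read_tracing_data(int fd,
					     struct perf_record_header_tracing_data *ev,
					     void *buf)
	{
		/* ev->size is the aligned payload length written above. */
		return readn(fd, buf, ev->size) == (ssize_t)ev->size ? 0 : -1;
	}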
+
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc,
+                                   perf_event__handler_t process, struct machine *machine)
+{
+       union perf_event ev;
+       size_t len;
+
+       if (!pos->hit)
+               return 0;
+
+       memset(&ev, 0, sizeof(ev));
+
+       len = pos->long_name_len + 1;
+       len = PERF_ALIGN(len, NAME_ALIGN);
+       memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
+       ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
+       ev.build_id.header.misc = misc;
+       ev.build_id.pid = machine->pid;
+       ev.build_id.header.size = sizeof(ev.build_id) + len;
+       memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
+
+       return process(tool, &ev, NULL, machine);
+}
+
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool,
+                                      struct evlist *evlist, perf_event__handler_t process, bool attrs)
+{
+       int err;
+
+       if (attrs) {
+               err = perf_event__synthesize_attrs(tool, evlist, process);
+               if (err < 0) {
+                       pr_err("Couldn't synthesize attrs.\n");
+                       return err;
+               }
+       }
+
+       err = perf_event__synthesize_extra_attr(tool, evlist, process, attrs);
+       if (err < 0) {
+               pr_err("Couldn't synthesize extra attrs.\n");
+               return err;
+       }
+
+       err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, process, NULL);
+       if (err < 0) {
+               pr_err("Couldn't synthesize thread map.\n");
+               return err;
+       }
+
+       err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, process, NULL);
+       if (err < 0) {
+               pr_err("Couldn't synthesize cpu map.\n");
+               return err;
+       }
+
+       err = perf_event__synthesize_stat_config(tool, config, process, NULL);
+       if (err < 0) {
+               pr_err("Couldn't synthesize config.\n");
+               return err;
+       }
+
+       return 0;
+}
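+
A hedged sketch of how a stat-record style caller might drive this helper
(illustrative; config/evlist setup omitted):

	static int example_stat_synth(struct perf_stat_config *config,
				      struct perf_tool *tool,
				      struct evlist *evlist,
				      perf_event__handler_t process)
	{
		/* attrs=true: pipe-style output also needs the attr events. */
		return perf_event__synthesize_stat_events(config, tool, evlist,
							  process, true);
	}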
+
+int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
+                                      struct perf_tool *tool __maybe_unused,
+                                      perf_event__handler_t process __maybe_unused,
+                                      struct machine *machine __maybe_unused)
+{
+       return 0;
+}
+
+extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
+
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
+                                   struct evlist *evlist, perf_event__handler_t process)
+{
+       struct perf_header *header = &session->header;
+       struct perf_record_header_feature *fe;
+       struct feat_fd ff;
+       size_t sz, sz_hdr;
+       int feat, ret;
+
+       sz_hdr = sizeof(fe->header);
+       sz = sizeof(union perf_event);
+       /* get a nice alignment */
+       sz = PERF_ALIGN(sz, page_size);
+
+       memset(&ff, 0, sizeof(ff));
+
+       ff.buf = malloc(sz);
+       if (!ff.buf)
+               return -ENOMEM;
+
+       ff.size = sz - sz_hdr;
+       ff.ph = &session->header;
+
+       for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+               if (!feat_ops[feat].synthesize) {
+                       pr_debug("No record header feature for header: %d\n", feat);
+                       continue;
+               }
+
+               ff.offset = sizeof(*fe);
+
+               ret = feat_ops[feat].write(&ff, evlist);
+               if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
+                       pr_debug("Error writing feature\n");
+                       continue;
+               }
+               /* ff.buf may have changed due to realloc in do_write() */
+               fe = ff.buf;
+               memset(fe, 0, sizeof(*fe));
+
+               fe->feat_id = feat;
+               fe->header.type = PERF_RECORD_HEADER_FEATURE;
+               fe->header.size = ff.offset;
+
+               ret = process(tool, ff.buf, NULL, NULL);
+               if (ret) {
+                       free(ff.buf);
+                       return ret;
+               }
+       }
+
+       /* Send HEADER_LAST_FEATURE mark. */
+       fe = ff.buf;
+       fe->feat_id     = HEADER_LAST_FEATURE;
+       fe->header.type = PERF_RECORD_HEADER_FEATURE;
+       fe->header.size = sizeof(*fe);
+
+       ret = process(tool, ff.buf, NULL, NULL);
+
+       free(ff.buf);
+       return ret;
+}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
new file mode 100644 (file)
index 0000000..baead0c
--- /dev/null
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYNTHETIC_EVENTS_H
+#define __PERF_SYNTHETIC_EVENTS_H
+
+#include <stdbool.h>
+#include <sys/types.h> // pid_t
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct auxtrace_record;
+struct dso;
+struct evlist;
+struct evsel;
+struct machine;
+struct perf_counts_values;
+struct perf_cpu_map;
+struct perf_event_attr;
+struct perf_event_mmap_page;
+struct perf_sample;
+struct perf_session;
+struct perf_stat_config;
+struct perf_thread_map;
+struct perf_tool;
+struct record_opts;
+struct target;
+
+union perf_event;
+
+typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
+                                    struct perf_sample *sample, struct machine *machine);
+
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool, struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
+int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
+int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize);
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
+
+int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format);
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+                                 struct target *target, struct perf_thread_map *threads,
+                                 perf_event__handler_t process, bool data_mmap,
+                                 unsigned int nr_threads_synthesize);
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+                               struct perf_thread_map *threads, bool data_mmap,
+                               unsigned int nr_threads_synthesize);
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool,
+                                        struct perf_session *session, perf_event__handler_t process);
+
+#else // HAVE_AUXTRACE_SUPPORT
+
+#include <errno.h>
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+                                    struct perf_tool *tool __maybe_unused,
+                                    struct perf_session *session __maybe_unused,
+                                    perf_event__handler_t process __maybe_unused)
+{
+       return -EINVAL;
+}
+#endif // HAVE_AUXTRACE_SUPPORT
+
+#ifdef HAVE_LIBBPF_SUPPORT
+int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process,
+                                     struct machine *machine, struct record_opts *opts);
+#else // HAVE_LIBBPF_SUPPORT
+static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
+                                                   perf_event__handler_t process __maybe_unused,
+                                                   struct machine *machine __maybe_unused,
+                                                   struct record_opts *opts __maybe_unused)
+{
+       return 0;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+#endif // __PERF_SYNTHETIC_EVENTS_H
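+
To make the perf_event__handler_t typedef above concrete, a minimal handler
sketch (hypothetical, not from this patch; pr_debug is assumed to come from
perf's util/debug.h):

	/* Illustrative perf_event__handler_t implementation. */
	static int example_process(struct perf_tool *tool __maybe_unused,
				   union perf_event *event,
				   struct perf_sample *sample __maybe_unused,
				   struct machine *machine __maybe_unused)
	{
		/* A real tool would write the event to its perf.data file
		 * or pipe; here we just note its size. */
		pr_debug("synthesized %u bytes\n", event->header.size);
		return 0;
	}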
index 565f7ae..a3db13d 100644 (file)
@@ -6,8 +6,6 @@
  */
 
 #include "target.h"
-#include "util.h"
-#include "debug.h"
 
 #include <pwd.h>
 #include <stdio.h>
index 51fb574..3dce2de 100644 (file)
@@ -5,7 +5,6 @@
  * Refactored from builtin-top.c, see that file for further copyright notes.
  */
 
-#include "cpumap.h"
 #include "event.h"
 #include "evlist.h"
 #include "evsel.h"
@@ -72,7 +71,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
        }
 
        if (top->evlist->core.nr_entries == 1) {
-               struct evsel *first = perf_evlist__first(top->evlist);
+               struct evsel *first = evlist__first(top->evlist);
                ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
                                (uint64_t)first->core.attr.sample_period,
                                opts->freq ? "Hz" : "");
index d63d542..086e98f 100644 (file)
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2008,2009, Steven Rostedt <srostedt@redhat.com>
  */
-#include "util.h"
 #include <dirent.h>
 #include <mntent.h>
 #include <stdio.h>
@@ -19,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
+#include <internal/lib.h> // page_size
 
 #include "trace-event.h"
 #include <api/fs/tracing_path.h>
index b6c0db0..8593d3c 100644 (file)
@@ -15,7 +15,6 @@
 #include <unistd.h>
 #include <errno.h>
 
-#include "util.h"
 #include "trace-event.h"
 #include "debug.h"
 
index 01b9d89..b3ee651 100644 (file)
@@ -14,7 +14,6 @@
 #include <api/fs/fs.h>
 #include "trace-event.h"
 #include "machine.h"
-#include "util.h"
 
 /*
  * global trace_event object used by trace_event__tp_format
index e0c3af3..3c5a632 100644 (file)
@@ -4,13 +4,12 @@
 
 #include <linux/types.h>
 
-#include "event.h"
-
 struct perf_tsc_conversion {
        u16 time_shift;
        u32 time_mult;
        u64 time_zero;
 };
+
 struct perf_event_mmap_page;
 
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
@@ -20,13 +19,4 @@ u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
 u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
 u64 rdtsc(void);
 
-struct perf_event_mmap_page;
-struct perf_tool;
-struct machine;
-
-int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
-                               struct perf_tool *tool,
-                               perf_event__handler_t process,
-                               struct machine *machine);
-
-#endif
+#endif // __PERF_TSC_H
index 9ece188..15f6e46 100644 (file)
@@ -17,7 +17,6 @@
 #include "event.h"
 #include "perf_regs.h"
 #include "callchain.h"
-#include "util.h"
 
 static char *debuginfo_path;
 
index ebdbb05..1800887 100644 (file)
@@ -37,7 +37,6 @@
 #include "unwind.h"
 #include "map.h"
 #include "symbol.h"
-#include "util.h"
 #include "debug.h"
 #include "asm/bug.h"
 #include "dso.h"
index 3949a60..196438e 100644 (file)
@@ -8,7 +8,6 @@
  * Copyright (C) Linus Torvalds, 2005
  */
 #include "util.h"
-#include "debug.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <linux/compiler.h>
index 32322a2..ae56c76 100644 (file)
@@ -2,9 +2,7 @@
 #include "util.h"
 #include "debug.h"
 #include "event.h"
-#include "namespaces.h"
 #include <api/fs/fs.h>
-#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/utsname.h>
 #include <dirent.h>
@@ -41,8 +39,6 @@ void perf_set_multithreaded(void)
        perf_singlethreaded = false;
 }
 
-unsigned int page_size;
-
 int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
 int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
 
@@ -158,8 +154,10 @@ static int rm_rf_depth_pat(const char *path, int depth, const char **pat)
                if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
                        continue;
 
-               if (!match_pat(d->d_name, pat))
-                       return -2;
+               if (!match_pat(d->d_name, pat)) {
+                       ret = -2;
+                       break;
+               }
 
                scnprintf(namebuf, sizeof(namebuf), "%s/%s",
                          path, d->d_name);
@@ -234,138 +232,6 @@ out:
        return list;
 }
 
-static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
-{
-       int err = -1;
-       char *line = NULL;
-       size_t n;
-       FILE *from_fp, *to_fp;
-       struct nscookie nsc;
-
-       nsinfo__mountns_enter(nsi, &nsc);
-       from_fp = fopen(from, "r");
-       nsinfo__mountns_exit(&nsc);
-       if (from_fp == NULL)
-               goto out;
-
-       to_fp = fopen(to, "w");
-       if (to_fp == NULL)
-               goto out_fclose_from;
-
-       while (getline(&line, &n, from_fp) > 0)
-               if (fputs(line, to_fp) == EOF)
-                       goto out_fclose_to;
-       err = 0;
-out_fclose_to:
-       fclose(to_fp);
-       free(line);
-out_fclose_from:
-       fclose(from_fp);
-out:
-       return err;
-}
-
-int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
-{
-       void *ptr;
-       loff_t pgoff;
-
-       pgoff = off_in & ~(page_size - 1);
-       off_in -= pgoff;
-
-       ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
-       if (ptr == MAP_FAILED)
-               return -1;
-
-       while (size) {
-               ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
-               if (ret < 0 && errno == EINTR)
-                       continue;
-               if (ret <= 0)
-                       break;
-
-               size -= ret;
-               off_in += ret;
-               off_out += ret;
-       }
-       munmap(ptr, off_in + size);
-
-       return size ? -1 : 0;
-}
-
-static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
-                           struct nsinfo *nsi)
-{
-       int fromfd, tofd;
-       struct stat st;
-       int err;
-       char *tmp = NULL, *ptr = NULL;
-       struct nscookie nsc;
-
-       nsinfo__mountns_enter(nsi, &nsc);
-       err = stat(from, &st);
-       nsinfo__mountns_exit(&nsc);
-       if (err)
-               goto out;
-       err = -1;
-
-       /* extra 'x' at the end is to reserve space for '.' */
-       if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
-               tmp = NULL;
-               goto out;
-       }
-       ptr = strrchr(tmp, '/');
-       if (!ptr)
-               goto out;
-       ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
-       *ptr = '.';
-
-       tofd = mkstemp(tmp);
-       if (tofd < 0)
-               goto out;
-
-       if (fchmod(tofd, mode))
-               goto out_close_to;
-
-       if (st.st_size == 0) { /* /proc? do it slowly... */
-               err = slow_copyfile(from, tmp, nsi);
-               goto out_close_to;
-       }
-
-       nsinfo__mountns_enter(nsi, &nsc);
-       fromfd = open(from, O_RDONLY);
-       nsinfo__mountns_exit(&nsc);
-       if (fromfd < 0)
-               goto out_close_to;
-
-       err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
-
-       close(fromfd);
-out_close_to:
-       close(tofd);
-       if (!err)
-               err = link(tmp, to);
-       unlink(tmp);
-out:
-       free(tmp);
-       return err;
-}
-
-int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
-{
-       return copyfile_mode_ns(from, to, 0755, nsi);
-}
-
-int copyfile_mode(const char *from, const char *to, mode_t mode)
-{
-       return copyfile_mode_ns(from, to, mode, NULL);
-}
-
-int copyfile(const char *from, const char *to)
-{
-       return copyfile_mode(from, to, 0755);
-}
-
 size_t hex_width(u64 v)
 {
        size_t n = 1;
index 45a5c6f..9969b8b 100644 (file)
 #include <stddef.h>
 #include <linux/compiler.h>
 #include <sys/types.h>
-#include <internal/lib.h>
 
 /* General helper functions */
 void usage(const char *err) __noreturn;
 void die(const char *err, ...) __noreturn __printf(1, 2);
 
 struct dirent;
-struct nsinfo;
 struct strlist;
 
 int mkdir_p(char *path, mode_t mode);
@@ -26,15 +24,9 @@ int rm_rf(const char *path);
 int rm_rf_perf_data(const char *path);
 struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
 bool lsdir_no_dot_filter(const char *name, struct dirent *d);
-int copyfile(const char *from, const char *to);
-int copyfile_mode(const char *from, const char *to, mode_t mode);
-int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
-int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
 
 size_t hex_width(u64 v);
 
-extern unsigned int page_size;
-
 int sysctl__max_stack(void);
 
 int fetch_kernel_version(unsigned int *puint,
index e5e6599..ba4b439 100644 (file)
@@ -11,7 +11,7 @@
 
 #include "vdso.h"
 #include "dso.h"
-#include "util.h"
+#include <internal/lib.h>
 #include "map.h"
 #include "symbol.h"
 #include "machine.h"
index 59d456f..78d2297 100644 (file)
@@ -7,11 +7,9 @@
 #include <sys/mman.h>
 #include <zlib.h>
 #include <linux/compiler.h>
+#include <internal/lib.h>
 
 #include "util/compress.h"
-#include "util/util.h"
-#include "util/debug.h"
-
 
 #define CHUNK_SIZE  16384
 
index 59753b3..2a9890c 100644 (file)
@@ -38,6 +38,7 @@ static int fact_avx = 0xFF;
 static unsigned long long fact_trl;
 static int out_format_json;
 static int cmd_help;
+static int force_online_offline;
 
 /* clos related */
 static int current_clos = -1;
@@ -138,14 +139,14 @@ int out_format_is_json(void)
 int get_physical_package_id(int cpu)
 {
        return parse_int_file(
-               1, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
+               0, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
                cpu);
 }
 
 int get_physical_core_id(int cpu)
 {
        return parse_int_file(
-               1, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+               0, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
 }
 
 int get_physical_die_id(int cpu)
@@ -165,6 +166,26 @@ int get_topo_max_cpus(void)
        return topo_max_cpus;
 }
 
+static void set_cpu_online_offline(int cpu, int state)
+{
+       char buffer[128];
+       int fd;
+
+       snprintf(buffer, sizeof(buffer),
+                "/sys/devices/system/cpu/cpu%d/online", cpu);
+
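+       /* Writing "1"/"0" to this sysfs file hot-plugs the CPU in or out. */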
+       fd = open(buffer, O_WRONLY);
+       if (fd < 0)
+               err(-1, "%s open failed", buffer);
+
+       if (state)
+               write(fd, "1\n", 2);
+       else
+               write(fd, "0\n", 2);
+
+       close(fd);
+}
+
 #define MAX_PACKAGE_COUNT 8
 #define MAX_DIE_PER_PACKAGE 2
 static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
@@ -402,6 +423,9 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned long long core_mask,
                        int j;
 
                        for (j = 0; j < topo_max_cpus; ++j) {
+                               if (!CPU_ISSET_S(j, present_cpumask_size, present_cpumask))
+                                       continue;
+
                                if (cpu_map[j].pkg_id == pkg_id &&
                                    cpu_map[j].die_id == die_id &&
                                    cpu_map[j].punit_cpu_core == i) {
@@ -484,7 +508,7 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
                int write = 0;
                int clos_id, core_id, ret = 0;
 
-               debug_printf("CLOS %d\n", cpu);
+               debug_printf("CPU %d\n", cpu);
 
                if (parameter & BIT(MBOX_CMD_WRITE_BIT)) {
                        value = req_data;
@@ -649,8 +673,8 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
        if (ret)
                perror("get_tdp_*");
        else
-               isst_display_result(cpu, outf, "perf-profile", (char *)arg3,
-                                   *(unsigned int *)arg4);
+               isst_ctdp_display_core_info(cpu, outf, arg3,
+                                           *(unsigned int *)arg4);
 }
 
 #define _get_tdp_level(desc, suffix, object, help)                                \
@@ -733,9 +757,34 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
        ret = isst_set_tdp_level(cpu, tdp_level);
        if (ret)
                perror("set_tdp_level_for_cpu");
-       else
+       else {
                isst_display_result(cpu, outf, "perf-profile", "set_tdp_level",
                                    ret);
+               if (force_online_offline) {
+                       struct isst_pkg_ctdp_level_info ctdp_level;
+                       int pkg_id = get_physical_package_id(cpu);
+                       int die_id = get_physical_die_id(cpu);
+
+                       fprintf(stderr, "Option is set to online/offline\n");
+                       ctdp_level.core_cpumask_size =
+                               alloc_cpu_set(&ctdp_level.core_cpumask);
+                       isst_get_coremask_info(cpu, tdp_level, &ctdp_level);
+                       if (ctdp_level.cpu_count) {
+                               int i, max_cpus = get_topo_max_cpus();
+                               for (i = 0; i < max_cpus; ++i) {
+                                       if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i))
+                                               continue;
+                                       if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+                                               fprintf(stderr, "online cpu %d\n", i);
+                                               set_cpu_online_offline(i, 1);
+                                       } else {
+                                               fprintf(stderr, "offline cpu %d\n", i);
+                                               set_cpu_online_offline(i, 0);
+                                       }
+                               }
+                       }
+               }
+       }
 }
 
 static void set_tdp_level(void)
@@ -744,6 +793,8 @@ static void set_tdp_level(void)
                fprintf(stderr, "Set Config TDP level\n");
                fprintf(stderr,
                        "\t Arguments: -l|--level : Specify tdp level\n");
+               fprintf(stderr,
+                       "\t Optional Arguments: -o|--online : online/offline CPUs for the tdp level\n");
                exit(0);
        }
 
@@ -1082,6 +1133,40 @@ static void dump_clos_config(void)
        isst_ctdp_display_information_end(outf);
 }
 
+static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
+                                 void *arg4)
+{
+       int enable, ret, prio_type;
+
+       ret = isst_clos_get_clos_information(cpu, &enable, &prio_type);
+       if (ret)
+               perror("isst_clos_get_info");
+       else
+               isst_clos_display_clos_information(cpu, outf, enable, prio_type);
+}
+
+static void dump_clos_info(void)
+{
+       if (cmd_help) {
+               fprintf(stderr,
+                       "Print Intel Speed Select Technology core power information\n");
+               fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
+               exit(0);
+       }
+
+       if (!max_target_cpus) {
+               fprintf(stderr,
+                       "Invalid target cpu. Specify with [-c|--cpu]\n");
+               exit(0);
+       }
+
+       isst_ctdp_display_information_start(outf);
+       for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL,
+                                         NULL, NULL, NULL);
+       isst_ctdp_display_information_end(outf);
+
+}
+
 static void set_clos_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
                                    void *arg4)
 {
@@ -1198,7 +1283,7 @@ static void get_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
        if (ret)
                perror("isst_clos_get_assoc_status");
        else
-               isst_display_result(cpu, outf, "core-power", "get-assoc", clos);
+               isst_clos_display_assoc_information(cpu, outf, clos);
 }
 
 static void get_clos_assoc(void)
@@ -1208,13 +1293,17 @@ static void get_clos_assoc(void)
                fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
                exit(0);
        }
-       if (max_target_cpus)
-               for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
-                                                 NULL, NULL, NULL);
-       else {
+
+       if (!max_target_cpus) {
                fprintf(stderr,
                        "Invalid target cpu. Specify with [-c|--cpu]\n");
+               exit(0);
        }
+
+       isst_ctdp_display_information_start(outf);
+       for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
+                                         NULL, NULL, NULL);
+       isst_ctdp_display_information_end(outf);
 }
 
 static struct process_cmd_struct isst_cmds[] = {
@@ -1231,10 +1320,11 @@ static struct process_cmd_struct isst_cmds[] = {
        { "turbo-freq", "info", dump_fact_config },
        { "turbo-freq", "enable", set_fact_enable },
        { "turbo-freq", "disable", set_fact_disable },
-       { "core-power", "info", dump_clos_config },
+       { "core-power", "info", dump_clos_info },
        { "core-power", "enable", set_clos_enable },
        { "core-power", "disable", set_clos_disable },
        { "core-power", "config", set_clos_config },
+       { "core-power", "get-config", dump_clos_config },
        { "core-power", "assoc", set_clos_assoc },
        { "core-power", "get-assoc", get_clos_assoc },
        { NULL, NULL, NULL }
@@ -1316,6 +1406,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
        static struct option long_options[] = {
                { "bucket", required_argument, 0, 'b' },
                { "level", required_argument, 0, 'l' },
+               { "online", no_argument, 0, 'o' },
                { "trl-type", required_argument, 0, 'r' },
                { "trl", required_argument, 0, 't' },
                { "help", no_argument, 0, 'h' },
@@ -1332,7 +1423,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
        option_index = start;
 
        optind = start + 1;
-       while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:h",
+       while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:ho",
                                  long_options, &option_index)) != -1) {
                switch (opt) {
                case 'b':
@@ -1344,6 +1435,9 @@ static void parse_cmd_args(int argc, int start, char **argv)
                case 'l':
                        tdp_level = atoi(optarg);
                        break;
+               case 'o':
+                       force_online_offline = 1;
+                       break;
                case 't':
                        sscanf(optarg, "0x%llx", &fact_trl);
                        break;
@@ -1362,7 +1456,6 @@ static void parse_cmd_args(int argc, int start, char **argv)
                /* CLOS related */
                case 'c':
                        current_clos = atoi(optarg);
-                       printf("clos %d\n", current_clos);
                        break;
                case 'd':
                        clos_desired = atoi(optarg);
@@ -1433,6 +1526,7 @@ static void core_power_help(void)
        printf("\tenable\n");
        printf("\tdisable\n");
        printf("\tconfig\n");
+       printf("\tget-config\n");
        printf("\tassoc\n");
        printf("\tget-assoc\n");
 }
index 0bf341a..6dee533 100644 (file)
@@ -619,6 +619,31 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
        return 0;
 }
 
+int isst_clos_get_clos_information(int cpu, int *enable, int *type)
+{
+       unsigned int resp;
+       int ret;
+
+       ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
+                                    &resp);
+       if (ret)
+               return ret;
+
+       debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", cpu, resp);
+
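+       /* Per the decode below: response bit 1 = CLOS enable, bit 2 = priority type. */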
+       if (resp & BIT(1))
+               *enable = 1;
+       else
+               *enable = 0;
+
+       if (resp & BIT(2))
+               *type = 1;
+       else
+               *type = 0;
+
+       return 0;
+}
+
 int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
 {
        unsigned int req, resp;
index df4aa99..40346d5 100644 (file)
@@ -287,6 +287,26 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level,
        format_and_print(outf, base_level + 2, header, value);
 }
 
+void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
+                                unsigned int val)
+{
+       char header[256];
+       char value[256];
+
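+       /* Nested output: package-N > die-N > cpu-N > <prefix>:<val>. */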
+       snprintf(header, sizeof(header), "package-%d",
+                get_physical_package_id(cpu));
+       format_and_print(outf, 1, header, NULL);
+       snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+       format_and_print(outf, 2, header, NULL);
+       snprintf(header, sizeof(header), "cpu-%d", cpu);
+       format_and_print(outf, 3, header, NULL);
+
+       snprintf(value, sizeof(value), "%u", val);
+       format_and_print(outf, 4, prefix, value);
+
+       format_and_print(outf, 1, NULL, NULL);
+}
+
 void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
                                   struct isst_pkg_ctdp *pkg_dev)
 {
@@ -503,6 +523,57 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos,
        format_and_print(outf, 1, NULL, NULL);
 }
 
+void isst_clos_display_clos_information(int cpu, FILE *outf,
+                                       int clos_enable, int type)
+{
+       char header[256];
+       char value[256];
+
+       snprintf(header, sizeof(header), "package-%d",
+                get_physical_package_id(cpu));
+       format_and_print(outf, 1, header, NULL);
+       snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+       format_and_print(outf, 2, header, NULL);
+       snprintf(header, sizeof(header), "cpu-%d", cpu);
+       format_and_print(outf, 3, header, NULL);
+
+       snprintf(header, sizeof(header), "core-power");
+       format_and_print(outf, 4, header, NULL);
+
+       snprintf(header, sizeof(header), "enable-status");
+       snprintf(value, sizeof(value), "%d", clos_enable);
+       format_and_print(outf, 5, header, value);
+
+       snprintf(header, sizeof(header), "priority-type");
+       snprintf(value, sizeof(value), "%d", type);
+       format_and_print(outf, 5, header, value);
+
+       format_and_print(outf, 1, NULL, NULL);
+}
+
+void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos)
+{
+       char header[256];
+       char value[256];
+
+       snprintf(header, sizeof(header), "package-%d",
+                get_physical_package_id(cpu));
+       format_and_print(outf, 1, header, NULL);
+       snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+       format_and_print(outf, 2, header, NULL);
+       snprintf(header, sizeof(header), "cpu-%d", cpu);
+       format_and_print(outf, 3, header, NULL);
+
+       snprintf(header, sizeof(header), "get-assoc");
+       format_and_print(outf, 4, header, NULL);
+
+       snprintf(header, sizeof(header), "clos");
+       snprintf(value, sizeof(value), "%d", clos);
+       format_and_print(outf, 5, header, value);
+
+       format_and_print(outf, 1, NULL, NULL);
+}
+
 void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
                         int result)
 {
index 2f7f627..d280b27 100644 (file)
@@ -187,12 +187,16 @@ extern int isst_send_msr_command(unsigned int cpu, unsigned int command,
                                 int write, unsigned long long *req_resp);
 
 extern int isst_get_ctdp_levels(int cpu, struct isst_pkg_ctdp *pkg_dev);
+extern int isst_get_coremask_info(int cpu, int config_index,
+                          struct isst_pkg_ctdp_level_info *ctdp_level);
 extern int isst_get_process_ctdp(int cpu, int tdp_level,
                                 struct isst_pkg_ctdp *pkg_dev);
 extern void isst_get_process_ctdp_complete(int cpu,
                                           struct isst_pkg_ctdp *pkg_dev);
 extern void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
                                          struct isst_pkg_ctdp *pkg_dev);
+extern void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
+                                       unsigned int val);
 extern void isst_ctdp_display_information_start(FILE *outf);
 extern void isst_ctdp_display_information_end(FILE *outf);
 extern void isst_pbf_display_information(int cpu, FILE *outf, int level,
@@ -223,10 +227,14 @@ extern int isst_clos_associate(int cpu, int clos);
 extern int isst_clos_get_assoc_status(int cpu, int *clos_id);
 extern void isst_clos_display_information(int cpu, FILE *outf, int clos,
                                          struct isst_clos_config *clos_config);
-
+extern void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos);
 extern int isst_read_reg(unsigned short reg, unsigned int *val);
 extern int isst_write_reg(int reg, unsigned int val);
 
 extern void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
                                int result);
+
+extern int isst_clos_get_clos_information(int cpu, int *enable, int *type);
+extern void isst_clos_display_clos_information(int cpu, FILE *outf,
+                                              int clos_enable, int type);
 #endif
index 448d686..0bf5640 100644 (file)
@@ -4,6 +4,7 @@
  */
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
+#include <linux/acpi.h>
 #include <linux/list.h>
 #include <linux/uuid.h>
 #include <linux/ioport.h>
@@ -202,9 +203,6 @@ struct nd_intel_lss {
        __u32 status;
 } __packed;
 
-union acpi_object;
-typedef void *acpi_handle;
-
 typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
 typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
                 const guid_t *guid, u64 rev, u64 func,
index 8059ce8..61df01c 100644 (file)
@@ -2,3 +2,5 @@ gpiogpio-event-mon
 gpiogpio-hammer
 gpioinclude/
 gpiolsgpio
+tpm2/SpaceTest.log
+tpm2/*.pyc
index 25b43a8..4cdbae6 100644 (file)
@@ -63,6 +63,13 @@ TARGETS += zram
 TARGETS_HOTPLUG = cpu-hotplug
 TARGETS_HOTPLUG += memory-hotplug
 
+# User can optionally provide a TARGETS skiplist.
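+# e.g.: make SKIP_TARGETS=bpf run_tests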
+SKIP_TARGETS ?=
+ifneq ($(SKIP_TARGETS),)
+       TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
+       override TARGETS := $(TMP)
+endif
+
 # Clear LDFLAGS and MAKEFLAGS if called from main
 # Makefile to avoid test build failures when test
 # Makefile doesn't have explicit build rules.
@@ -126,9 +133,9 @@ endif
 # in the default INSTALL_HDR_PATH usr/include.
 khdr:
 ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
-       make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
+       $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
 else
-       make --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
+       $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
                ARCH=$(ARCH) -C $(top_srcdir) headers_install
 endif
 
@@ -136,44 +143,47 @@ all: khdr
        @for TARGET in $(TARGETS); do           \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                mkdir $$BUILD_TARGET  -p;       \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_tests: all
        @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
        done;
 
 hotplug:
        @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_hotplug: hotplug
        @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
        done;
 
 clean_hotplug:
        @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
 
 run_pstore_crash:
-       make -C pstore run_crash
+       $(MAKE) -C pstore run_crash
 
 # Use $BUILD as the default install root. $BUILD points to the
 # right output location for the following cases:
 # 1. output_dir=kernel_src
 # 2. a separate output directory is specified using O= KBUILD_OUTPUT
 # 3. a separate output directory is specified using KBUILD_OUTPUT
+# Avoid conflict with INSTALL_PATH set by the main Makefile
 #
-INSTALL_PATH ?= $(BUILD)/install
-INSTALL_PATH := $(abspath $(INSTALL_PATH))
+KSFT_INSTALL_PATH ?= $(BUILD)/kselftest_install
+KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
+# Avoid changing the rest of the logic here and lib.mk.
+INSTALL_PATH := $(KSFT_INSTALL_PATH)
 ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
 
 install: all
@@ -184,7 +194,7 @@ ifdef INSTALL_PATH
        install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
        @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
        done;
 
        @# Ask all targets to emit their test scripts
@@ -198,12 +208,17 @@ ifdef INSTALL_PATH
        echo "  cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
        echo "fi" >> $(ALL_SCRIPT)
 
+       @# While building run_kselftest.sh, also skip non-existent TARGET dirs:
+       @# they could be the result of a build failure and should NOT be
+       @# included in the generated runlist.
        for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
+               [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
                echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
                echo "cd $$TARGET" >> $(ALL_SCRIPT); \
                echo -n "run_many" >> $(ALL_SCRIPT); \
-               make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+               echo "Emit Tests for $$TARGET"; \
+               $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
                echo "" >> $(ALL_SCRIPT);           \
                echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
        done;
@@ -216,7 +231,7 @@ endif
 clean:
        @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
 
 .PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
index 6cbeea7..8547ecb 100644 (file)
@@ -195,7 +195,7 @@ static void run_test(int cgroup_fd)
 
        if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
                                      (void *)&server_fd)))
-               goto close_bpf_object;
+               goto close_server_fd;
 
        pthread_mutex_lock(&server_started_mtx);
        pthread_cond_wait(&server_started, &server_started_mtx);
index fdc0b36..f4cd60d 100644 (file)
@@ -203,14 +203,24 @@ static int start_server(void)
        return fd;
 }
 
+static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+
 static void *server_thread(void *arg)
 {
        struct sockaddr_storage addr;
        socklen_t len = sizeof(addr);
        int fd = *(int *)arg;
        int client_fd;
+       int err;
+
+       err = listen(fd, 1);
 
-       if (CHECK_FAIL(listen(fd, 1)) < 0) {
+       pthread_mutex_lock(&server_started_mtx);
+       pthread_cond_signal(&server_started);
+       pthread_mutex_unlock(&server_started_mtx);
+
+       if (CHECK_FAIL(err < 0)) {
                perror("Failed to listen on socket");
                return NULL;
        }
@@ -248,8 +258,16 @@ void test_tcp_rtt(void)
        if (CHECK_FAIL(server_fd < 0))
                goto close_cgroup_fd;
 
-       pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+       if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
+                                     (void *)&server_fd)))
+               goto close_server_fd;
+
+       pthread_mutex_lock(&server_started_mtx);
+       pthread_cond_wait(&server_started, &server_started_mtx);
+       pthread_mutex_unlock(&server_started_mtx);
+
        CHECK_FAIL(run_test(cgroup_fd, server_fd));
+close_server_fd:
        close(server_fd);
 close_cgroup_fd:
        close(cgroup_fd);
index 8a399bd..067eb62 100644 (file)
@@ -413,7 +413,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 #else
 #pragma unroll
 #endif
-       for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) {
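+       /* Use a constant loop bound and check map.cnt inside the body so the
+        * loop unroller / BPF verifier can cope with it. */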
+       for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
+               if (i >= map.cnt)
+                       break;
+
                descr->key_lens[i] = 0;
                len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
                                         map.entries[i].key);
index d23d4da..e2d0619 100755 (executable)
@@ -63,6 +63,9 @@ fi
 
 # Setup
 tc qdisc add dev lo ingress
+echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
 
 echo "Testing IPv4..."
 # Drops all IP/UDP packets coming from port 9
index acf7a74..59ea569 100755 (executable)
@@ -314,15 +314,15 @@ test_gso()
        command -v nc >/dev/null 2>&1 || \
                { echo >&2 "nc is not available: skipping TSO tests"; return; }
 
-       # listen on IPv*_DST, capture TCP into $TMPFILE
+       # listen on port 9000, capture TCP into $TMPFILE
        if [ "${PROTO}" == "IPv4" ] ; then
                IP_DST=${IPv4_DST}
                ip netns exec ${NS3} bash -c \
-                       "nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &"
+                       "nc -4 -l -p 9000 > ${TMPFILE} &"
        elif [ "${PROTO}" == "IPv6" ] ; then
                IP_DST=${IPv6_DST}
                ip netns exec ${NS3} bash -c \
-                       "nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &"
+                       "nc -6 -l -p 9000 > ${TMPFILE} &"
                RET=$?
        else
                echo "    test_gso: unknown PROTO: ${PROTO}"
index 15a6663..1afa22c 100755 (executable)
@@ -22,6 +22,7 @@ import os
 import pprint
 import random
 import re
+import stat
 import string
 import struct
 import subprocess
@@ -311,7 +312,11 @@ class DebugfsDir:
         for f in out.split():
             if f == "ports":
                 continue
+
             p = os.path.join(path, f)
+            if not os.stat(p).st_mode & stat.S_IRUSR:
+                continue
+
             if os.path.isfile(p):
                 _, out = cmd('cat %s/%s' % (path, f))
                 dfs[f] = out.strip()
index 4f8ec1f..a320e38 100644 (file)
@@ -1385,7 +1385,6 @@ static int fixup_sysctl_value(const char *buf, size_t buf_len,
                uint8_t raw[sizeof(uint64_t)];
                uint64_t num;
        } value = {};
-       uint8_t c, i;
 
        if (buf_len > sizeof(value)) {
                log_err("Value is too big (%zd) to use in fixup", buf_len);
index f38567e..daa7d1b 100755 (executable)
@@ -59,7 +59,7 @@ ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
 
 # start the listener
 ip netns exec ${NS_DST} bash -c \
-       "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+       "nc -4 -l -p 9000 >/dev/null &"
 declare -i NC_PID=$!
 sleep 1
 
index 5dcdfa2..126caf2 100755 (executable)
@@ -224,13 +224,6 @@ ingress_vlan_filter_test()
        local vid=10
 
        bridge vlan add vid $vid dev $swp2 master
-       # During initialization the firmware enables all the VLAN filters and
-       # the driver does not turn them off since the traffic will be discarded
-       # by the STP filter whose default is DISCARD state. Add the VID on the
-       # ingress bridge port and then remove it to make sure it is not member
-       # in the VLAN.
-       bridge vlan add vid $vid dev $swp1 master
-       bridge vlan del vid $vid dev $swp1 master
 
        RET=0
 
index 8a4025e..ef1e9ba 100644 (file)
@@ -95,7 +95,7 @@ echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
 check_error 'p:kprobes/testevent _do_fork ^bcd=\1'     # DIFF_ARG_TYPE
 check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE
 check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"'        # DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent _do_fork'    # SAME_PROBE
+check_error '^p:kprobes/testevent _do_fork abcd=\1'    # SAME_PROBE
 fi
 
 exit 0
index 7717c0a..ac73850 100644 (file)
@@ -28,7 +28,7 @@ if [ -z "$FEATURE" ]; then
     exit_unsupported
 fi
 
-echo "Test snapshot tigger"
+echo "Test snapshot trigger"
 echo 0 > snapshot
 echo 1 > events/sched/sched_process_fork/enable
 ( echo "forked")
index fa7c24e..2ff6003 100755 (executable)
@@ -37,11 +37,20 @@ is_ima_sig_required()
        # sequentially.  As a result, a policy rule may be defined, but
        # might not necessarily be used.  This test assumes if a policy
        # rule is specified, that is the intent.
+
+       # First check for appended signature (modsig), then xattr
        if [ $ima_read_policy -eq 1 ]; then
                check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \
-                       "appraise_type=imasig"
+                       "appraise_type=imasig|modsig"
                ret=$?
-               [ $ret -eq 1 ] && log_info "IMA signature required";
+               if [ $ret -eq 1 ]; then
+                       log_info "IMA or appended(modsig) signature required"
+               else
+                       check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \
+                               "appraise_type=imasig"
+                       ret=$?
+                       [ $ret -eq 1 ] && log_info "IMA signature required";
+               fi
        fi
        return $ret
 }
@@ -84,6 +93,22 @@ check_for_imasig()
        return $ret
 }
 
+# Return 1 if an appended signature (modsig) is found and 0 if not.
+check_for_modsig()
+{
+       local module_sig_string="~Module signature appended~"
+       local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)"
+       local ret=0
+
+       if [ "$sig" == "$module_sig_string" ]; then
+               ret=1
+               log_info "kexec kernel image modsig signed"
+       else
+               log_info "kexec kernel image not modsig signed"
+       fi
+       return $ret
+}
+
 kexec_file_load_test()
 {
        local succeed_msg="kexec_file_load succeeded"
@@ -98,7 +123,8 @@ kexec_file_load_test()
                # In secureboot mode with an architecture specific
                # policy, make sure either an IMA or PE signature exists.
                if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ] && \
-                       [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ]; then
+                       [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ] \
+                         && [ $ima_modsig -eq 0 ]; then
                        log_fail "$succeed_msg (missing sig)"
                fi
 
@@ -107,7 +133,8 @@ kexec_file_load_test()
                        log_fail "$succeed_msg (missing PE sig)"
                fi
 
-               if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ]; then
+               if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ] \
+                    && [ $ima_modsig -eq 0 ]; then
                        log_fail "$succeed_msg (missing IMA sig)"
                fi
 
@@ -204,5 +231,8 @@ pe_signed=$?
 check_for_imasig
 ima_signed=$?
 
+check_for_modsig
+ima_modsig=$?
+
 # Test loading the kernel image via kexec_file_load syscall
 kexec_file_load_test
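
The new check_for_modsig helper only needs the trailing bytes of the kernel
image. A standalone C sketch of the same check, assuming the kernel's usual
appended-signature marker "~Module signature appended~\n" (an assumption, not
taken from this patch):

    #include <stdio.h>
    #include <string.h>

    #define MODSIG_MAGIC "~Module signature appended~\n"

    /* Return 1 if 'path' ends with the appended-signature marker. */
    static int has_modsig(const char *path)
    {
            char tail[sizeof(MODSIG_MAGIC)];
            size_t len = sizeof(MODSIG_MAGIC) - 1;  /* marker without NUL */
            FILE *f = fopen(path, "rb");
            int found = 0;

            if (!f)
                    return 0;
            if (fseek(f, -(long)len, SEEK_END) == 0 &&
                fread(tail, 1, len, f) == len) {
                    tail[len] = '\0';
                    found = strcmp(tail, MODSIG_MAGIC) == 0;
            }
            fclose(f);
            return found;
    }
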
index 00c9020..84de7bc 100644 (file)
@@ -3,9 +3,14 @@
 #
 # Runs a set of tests in a given subdirectory.
 export skip_rc=4
+export timeout_rc=124
 export logfile=/dev/stdout
 export per_test_logging=
 
+# Defaults for "settings" file fields:
+# "timeout" how many seconds to let each test run before failing.
+export kselftest_default_timeout=45
+
 # There isn't a shell-agnostic way to find the path of a sourced file,
 # so we must rely on BASE_DIR being set to find other tools.
 if [ -z "$BASE_DIR" ]; then
@@ -24,6 +29,16 @@ tap_prefix()
        fi
 }
 
+tap_timeout()
+{
+       # Make sure tests will time out if the timeout utility is available.
+       if [ -x /usr/bin/timeout ] ; then
+               /usr/bin/timeout "$kselftest_timeout" "$1"
+       else
+               "$1"
+       fi
+}
+
 run_one()
 {
        DIR="$1"
@@ -32,6 +47,18 @@ run_one()
 
        BASENAME_TEST=$(basename $TEST)
 
+       # Reset any "settings"-file variables.
+       export kselftest_timeout="$kselftest_default_timeout"
+       # Load per-test-directory kselftest "settings" file.
+       settings="$BASE_DIR/$DIR/settings"
+       if [ -r "$settings" ] ; then
+               while read line ; do
+                       field=$(echo "$line" | cut -d= -f1)
+                       value=$(echo "$line" | cut -d= -f2-)
+                       eval "kselftest_$field"="$value"
+               done < "$settings"
+       fi
+
        TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
        echo "# $TEST_HDR_MSG"
        if [ ! -x "$TEST" ]; then
@@ -44,14 +71,17 @@ run_one()
                echo "not ok $test_num $TEST_HDR_MSG"
        else
                cd `dirname $TEST` > /dev/null
-               (((((./$BASENAME_TEST 2>&1; echo $? >&3) |
+               ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) |
                        tap_prefix >&4) 3>&1) |
                        (read xs; exit $xs)) 4>>"$logfile" &&
                echo "ok $test_num $TEST_HDR_MSG") ||
-               (if [ $? -eq $skip_rc ]; then   \
+               (rc=$?; \
+               if [ $rc -eq $skip_rc ]; then   \
                        echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+               elif [ $rc -eq $timeout_rc ]; then \
+                       echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
                else
-                       echo "not ok $test_num $TEST_HDR_MSG"
+                       echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
                fi)
                cd - >/dev/null
        fi
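
tap_timeout leans on coreutils timeout(1), which exits with status 124 when
the limit expires, hence the new timeout_rc=124 above. As a rough illustration
of the same behaviour only (this is not how the runner works), a C version
with fork/exec and SIGALRM could look like:

    #include <signal.h>
    #include <string.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static volatile sig_atomic_t expired;

    static void on_alarm(int sig)
    {
            (void)sig;
            expired = 1;
    }

    /* Run argv[] with a wall-clock limit; return 124 on expiry, the same
     * code coreutils timeout(1) uses. Sketch only, no signal hardening. */
    static int run_with_timeout(char *const argv[], unsigned int seconds)
    {
            struct sigaction sa;
            int status;
            pid_t pid;

            memset(&sa, 0, sizeof(sa));
            sa.sa_handler = on_alarm;  /* no SA_RESTART: waitpid sees EINTR */
            sigaction(SIGALRM, &sa, NULL);

            pid = fork();
            if (pid < 0)
                    return 127;
            if (pid == 0) {
                    execvp(argv[0], argv);
                    _exit(127);        /* exec failed */
            }

            alarm(seconds);
            while (waitpid(pid, &status, 0) < 0) {
                    if (!expired)
                            continue;  /* some other EINTR */
                    kill(pid, SIGTERM);
                    waitpid(pid, &status, 0);
                    return 124;
            }
            alarm(0);
            return WIFEXITED(status) ? WEXITSTATUS(status)
                                     : 128 + WTERMSIG(status);
    }
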
index ec30446..e2e1911 100755 (executable)
@@ -24,12 +24,12 @@ main()
                echo "$0: Installing in specified location - $install_loc ..."
        fi
 
-       install_dir=$install_loc/kselftest
+       install_dir=$install_loc/kselftest_install
 
 # Create install directory
        mkdir -p $install_dir
 # Build tests
-       INSTALL_PATH=$install_dir make install
+       KSFT_INSTALL_PATH=$install_dir make install
 }
 
 main "$@"
index b35da37..409c1fa 100644 (file)
@@ -1,4 +1,5 @@
 /s390x/sync_regs_test
+/s390x/memop
 /x86_64/cr4_cpuid_sync_test
 /x86_64/evmcs_test
 /x86_64/hyperv_cpuid
@@ -9,6 +10,7 @@
 /x86_64/state_test
 /x86_64/sync_regs_test
 /x86_64/vmx_close_while_nested_test
+/x86_64/vmx_dirty_log_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
 /clear_dirty_log_test
index 62c591f..c5ec868 100644 (file)
@@ -22,6 +22,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
@@ -48,7 +49,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
 
 no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
-        $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+        $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
 
 # On s390, build the testcases KVM-enabled
 pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
index dc3346e..5614222 100644 (file)
@@ -19,8 +19,6 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define DEBUG printf
-
 #define VCPU_ID                                1
 
 /* The memory slot index to track dirty pages */
@@ -249,14 +247,12 @@ static void vm_dirty_log_verify(unsigned long *bmap)
 }
 
 static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
-                               uint64_t extra_mem_pages, void *guest_code,
-                               unsigned long type)
+                               uint64_t extra_mem_pages, void *guest_code)
 {
        struct kvm_vm *vm;
        uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
-       vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
-                       O_RDWR, type);
+       vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
        kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 #ifdef __x86_64__
        vm_create_irqchip(vm);
@@ -265,67 +261,35 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
        return vm;
 }
 
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K  12
+
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                     unsigned long interval, uint64_t phys_offset)
 {
-       unsigned int guest_pa_bits, guest_page_shift;
        pthread_t vcpu_thread;
        struct kvm_vm *vm;
-       uint64_t max_gfn;
        unsigned long *bmap;
-       unsigned long type = 0;
-
-       switch (mode) {
-       case VM_MODE_P52V48_4K:
-               guest_pa_bits = 52;
-               guest_page_shift = 12;
-               break;
-       case VM_MODE_P52V48_64K:
-               guest_pa_bits = 52;
-               guest_page_shift = 16;
-               break;
-       case VM_MODE_P48V48_4K:
-               guest_pa_bits = 48;
-               guest_page_shift = 12;
-               break;
-       case VM_MODE_P48V48_64K:
-               guest_pa_bits = 48;
-               guest_page_shift = 16;
-               break;
-       case VM_MODE_P40V48_4K:
-               guest_pa_bits = 40;
-               guest_page_shift = 12;
-               break;
-       case VM_MODE_P40V48_64K:
-               guest_pa_bits = 40;
-               guest_page_shift = 16;
-               break;
-       default:
-               TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
-       }
 
-       DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-#ifdef __x86_64__
        /*
-        * FIXME
-        * The x86_64 kvm selftests framework currently only supports a
-        * single PML4 which restricts the number of physical address
-        * bits we can change to 39.
+        * We reserve page table memory for twice the extra dirty mem,
+        * which will definitely cover the original (1G+) test range.
+        * The calculation is done with the 4K page size, the smallest
+        * granularity, so the resulting page count is enough for all
+        * archs (e.g., a 64K page size guest will need even less
+        * memory for page tables).
         */
-       guest_pa_bits = 39;
-#endif
-#ifdef __aarch64__
-       if (guest_pa_bits != 40)
-               type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits);
-#endif
-       max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
-       guest_page_size = (1ul << guest_page_shift);
+       vm = create_vm(mode, VCPU_ID,
+                      2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
+                      guest_code);
+
+       guest_page_size = vm_get_page_size(vm);
        /*
         * A little more than 1G of guest page sized pages.  Cover the
         * case where the size is not aligned to 64 pages.
         */
-       guest_num_pages = (1ul << (30 - guest_page_shift)) + 16;
+       guest_num_pages = (1ul << (DIRTY_MEM_BITS -
+                                  vm_get_page_shift(vm))) + 16;
 #ifdef __s390x__
        /* Round up to multiple of 1M (segment size) */
        guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
@@ -335,7 +299,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                         !!((guest_num_pages * guest_page_size) % host_page_size);
 
        if (!phys_offset) {
-               guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size;
+               guest_test_phys_mem = (vm_get_max_gfn(vm) -
+                                      guest_num_pages) * guest_page_size;
                guest_test_phys_mem &= ~(host_page_size - 1);
        } else {
                guest_test_phys_mem = phys_offset;
@@ -351,8 +316,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        bmap = bitmap_alloc(host_num_pages);
        host_bmap_track = bitmap_alloc(host_num_pages);
 
-       vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type);
-
 #ifdef USE_CLEAR_DIRTY_LOG
        struct kvm_enable_cap cap = {};
 
@@ -482,7 +445,7 @@ int main(int argc, char *argv[])
 #endif
 
 #ifdef __x86_64__
-       vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true);
+       vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true);
 #endif
 #ifdef __aarch64__
        vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
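
A quick sanity check on the reservation arithmetic above, using the two
macros just defined (illustration only):

    unsigned long extra_mem_pages = 2ul << (30 - 12);  /* DIRTY_MEM_BITS - PAGE_SHIFT_4K */
    /* = 2 << 18 = 524288 4K pages = 2 GiB, twice the ~1G dirty test range */

so even a worst-case 4K guest has page-table headroom to spare.
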
index 5463b78..29cccaf 100644 (file)
@@ -24,6 +24,12 @@ struct kvm_vm;
 typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
+#ifndef NDEBUG
+#define DEBUG(...) printf(__VA_ARGS__) /* no trailing ';' so if/else callers stay safe */
+#else
+#define DEBUG(...)
+#endif
+
 /* Minimum allocated guest virtual and physical addresses */
 #define KVM_UTIL_MIN_VADDR             0x2000
 
@@ -38,11 +44,14 @@ enum vm_guest_mode {
        VM_MODE_P48V48_64K,
        VM_MODE_P40V48_4K,
        VM_MODE_P40V48_64K,
+       VM_MODE_PXXV48_4K,      /* For 48-bit VA but ANY number of PA bits */
        NUM_VM_MODES,
 };
 
-#ifdef __aarch64__
+#if defined(__aarch64__)
 #define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#elif defined(__x86_64__)
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
 #else
 #define VM_MODE_DEFAULT VM_MODE_P52V48_4K
 #endif
@@ -60,8 +69,7 @@ int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
-                         int perm, unsigned long type);
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
@@ -146,6 +154,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
 bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
                                 uint64_t end);
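
The new vm_get_page_size()/vm_get_page_shift()/vm_get_max_gfn() accessors are
what let callers such as dirty_log_test stay mode-agnostic instead of
switch()ing on the guest mode. A hypothetical caller, assuming the selftest
headers plus <fcntl.h> and <inttypes.h> (illustration only):

    struct kvm_vm *vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
    unsigned int shift = vm_get_page_shift(vm);
    uint64_t top_gpa = ((uint64_t)vm_get_max_gfn(vm) + 1) << shift;

    DEBUG("page size: %u, top of guest physical memory: 0x%" PRIx64 "\n",
          vm_get_page_size(vm), top_gpa);
    kvm_vm_free(vm);
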
index 80d1974..ff23401 100644 (file)
@@ -325,6 +325,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
 void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
                  uint64_t msr_value);
 
+uint32_t kvm_get_cpuid_max(void);
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+
 /*
  * Basic CPU control in CR0
  */
@@ -1080,6 +1083,9 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
 #define VMX_BASIC_MEM_TYPE_WB  6LLU
 #define VMX_BASIC_INOUT                0x0040000000000000LLU
 
+/* VMX_EPT_VPID_CAP bits */
+#define VMX_EPT_VPID_CAP_AD_BITS       (1ULL << 21)
+
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
 #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
index 69b1705..f52e0ba 100644 (file)
@@ -569,6 +569,10 @@ struct vmx_pages {
        void *enlightened_vmcs_hva;
        uint64_t enlightened_vmcs_gpa;
        void *enlightened_vmcs;
+
+       void *eptp_hva;
+       uint64_t eptp_gpa;
+       void *eptp;
 };
 
 struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
@@ -576,4 +580,16 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx);
 void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
 bool load_vmcs(struct vmx_pages *vmx);
 
+void nested_vmx_check_supported(void);
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                  uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+                uint32_t eptp_memslot);
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+                       uint32_t memslot, uint32_t eptp_memslot);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint32_t eptp_memslot);
+
 #endif /* SELFTEST_KVM_VMX_H */
index 486400a..86036a5 100644 (file)
@@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
        case VM_MODE_P52V48_4K:
                TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
                                   "with 52-bit physical address ranges");
+       case VM_MODE_PXXV48_4K:
+               TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
+                                  "with ANY-bit physical address ranges");
        case VM_MODE_P52V48_64K:
                tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
                tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
index 6e49bb0..41cf454 100644 (file)
@@ -8,6 +8,7 @@
 #include "test_util.h"
 #include "kvm_util.h"
 #include "kvm_util_internal.h"
+#include "processor.h"
 
 #include <assert.h>
 #include <sys/mman.h>
@@ -84,7 +85,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
        return ret;
 }
 
-static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
+static void vm_open(struct kvm_vm *vm, int perm)
 {
        vm->kvm_fd = open(KVM_DEV_PATH, perm);
        if (vm->kvm_fd < 0)
@@ -95,18 +96,19 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
                exit(KSFT_SKIP);
        }
 
-       vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
+       vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
        TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
                "rc: %i errno: %i", vm->fd, errno);
 }
 
 const char * const vm_guest_mode_string[] = {
-       "PA-bits:52, VA-bits:48, 4K pages",
-       "PA-bits:52, VA-bits:48, 64K pages",
-       "PA-bits:48, VA-bits:48, 4K pages",
-       "PA-bits:48, VA-bits:48, 64K pages",
-       "PA-bits:40, VA-bits:48, 4K pages",
-       "PA-bits:40, VA-bits:48, 64K pages",
+       "PA-bits:52,  VA-bits:48,  4K pages",
+       "PA-bits:52,  VA-bits:48, 64K pages",
+       "PA-bits:48,  VA-bits:48,  4K pages",
+       "PA-bits:48,  VA-bits:48, 64K pages",
+       "PA-bits:40,  VA-bits:48,  4K pages",
+       "PA-bits:40,  VA-bits:48, 64K pages",
+       "PA-bits:ANY, VA-bits:48,  4K pages",
 };
 _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
               "Missing new mode strings?");
@@ -130,17 +132,17 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
  * descriptor to control the created VM is created with the permissions
  * given by perm (e.g. O_RDWR).
  */
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
-                         int perm, unsigned long type)
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
        struct kvm_vm *vm;
 
+       DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
        vm = calloc(1, sizeof(*vm));
        TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
        vm->mode = mode;
-       vm->type = type;
-       vm_open(vm, perm, type);
+       vm->type = 0;
 
        /* Setup mode specific traits. */
        switch (vm->mode) {
@@ -186,10 +188,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
                vm->page_size = 0x10000;
                vm->page_shift = 16;
                break;
+       case VM_MODE_PXXV48_4K:
+#ifdef __x86_64__
+               kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+               TEST_ASSERT(vm->va_bits == 48, "Linear address width "
+                           "(%d bits) not supported", vm->va_bits);
+               vm->pgtable_levels = 4;
+               vm->page_size = 0x1000;
+               vm->page_shift = 12;
+               DEBUG("Guest physical address width detected: %d\n",
+                     vm->pa_bits);
+#else
+               TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on "
+                           "non-x86 platforms");
+#endif
+               break;
        default:
                TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
        }
 
+#ifdef __aarch64__
+       if (vm->pa_bits != 40)
+               vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
+#endif
+
+       vm_open(vm, perm);
+
        /* Limit to VA-bit canonical virtual addresses. */
        vm->vpages_valid = sparsebit_alloc();
        sparsebit_set_num(vm->vpages_valid,
@@ -212,7 +236,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
-       return _vm_create(mode, phy_pages, perm, 0);
+       return _vm_create(mode, phy_pages, perm);
 }
 
 /*
@@ -232,7 +256,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 {
        struct userspace_mem_region *region;
 
-       vm_open(vmp, perm, vmp->type);
+       vm_open(vmp, perm);
        if (vmp->has_irqchip)
                vm_create_irqchip(vmp);
 
@@ -681,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
  *   on error (e.g. currently no memory region using memslot as a KVM
  *   memory slot ID).
  */
-static struct userspace_mem_region *
+struct userspace_mem_region *
 memslot2region(struct kvm_vm *vm, uint32_t memslot)
 {
        struct userspace_mem_region *region;
@@ -1628,3 +1652,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
 
        return val == 'Y';
 }
+
+unsigned int vm_get_page_size(struct kvm_vm *vm)
+{
+       return vm->page_size;
+}
+
+unsigned int vm_get_page_shift(struct kvm_vm *vm)
+{
+       return vm->page_shift;
+}
+
+unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+{
+       return vm->max_gfn;
+}
index f36262e..ac50c42 100644 (file)
@@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
 void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
 void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
 
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
 #endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
index 0a5e487..6698cb7 100644 (file)
@@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
 
 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 {
-       TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
        /* If needed, create page map l4 table. */
@@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;
 
-       TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
        TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
        struct pageDirectoryEntry *pde;
        struct pageTableEntry *pte;
 
-       TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
        index[0] = (gva >> 12) & 0x1ffu;
@@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
        kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
 
        switch (vm->mode) {
-       case VM_MODE_P52V48_4K:
+       case VM_MODE_PXXV48_4K:
                sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
                sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
                sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
@@ -1085,7 +1085,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
        for (i = 0; i < nmsrs; i++)
                state->msrs.entries[i].index = list->indices[i];
        r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
-        TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
+        TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
                 r, r == nmsrs ? -1 : list->indices[r]);
 
        r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
@@ -1157,3 +1157,25 @@ bool is_intel_cpu(void)
        chunk = (const uint32_t *)("GenuineIntel");
        return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
 }
+
+uint32_t kvm_get_cpuid_max(void)
+{
+       return kvm_get_supported_cpuid_entry(0x80000000)->eax;
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+       struct kvm_cpuid_entry2 *entry;
+       bool pae;
+
+       /* SDM 4.1.4 */
+       if (kvm_get_cpuid_max() < 0x80000008) {
+               pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
+               *pa_bits = pae ? 36 : 32;
+               *va_bits = 32;
+       } else {
+               entry = kvm_get_supported_cpuid_entry(0x80000008);
+               *pa_bits = entry->eax & 0xff;
+               *va_bits = (entry->eax >> 8) & 0xff;
+       }
+}
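
kvm_get_cpu_address_width() follows SDM section 4.1.4: CPUID leaf 0x80000008
returns the physical address width in EAX[7:0] and the linear address width
in EAX[15:8], with a PAE-based fallback when the leaf is absent. Decoding a
sample value (the value itself is hypothetical):

    uint32_t eax = 0x00003028;                 /* imaginary leaf 0x80000008 result */
    unsigned int pa_bits = eax & 0xff;         /* 0x28 = 40 physical bits */
    unsigned int va_bits = (eax >> 8) & 0xff;  /* 0x30 = 48 linear bits   */
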
index 4bfc9a9..da4d89a 100644 (file)
@@ -32,7 +32,7 @@ void ucall(uint64_t cmd, int nargs, ...)
        va_end(va);
 
        asm volatile("in %[port], %%al"
-               : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax");
+               : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
 }
 
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
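
The added "memory" clobber is the point of this hunk: the asm names only the
port and &uc as inputs, so without the clobber the compiler may keep the
just-filled ucall fields in registers, leaving the host to read stale guest
memory on the KVM_EXIT_IO. The general shape of the pattern (a sketch, not
new code):

    struct ucall uc = { .cmd = cmd };   /* data the host reads via &uc */

    asm volatile("in %[port], %%al"
                 : : [port] "d" (UCALL_PIO_PORT), "D" (&uc)
                 : "rax", "memory");    /* "memory" forces uc out to RAM */
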
index 9cef045..f6ec97b 100644 (file)
@@ -7,11 +7,39 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
+#include "../kvm_util_internal.h"
 #include "processor.h"
 #include "vmx.h"
 
+#define PAGE_SHIFT_4K  12
+
+#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+
 bool enable_evmcs;
 
+struct eptPageTableEntry {
+       uint64_t readable:1;
+       uint64_t writable:1;
+       uint64_t executable:1;
+       uint64_t memory_type:3;
+       uint64_t ignore_pat:1;
+       uint64_t page_size:1;
+       uint64_t accessed:1;
+       uint64_t dirty:1;
+       uint64_t ignored_11_10:2;
+       uint64_t address:40;
+       uint64_t ignored_62_52:11;
+       uint64_t suppress_ve:1;
+};
+
+struct eptPageTablePointer {
+       uint64_t memory_type:3;
+       uint64_t page_walk_length:3;
+       uint64_t ad_enabled:1;
+       uint64_t reserved_11_07:5;
+       uint64_t address:40;
+       uint64_t reserved_63_52:12;
+};
 int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
 {
        uint16_t evmcs_ver;
@@ -174,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx)
  */
 static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
 {
+       uint32_t sec_exec_ctl = 0;
+
        vmwrite(VIRTUAL_PROCESSOR_ID, 0);
        vmwrite(POSTED_INTR_NV, 0);
 
        vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
-       if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
+
+       if (vmx->eptp_gpa) {
+               uint64_t ept_paddr;
+               struct eptPageTablePointer eptp = {
+                       .memory_type = VMX_BASIC_MEM_TYPE_WB,
+                       .page_walk_length = 3, /* + 1 */
+                       .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+                       .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
+               };
+
+               memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
+               vmwrite(EPT_POINTER, ept_paddr);
+               sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
+       }
+
+       if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
                vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                        rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
-       else
+       else {
                vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+               GUEST_ASSERT(!sec_exec_ctl);
+       }
+
        vmwrite(EXCEPTION_BITMAP, 0);
        vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
        vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
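
The EPTP assembled here follows the SDM layout mirrored by struct
eptPageTablePointer: bits 2:0 hold the memory type (6 = write-back), bits 5:3
the page-walk length minus one, bit 6 the accessed/dirty enable, and bits
51:12 the PML4 physical address. The same value built with explicit shifts
(illustration; the address is hypothetical):

    uint64_t eptp_gpa = 0x10000;            /* imaginary PML4 table GPA   */
    uint64_t eptp = (eptp_gpa & ~0xfffULL)  /* bits 51:12: PML4 address   */
                  | (1ULL << 6)             /* bit  6: enable A/D flags   */
                  | (3ULL << 3)             /* bits 5:3: walk length - 1  */
                  | 6ULL;                   /* bits 2:0: WB memory type   */
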
@@ -327,3 +375,162 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
        init_vmcs_host_state();
        init_vmcs_guest_state(guest_rip, guest_rsp);
 }
+
+void nested_vmx_check_supported(void)
+{
+       struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+       if (!(entry->ecx & CPUID_VMX)) {
+               fprintf(stderr, "nested VMX not enabled, skipping test\n");
+               exit(KSFT_SKIP);
+       }
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                  uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+{
+       uint16_t index[4];
+       struct eptPageTableEntry *pml4e;
+
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+                   "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+       TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+                   "Nested physical address not on page boundary,\n"
+                   "  nested_paddr: 0x%lx vm->page_size: 0x%x",
+                   nested_paddr, vm->page_size);
+       TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Physical address beyond beyond maximum supported,\n"
+                   "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   paddr, vm->max_gfn, vm->page_size);
+       TEST_ASSERT((paddr % vm->page_size) == 0,
+                   "Physical address not on page boundary,\n"
+                   "  paddr: 0x%lx vm->page_size: 0x%x",
+                   paddr, vm->page_size);
+       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Physical address beyond beyond maximum supported,\n"
+                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   paddr, vm->max_gfn, vm->page_size);
+
+       index[0] = (nested_paddr >> 12) & 0x1ffu;
+       index[1] = (nested_paddr >> 21) & 0x1ffu;
+       index[2] = (nested_paddr >> 30) & 0x1ffu;
+       index[3] = (nested_paddr >> 39) & 0x1ffu;
+
+       /* Allocate page directory pointer table if not present. */
+       pml4e = vmx->eptp_hva;
+       if (!pml4e[index[3]].readable) {
+               pml4e[index[3]].address = vm_phy_page_alloc(vm,
+                         KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+                       >> vm->page_shift;
+               pml4e[index[3]].writable = true;
+               pml4e[index[3]].readable = true;
+               pml4e[index[3]].executable = true;
+       }
+
+       /* Allocate page directory table if not present. */
+       struct eptPageTableEntry *pdpe;
+       pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+       if (!pdpe[index[2]].readable) {
+               pdpe[index[2]].address = vm_phy_page_alloc(vm,
+                         KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+                       >> vm->page_shift;
+               pdpe[index[2]].writable = true;
+               pdpe[index[2]].readable = true;
+               pdpe[index[2]].executable = true;
+       }
+
+       /* Allocate page table if not present. */
+       struct eptPageTableEntry *pde;
+       pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+       if (!pde[index[1]].readable) {
+               pde[index[1]].address = vm_phy_page_alloc(vm,
+                         KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+                       >> vm->page_shift;
+               pde[index[1]].writable = true;
+               pde[index[1]].readable = true;
+               pde[index[1]].executable = true;
+       }
+
+       /* Fill in page table entry. */
+       struct eptPageTableEntry *pte;
+       pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+       pte[index[0]].address = paddr >> vm->page_shift;
+       pte[index[0]].writable = true;
+       pte[index[0]].readable = true;
+       pte[index[0]].executable = true;
+
+       /*
+        * For now mark these as accessed and dirty because the only
+        * testcase we have needs that.  Can be reconsidered later.
+        */
+       pte[index[0]].accessed = true;
+       pte[index[0]].dirty = true;
+}
+
+/*
+ * Map a range of EPT guest physical addresses to the VM's physical address space.
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   nested_paddr - Nested guest physical address to map
+ *   paddr - VM Physical Address
+ *   size - The size of the range to map
+ *   eptp_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a nested guest translation for the
+ * page range starting at nested_paddr to the page range starting at paddr.
+ */
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+               uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+               uint32_t eptp_memslot)
+{
+       size_t page_size = vm->page_size;
+       size_t npages = size / page_size;
+
+       TEST_ASSERT(nested_paddr + size > nested_paddr, "Nested paddr overflow");
+       TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+       while (npages--) {
+               nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+               nested_paddr += page_size;
+               paddr += page_size;
+       }
+}
+
+/* Prepare an identity extended page table that maps all the
+ * physical pages in the VM.
+ */
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+                       uint32_t memslot, uint32_t eptp_memslot)
+{
+       sparsebit_idx_t i, last;
+       struct userspace_mem_region *region =
+               memslot2region(vm, memslot);
+
+       i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+       last = i + (region->region.memory_size >> vm->page_shift);
+       for (;;) {
+               i = sparsebit_next_clear(region->unused_phy_pages, i);
+               if (i > last)
+                       break;
+
+               nested_map(vmx, vm,
+                          (uint64_t)i << vm->page_shift,
+                          (uint64_t)i << vm->page_shift,
+                          1 << vm->page_shift,
+                          eptp_memslot);
+       }
+}
+
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint32_t eptp_memslot)
+{
+       vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+       vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
+       vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
+}
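
Each level of the EPT walk in nested_pg_map() consumes 9 bits of the nested
GPA, exactly as the index[] computation shows. Worked through for a sample
page-aligned address (the address is arbitrary, chosen for illustration):

    uint64_t nested_paddr = 0xc0001000;
    uint16_t pte_idx  = (nested_paddr >> 12) & 0x1ff;  /* 1: page table     */
    uint16_t pde_idx  = (nested_paddr >> 21) & 0x1ff;  /* 0: page directory */
    uint16_t pdpe_idx = (nested_paddr >> 30) & 0x1ff;  /* 3: PDPT           */
    uint16_t pml4_idx = (nested_paddr >> 39) & 0x1ff;  /* 0: PML4           */
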
index ee59831..443a2b5 100644 (file)
@@ -26,6 +26,25 @@ static void guest_code(void)
 {
 }
 
+static int smt_possible(void)
+{
+       char buf[16];
+       FILE *f;
+       bool res = true;
+
+       f = fopen("/sys/devices/system/cpu/smt/control", "r");
+       if (f) {
+               if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
+                       if (!strncmp(buf, "forceoff", 8) ||
+                           !strncmp(buf, "notsupported", 12))
+                               res = false;
+               }
+               fclose(f);
+       }
+
+       return res;
+}
+
 static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                          int evmcs_enabled)
 {
@@ -59,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
                            !entry->padding[2], "padding should be zero");
 
+               if (entry->function == 0x40000004) {
+                       int nononarchcs = !!(entry->eax & (1UL << 18));
+
+                       TEST_ASSERT(nononarchcs == !smt_possible(),
+                                   "NoNonArchitecturalCoreSharing bit"
+                                   " doesn't reflect SMT setting");
+               }
+
                /*
                 * If needed for debug:
                 * fprintf(stdout,
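
The new assertion ties Hyper-V CPUID leaf 0x40000004 to the host SMT state:
EAX bit 18 is the NoNonArchitecturalCoreSharing flag, which may be advertised
only when SMT cannot happen, hence the cross-check against
/sys/devices/system/cpu/smt/control. Extracting the bit (illustration):

    uint32_t eax = entry->eax;                    /* leaf 0x40000004 */
    int no_core_sharing = !!(eax & (1UL << 18));  /* matches nononarchcs above */
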
index 11c2a70..5c82242 100644 (file)
 
 #define VCPU_ID 5
 
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+/*
+ * ucall is embedded here to protect against the compiler reshuffling
+ * registers before calling a function. In this test we only need a
+ * KVM_EXIT_IO vmexit while preserving RBX; no additional information
+ * is needed.
+ */
 void guest_code(void)
 {
-       /*
-        * use a callee-save register, otherwise the compiler
-        * saves it around the call to GUEST_SYNC.
-        */
-       register u32 stage asm("rbx");
-       for (;;) {
-               GUEST_SYNC(0);
-               stage++;
-               asm volatile ("" : : "r" (stage));
-       }
+       asm volatile("1: in %[port], %%al\n"
+                    "add $0x1, %%rbx\n"
+                    "jmp 1b"
+                    : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
 }
 
 static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
index 3b0ffe0..5dfb535 100644 (file)
@@ -53,12 +53,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 int main(int argc, char *argv[])
 {
        vm_vaddr_t vmx_pages_gva;
-       struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
-       if (!(entry->ecx & CPUID_VMX)) {
-               fprintf(stderr, "nested VMX not enabled, skipping test\n");
-               exit(KSFT_SKIP);
-       }
+       nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
new file mode 100644 (file)
index 0000000..a223a64
--- /dev/null
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID                                1
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX            1
+#define TEST_MEM_SIZE                  3
+
+/* L1 guest test virtual memory offset */
+#define GUEST_TEST_MEM                 0xc0000000
+
+/* L2 guest test virtual memory offset */
+#define NESTED_TEST_MEM1               0xc0001000
+#define NESTED_TEST_MEM2               0xc0002000
+
+static void l2_guest_code(void)
+{
+       *(volatile uint64_t *)NESTED_TEST_MEM1;
+       *(volatile uint64_t *)NESTED_TEST_MEM1 = 1;
+       GUEST_SYNC(true);
+       GUEST_SYNC(false);
+
+       *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+       GUEST_SYNC(true);
+       *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+       GUEST_SYNC(true);
+       GUEST_SYNC(false);
+
+       /* Exit to L1 and never come back.  */
+       vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(vmx->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+       GUEST_ASSERT(load_vmcs(vmx));
+
+       prepare_vmcs(vmx, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(false);
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_SYNC(false);
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva = 0;
+       struct vmx_pages *vmx;
+       unsigned long *bmap;
+       uint64_t *host_test_mem;
+
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       struct ucall uc;
+       bool done = false;
+
+       nested_vmx_check_supported();
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+       run = vcpu_state(vm, VCPU_ID);
+
+       /* Add an extra memory slot for testing dirty logging */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   GUEST_TEST_MEM,
+                                   TEST_MEM_SLOT_INDEX,
+                                   TEST_MEM_SIZE,
+                                   KVM_MEM_LOG_DIRTY_PAGES);
+
+       /*
+        * Add an identity map for GVA range [0xc0000000, 0xc0003000).  This
+        * affects both L1 and L2.  However...
+        */
+       virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM,
+                TEST_MEM_SIZE * 4096, 0);
+
+       /*
+        * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
+        * 0xc0000000.
+        *
+        * Note that prepare_eptp should be called only after L1's GPA map is done,
+        * meaning after the last call to virt_map.
+        */
+       prepare_eptp(vmx, vm, 0);
+       nested_map_memslot(vmx, vm, 0, 0);
+       nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
+       nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+
+       bmap = bitmap_alloc(TEST_MEM_SIZE);
+       host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
+
+       while (!done) {
+               memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096);
+               _vcpu_run(vm, VCPU_ID);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Unexpected exit reason: %u (%s),\n",
+                           run->exit_reason,
+                           exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vm, VCPU_ID, &uc)) {
+               case UCALL_ABORT:
+                       TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+                                   __FILE__, uc.args[1]);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       /*
+                        * The nested guest wrote at offset 0x1000 in the memslot, but the
+                        * dirty bitmap must be filled in according to L1 GPA, not L2.
+                        */
+                       kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+                       if (uc.args[1]) {
+                               TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
+                               TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+                       } else {
+                               TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
+                               TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+                       }
+
+                       TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
+                       TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
+                       TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
+                       TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+                       break;
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               default:
+                       TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+               }
+       }
+}
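
The aliasing set up above is the whole point of the test: L2 GPAs 0xc0001000
and 0xc0002000 are both EPT-mapped onto L1 GPA 0xc0000000, so any L2 write
through either alias must dirty page 0 of the memslot and never pages 1 or 2,
which is exactly what the UCALL_SYNC assertions verify. The expected bitmap
index (illustration):

    /* dirty bit = (L1 GPA - memslot base) >> page shift */
    unsigned long bit = (0xc0000000UL - 0xc0000000UL) >> 12;  /* always 0 */
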
index 853e370..9ef7fab 100644 (file)
@@ -224,7 +224,6 @@ int main(int argc, char *argv[])
 {
        struct kvm_vm *vm;
        struct kvm_nested_state state;
-       struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
        have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
 
@@ -237,10 +236,7 @@ int main(int argc, char *argv[])
         * AMD currently does not implement set_nested_state, so for now we
         * just early out.
         */
-       if (!(entry->ecx & CPUID_VMX)) {
-               fprintf(stderr, "nested VMX not enabled, skipping test\n");
-               exit(KSFT_SKIP);
-       }
+       nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, 0);
 
@@ -271,12 +267,7 @@ int main(int argc, char *argv[])
        state.flags = KVM_STATE_NESTED_RUN_PENDING;
        test_nested_state_expect_einval(vm, &state);
 
-       /*
-        * TODO: When SVM support is added for KVM_SET_NESTED_STATE
-        *       add tests here to support it like VMX.
-        */
-       if (entry->ecx & CPUID_VMX)
-               test_vmx_nested_state(vm);
+       test_vmx_nested_state(vm);
 
        kvm_vm_free(vm);
        return 0;
index f36c10e..5590fd2 100644 (file)
@@ -128,12 +128,8 @@ static void report(int64_t val)
 int main(int argc, char *argv[])
 {
        vm_vaddr_t vmx_pages_gva;
-       struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
-       if (!(entry->ecx & CPUID_VMX)) {
-               fprintf(stderr, "nested VMX not enabled, skipping test\n");
-               exit(KSFT_SKIP);
-       }
+       nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
index 0dd7700..ad23100 100644 (file)
@@ -1 +1,3 @@
+CONFIG_LIVEPATCH=y
+CONFIG_DYNAMIC_DEBUG=y
 CONFIG_TEST_LIVEPATCH=m
index 020c44f..f2f7ec0 100644 (file)
@@ -1 +1,2 @@
-membarrier_test
+membarrier_test_multi_thread
+membarrier_test_single_thread
index 97e3bdf..34d1c81 100644 (file)
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS += -g -I../../../../usr/include/
+LDLIBS += -lpthread
 
-TEST_GEN_PROGS := membarrier_test
+TEST_GEN_PROGS := membarrier_test_single_thread \
+               membarrier_test_multi_thread
 
 include ../lib.mk
-
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
deleted file mode 100644 (file)
index 70b4ddb..0000000
+++ /dev/null
@@ -1,313 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <linux/membarrier.h>
-#include <syscall.h>
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-
-#include "../kselftest.h"
-
-static int sys_membarrier(int cmd, int flags)
-{
-       return syscall(__NR_membarrier, cmd, flags);
-}
-
-static int test_membarrier_cmd_fail(void)
-{
-       int cmd = -1, flags = 0;
-       const char *test_name = "sys membarrier invalid command";
-
-       if (sys_membarrier(cmd, flags) != -1) {
-               ksft_exit_fail_msg(
-                       "%s test: command = %d, flags = %d. Should fail, but passed\n",
-                       test_name, cmd, flags);
-       }
-       if (errno != EINVAL) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
-                       test_name, flags, EINVAL, strerror(EINVAL),
-                       errno, strerror(errno));
-       }
-
-       ksft_test_result_pass(
-               "%s test: command = %d, flags = %d, errno = %d. Failed as expected\n",
-               test_name, cmd, flags, errno);
-       return 0;
-}
-
-static int test_membarrier_flags_fail(void)
-{
-       int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags";
-
-       if (sys_membarrier(cmd, flags) != -1) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d. Should fail, but passed\n",
-                       test_name, flags);
-       }
-       if (errno != EINVAL) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
-                       test_name, flags, EINVAL, strerror(EINVAL),
-                       errno, strerror(errno));
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d, errno = %d. Failed as expected\n",
-               test_name, flags, errno);
-       return 0;
-}
-
-static int test_membarrier_global_success(void)
-{
-       int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL";
-
-       if (sys_membarrier(cmd, flags) != 0) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d, errno = %d\n",
-                       test_name, flags, errno);
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d\n", test_name, flags);
-       return 0;
-}
-
-static int test_membarrier_private_expedited_fail(void)
-{
-       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure";
-
-       if (sys_membarrier(cmd, flags) != -1) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d. Should fail, but passed\n",
-                       test_name, flags);
-       }
-       if (errno != EPERM) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
-                       test_name, flags, EPERM, strerror(EPERM),
-                       errno, strerror(errno));
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d, errno = %d\n",
-               test_name, flags, errno);
-       return 0;
-}
-
-static int test_membarrier_register_private_expedited_success(void)
-{
-       int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED";
-
-       if (sys_membarrier(cmd, flags) != 0) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d, errno = %d\n",
-                       test_name, flags, errno);
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d\n",
-               test_name, flags);
-       return 0;
-}
-
-static int test_membarrier_private_expedited_success(void)
-{
-       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED";
-
-       if (sys_membarrier(cmd, flags) != 0) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d, errno = %d\n",
-                       test_name, flags, errno);
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d\n",
-               test_name, flags);
-       return 0;
-}
-
-static int test_membarrier_private_expedited_sync_core_fail(void)
-{
-       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure";
-
-       if (sys_membarrier(cmd, flags) != -1) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d. Should fail, but passed\n",
-                       test_name, flags);
-       }
-       if (errno != EPERM) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
-                       test_name, flags, EPERM, strerror(EPERM),
-                       errno, strerror(errno));
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d, errno = %d\n",
-               test_name, flags, errno);
-       return 0;
-}
-
-static int test_membarrier_register_private_expedited_sync_core_success(void)
-{
-       int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE";
-
-       if (sys_membarrier(cmd, flags) != 0) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d, errno = %d\n",
-                       test_name, flags, errno);
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d\n",
-               test_name, flags);
-       return 0;
-}
-
-static int test_membarrier_private_expedited_sync_core_success(void)
-{
-       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE";
-
-       if (sys_membarrier(cmd, flags) != 0) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d, errno = %d\n",
-                       test_name, flags, errno);
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d\n",
-               test_name, flags);
-       return 0;
-}
-
-static int test_membarrier_register_global_expedited_success(void)
-{
-       int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED";
-
-       if (sys_membarrier(cmd, flags) != 0) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d, errno = %d\n",
-                       test_name, flags, errno);
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d\n",
-               test_name, flags);
-       return 0;
-}
-
-static int test_membarrier_global_expedited_success(void)
-{
-       int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0;
-       const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED";
-
-       if (sys_membarrier(cmd, flags) != 0) {
-               ksft_exit_fail_msg(
-                       "%s test: flags = %d, errno = %d\n",
-                       test_name, flags, errno);
-       }
-
-       ksft_test_result_pass(
-               "%s test: flags = %d\n",
-               test_name, flags);
-       return 0;
-}
-
-static int test_membarrier(void)
-{
-       int status;
-
-       status = test_membarrier_cmd_fail();
-       if (status)
-               return status;
-       status = test_membarrier_flags_fail();
-       if (status)
-               return status;
-       status = test_membarrier_global_success();
-       if (status)
-               return status;
-       status = test_membarrier_private_expedited_fail();
-       if (status)
-               return status;
-       status = test_membarrier_register_private_expedited_success();
-       if (status)
-               return status;
-       status = test_membarrier_private_expedited_success();
-       if (status)
-               return status;
-       status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
-       if (status < 0) {
-               ksft_test_result_fail("sys_membarrier() failed\n");
-               return status;
-       }
-       if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
-               status = test_membarrier_private_expedited_sync_core_fail();
-               if (status)
-                       return status;
-               status = test_membarrier_register_private_expedited_sync_core_success();
-               if (status)
-                       return status;
-               status = test_membarrier_private_expedited_sync_core_success();
-               if (status)
-                       return status;
-       }
-       /*
-        * It is valid to send a global membarrier from a non-registered
-        * process.
-        */
-       status = test_membarrier_global_expedited_success();
-       if (status)
-               return status;
-       status = test_membarrier_register_global_expedited_success();
-       if (status)
-               return status;
-       status = test_membarrier_global_expedited_success();
-       if (status)
-               return status;
-       return 0;
-}
-
-static int test_membarrier_query(void)
-{
-       int flags = 0, ret;
-
-       ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags);
-       if (ret < 0) {
-               if (errno == ENOSYS) {
-                       /*
-                        * It is valid to build a kernel with
-                        * CONFIG_MEMBARRIER=n. However, this skips the tests.
-                        */
-                       ksft_exit_skip(
-                               "sys membarrier (CONFIG_MEMBARRIER) is disabled.\n");
-               }
-               ksft_exit_fail_msg("sys_membarrier() failed\n");
-       }
-       if (!(ret & MEMBARRIER_CMD_GLOBAL))
-               ksft_exit_skip(
-                       "sys_membarrier unsupported: CMD_GLOBAL not found.\n");
-
-       ksft_test_result_pass("sys_membarrier available\n");
-       return 0;
-}
-
-int main(int argc, char **argv)
-{
-       ksft_print_header();
-       ksft_set_plan(13);
-
-       test_membarrier_query();
-       test_membarrier();
-
-       return ksft_exit_pass();
-}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_impl.h b/tools/testing/selftests/membarrier/membarrier_test_impl.h
new file mode 100644 (file)
index 0000000..186be69
--- /dev/null
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+
+static int sys_membarrier(int cmd, int flags)
+{
+       return syscall(__NR_membarrier, cmd, flags);
+}
+
+static int test_membarrier_cmd_fail(void)
+{
+       int cmd = -1, flags = 0;
+       const char *test_name = "sys membarrier invalid command";
+
+       if (sys_membarrier(cmd, flags) != -1) {
+               ksft_exit_fail_msg(
+                       "%s test: command = %d, flags = %d. Should fail, but passed\n",
+                       test_name, cmd, flags);
+       }
+       if (errno != EINVAL) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+                       test_name, flags, EINVAL, strerror(EINVAL),
+                       errno, strerror(errno));
+       }
+
+       ksft_test_result_pass(
+               "%s test: command = %d, flags = %d, errno = %d. Failed as expected\n",
+               test_name, cmd, flags, errno);
+       return 0;
+}
+
+static int test_membarrier_flags_fail(void)
+{
+       int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags";
+
+       if (sys_membarrier(cmd, flags) != -1) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d. Should fail, but passed\n",
+                       test_name, flags);
+       }
+       if (errno != EINVAL) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+                       test_name, flags, EINVAL, strerror(EINVAL),
+                       errno, strerror(errno));
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d, errno = %d. Failed as expected\n",
+               test_name, flags, errno);
+       return 0;
+}
+
+static int test_membarrier_global_success(void)
+{
+       int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL";
+
+       if (sys_membarrier(cmd, flags) != 0) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d, errno = %d\n",
+                       test_name, flags, errno);
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d\n", test_name, flags);
+       return 0;
+}
+
+static int test_membarrier_private_expedited_fail(void)
+{
+       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure";
+
+       if (sys_membarrier(cmd, flags) != -1) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d. Should fail, but passed\n",
+                       test_name, flags);
+       }
+       if (errno != EPERM) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+                       test_name, flags, EPERM, strerror(EPERM),
+                       errno, strerror(errno));
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d, errno = %d\n",
+               test_name, flags, errno);
+       return 0;
+}
+
+static int test_membarrier_register_private_expedited_success(void)
+{
+       int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED";
+
+       if (sys_membarrier(cmd, flags) != 0) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d, errno = %d\n",
+                       test_name, flags, errno);
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d\n",
+               test_name, flags);
+       return 0;
+}
+
+static int test_membarrier_private_expedited_success(void)
+{
+       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED";
+
+       if (sys_membarrier(cmd, flags) != 0) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d, errno = %d\n",
+                       test_name, flags, errno);
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d\n",
+               test_name, flags);
+       return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_fail(void)
+{
+       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure";
+
+       if (sys_membarrier(cmd, flags) != -1) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d. Should fail, but passed\n",
+                       test_name, flags);
+       }
+       if (errno != EPERM) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+                       test_name, flags, EPERM, strerror(EPERM),
+                       errno, strerror(errno));
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d, errno = %d\n",
+               test_name, flags, errno);
+       return 0;
+}
+
+static int test_membarrier_register_private_expedited_sync_core_success(void)
+{
+       int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE";
+
+       if (sys_membarrier(cmd, flags) != 0) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d, errno = %d\n",
+                       test_name, flags, errno);
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d\n",
+               test_name, flags);
+       return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_success(void)
+{
+       int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE";
+
+       if (sys_membarrier(cmd, flags) != 0) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d, errno = %d\n",
+                       test_name, flags, errno);
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d\n",
+               test_name, flags);
+       return 0;
+}
+
+static int test_membarrier_register_global_expedited_success(void)
+{
+       int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED";
+
+       if (sys_membarrier(cmd, flags) != 0) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d, errno = %d\n",
+                       test_name, flags, errno);
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d\n",
+               test_name, flags);
+       return 0;
+}
+
+static int test_membarrier_global_expedited_success(void)
+{
+       int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0;
+       const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED";
+
+       if (sys_membarrier(cmd, flags) != 0) {
+               ksft_exit_fail_msg(
+                       "%s test: flags = %d, errno = %d\n",
+                       test_name, flags, errno);
+       }
+
+       ksft_test_result_pass(
+               "%s test: flags = %d\n",
+               test_name, flags);
+       return 0;
+}
+
+static int test_membarrier_fail(void)
+{
+       int status;
+
+       status = test_membarrier_cmd_fail();
+       if (status)
+               return status;
+       status = test_membarrier_flags_fail();
+       if (status)
+               return status;
+       status = test_membarrier_private_expedited_fail();
+       if (status)
+               return status;
+       status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+       if (status < 0) {
+               ksft_test_result_fail("sys_membarrier() failed\n");
+               return status;
+       }
+       if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+               status = test_membarrier_private_expedited_sync_core_fail();
+               if (status)
+                       return status;
+       }
+       return 0;
+}
+
+static int test_membarrier_success(void)
+{
+       int status;
+
+       status = test_membarrier_global_success();
+       if (status)
+               return status;
+       status = test_membarrier_register_private_expedited_success();
+       if (status)
+               return status;
+       status = test_membarrier_private_expedited_success();
+       if (status)
+               return status;
+       status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+       if (status < 0) {
+               ksft_test_result_fail("sys_membarrier() failed\n");
+               return status;
+       }
+       if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+               status = test_membarrier_register_private_expedited_sync_core_success();
+               if (status)
+                       return status;
+               status = test_membarrier_private_expedited_sync_core_success();
+               if (status)
+                       return status;
+       }
+       /*
+        * It is valid to send a global membarrier from a non-registered
+        * process.
+        */
+       status = test_membarrier_global_expedited_success();
+       if (status)
+               return status;
+       status = test_membarrier_register_global_expedited_success();
+       if (status)
+               return status;
+       status = test_membarrier_global_expedited_success();
+       if (status)
+               return status;
+       return 0;
+}
+
+static int test_membarrier_query(void)
+{
+       int flags = 0, ret;
+
+       ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags);
+       if (ret < 0) {
+               if (errno == ENOSYS) {
+                       /*
+                        * It is valid to build a kernel with
+                        * CONFIG_MEMBARRIER=n. However, this skips the tests.
+                        */
+                       ksft_exit_skip(
+                               "sys membarrier (CONFIG_MEMBARRIER) is disabled.\n");
+               }
+               ksft_exit_fail_msg("sys_membarrier() failed\n");
+       }
+       if (!(ret & MEMBARRIER_CMD_GLOBAL))
+               ksft_exit_skip(
+                       "sys_membarrier unsupported: CMD_GLOBAL not found.\n");
+
+       ksft_test_result_pass("sys_membarrier available\n");
+       return 0;
+}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
new file mode 100644 (file)
index 0000000..ac5613e
--- /dev/null
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+static int thread_ready, thread_quit;
+static pthread_mutex_t test_membarrier_thread_mutex =
+       PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t test_membarrier_thread_cond =
+       PTHREAD_COND_INITIALIZER;
+
+void *test_membarrier_thread(void *arg)
+{
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       thread_ready = 1;
+       pthread_cond_broadcast(&test_membarrier_thread_cond);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       while (!thread_quit)
+               pthread_cond_wait(&test_membarrier_thread_cond,
+                                 &test_membarrier_thread_mutex);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       return NULL;
+}
+
+static int test_mt_membarrier(void)
+{
+       int i;
+       pthread_t test_thread;
+
+       pthread_create(&test_thread, NULL,
+                      test_membarrier_thread, NULL);
+
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       while (!thread_ready)
+               pthread_cond_wait(&test_membarrier_thread_cond,
+                                 &test_membarrier_thread_mutex);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       test_membarrier_fail();
+
+       test_membarrier_success();
+
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       thread_quit = 1;
+       pthread_cond_broadcast(&test_membarrier_thread_cond);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       pthread_join(test_thread, NULL);
+
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       ksft_print_header();
+       ksft_set_plan(13);
+
+       test_membarrier_query();
+
+       /* Multi-threaded */
+       test_mt_membarrier();
+
+       return ksft_exit_pass();
+}
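
The ready/quit handshake in test_membarrier_thread() above is the standard
POSIX condition-variable pattern. A condensed sketch of the same protocol,
independent of the membarrier bits (names are illustrative):

#include <pthread.h>

static int ready, quit;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void *worker(void *arg)
{
        pthread_mutex_lock(&lock);
        ready = 1;                      /* announce readiness */
        pthread_cond_broadcast(&cond);
        while (!quit)                   /* park until told to exit */
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        pthread_mutex_lock(&lock);
        while (!ready)                  /* wait for the worker to come up */
                pthread_cond_wait(&cond, &lock);
        /* ... the main thread would run its tests here ... */
        quit = 1;
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        return 0;
}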
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
new file mode 100644 (file)
index 0000000..c1c9639
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+int main(int argc, char **argv)
+{
+       ksft_print_header();
+       ksft_set_plan(13);
+
+       test_membarrier_query();
+
+       test_membarrier_fail();
+
+       test_membarrier_success();
+
+       return ksft_exit_pass();
+}
index c7cced7..8aefd81 100644 (file)
@@ -21,3 +21,4 @@ ipv6_flowlabel
 ipv6_flowlabel_mgr
 so_txtime
 tcp_fastopen_backup_key
+nettest
index e682873..9dc35a1 100755 (executable)
@@ -15,6 +15,8 @@
 PAUSE_ON_FAIL=no
 VERBOSE=0
 
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
 ################################################################################
 # helpers
 
@@ -200,7 +202,7 @@ validate_v6_exception()
        local rc
 
        if [ ${ping_sz} != "0" ]; then
-               run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst}
+               run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
        fi
 
        if [ "$VERBOSE" = "1" ]; then
@@ -243,7 +245,7 @@ do
                run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
                [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
 
-               run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1
+               run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1
                [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
 
                [ $ret -ne 0 ] && break
index f9ebeac..796670e 100755 (executable)
@@ -940,6 +940,20 @@ basic()
        run_cmd "$IP nexthop add id 104 group 1 dev veth1"
        log_test $? 2 "Nexthop group and device"
 
+       # Tests to ensure that flushing works as expected.
+       run_cmd "$IP nexthop add id 105 blackhole proto 99"
+       run_cmd "$IP nexthop add id 106 blackhole proto 100"
+       run_cmd "$IP nexthop add id 107 blackhole proto 99"
+       run_cmd "$IP nexthop flush proto 99"
+       check_nexthop "id 105" ""
+       check_nexthop "id 106" "id 106 blackhole proto 100"
+       check_nexthop "id 107" ""
+       run_cmd "$IP nexthop flush proto 100"
+       check_nexthop "id 106" ""
+
+       run_cmd "$IP nexthop flush proto 100"
+       log_test $? 0 "Test proto flush"
+
        run_cmd "$IP nexthop add id 104 group 1 blackhole"
        log_test $? 2 "Nexthop group and blackhole"
 
index 4465fc2..76c1897 100755 (executable)
@@ -9,7 +9,7 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter"
+TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -17,6 +17,8 @@ PAUSE=no
 IP="ip -netns ns1"
 NS_EXEC="ip netns exec ns1"
 
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
 log_test()
 {
        local rc=$1
@@ -614,6 +616,20 @@ fib_nexthop_test()
        cleanup
 }
 
+fib_suppress_test()
+{
+       $IP link add dummy1 type dummy
+       $IP link set dummy1 up
+       $IP -6 route add default dev dummy1
+       $IP -6 rule add table main suppress_prefixlength 0
+       ping -f -c 1000 -W 1 1234::1 || true
+       $IP -6 rule del table main suppress_prefixlength 0
+       $IP link del dummy1
+
+       # If we got here without crashing, we're good.
+       return 0
+}
+
 ################################################################################
 # Tests on route add and replace
 
@@ -1086,7 +1102,7 @@ ipv6_route_metrics_test()
        log_test $rc 0 "Multipath route with mtu metric"
 
        $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
-       run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
+       run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
        log_test $? 0 "Using route with mtu metric"
 
        run_cmd "$IP -6 ro add 2001:db8:114::/64 via  2001:db8:101::2  congctl lock foo"
@@ -1422,6 +1438,27 @@ ipv4_addr_metric_test()
        fi
        log_test $rc 0 "Prefix route with metric on link up"
 
+       # explicitly check for metric changes on edge scenarios
+       run_cmd "$IP addr flush dev dummy2"
+       run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
+       run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
+       rc=$?
+       if [ $rc -eq 0 ]; then
+               check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
+               rc=$?
+       fi
+       log_test $rc 0 "Modify metric of .0/24 address"
+
+       run_cmd "$IP addr flush dev dummy2"
+       run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
+       run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
+       rc=$?
+       if [ $rc -eq 0 ]; then
+               check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+               rc=$?
+       fi
+       log_test $rc 0 "Modify metric of address with peer route"
+
        $IP li del dummy1
        $IP li del dummy2
        cleanup
@@ -1591,6 +1628,7 @@ do
        fib_carrier_test|carrier)       fib_carrier_test;;
        fib_rp_filter_test|rp_filter)   fib_rp_filter_test;;
        fib_nexthop_test|nexthop)       fib_nexthop_test;;
+       fib_suppress_test|suppress)     fib_suppress_test;;
        ipv6_route_test|ipv6_rt)        ipv6_route_test;;
        ipv4_route_test|ipv4_rt)        ipv4_route_test;;
        ipv6_addr_metric)               ipv6_addr_metric_test;;
old mode 100644 (file)
new mode 100755 (executable)
index fe3230c..fb7a59e 100644 (file)
@@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto)
 {
        struct epoll_event ev;
        int epfd, i, test_fd;
-       uint16_t test_family;
+       int test_family;
        socklen_t len;
 
        epfd = epoll_create(1);
@@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto)
        send_from_v4(proto);
 
        test_fd = receive_once(epfd, proto);
+       len = sizeof(test_family);
        if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
                error(1, errno, "failed to read socket domain");
        if (test_family != AF_INET)
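
The hunk above fixes a getsockopt() call whose socklen_t argument was never
initialized: the kernel reads it as the buffer size on input and writes the
result size back on output. It also widens test_family to int, since
SO_DOMAIN returns a full int. A minimal sketch of the correct calling
convention (the fd is assumed to be an open socket):

#include <sys/socket.h>

static int read_domain(int fd)
{
        int domain;
        socklen_t len = sizeof(domain); /* must hold the buffer size on entry */

        if (getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len))
                return -1;
        return domain;                  /* e.g. AF_INET or AF_INET6 */
}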
index b8265ee..614b31a 100644 (file)
@@ -89,12 +89,9 @@ struct testcase testcases_v4[] = {
                .tfail = true,
        },
        {
-               /* send a single MSS: will fail with GSO, because the segment
-                * logic in udp4_ufo_fragment demands a gso skb to be > MTU
-                */
+               /* send a single MSS: will fall back to no GSO */
                .tlen = CONST_MSS_V4,
                .gso_len = CONST_MSS_V4,
-               .tfail = true,
                .r_num_mss = 1,
        },
        {
@@ -139,10 +136,9 @@ struct testcase testcases_v4[] = {
                .tfail = true,
        },
        {
-               /* send a single 1B MSS: will fail, see single MSS above */
+               /* send a single 1B MSS: will fall back to no GSO */
                .tlen = 1,
                .gso_len = 1,
-               .tfail = true,
                .r_num_mss = 1,
        },
        {
@@ -196,12 +192,9 @@ struct testcase testcases_v6[] = {
                .tfail = true,
        },
        {
-               /* send a single MSS: will fail with GSO, because the segment
-                * logic in udp4_ufo_fragment demands a gso skb to be > MTU
-                */
+               /* send a single MSS: will fall back to no GSO */
                .tlen = CONST_MSS_V6,
                .gso_len = CONST_MSS_V6,
-               .tfail = true,
                .r_num_mss = 1,
        },
        {
@@ -246,10 +239,9 @@ struct testcase testcases_v6[] = {
                .tfail = true,
        },
        {
-               /* send a single 1B MSS: will fail, see single MSS above */
+               /* send a single 1B MSS: will fall back to no GSO */
                .tlen = 1,
                .gso_len = 1,
-               .tfail = true,
                .r_num_mss = 1,
        },
        {
index 464c9b7..7550f08 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/ -lpthread
+CFLAGS += -g -I../../../../usr/include/ -pthread
 
 TEST_GEN_PROGS := pidfd_test pidfd_open_test pidfd_poll_test pidfd_wait
 
index f1fbc15..ed15658 100644 (file)
@@ -4,6 +4,7 @@ noarg:
 
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
                  large_vm_fork_separation
+TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..
@@ -19,3 +20,4 @@ $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 $(OUTPUT)/tempfile:
        dd if=/dev/zero of=$@ bs=64k count=1
 
+$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
new file mode 100644 (file)
index 0000000..f85a093
--- /dev/null
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp.
+ */
+
+/*
+ *
+ * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store
+ * sequence in a loop. The same threads also rung a context switch task
+ * that does sched_yield() in loop.
+ *
+ * The snapshot thread mark the mmap area PROT_READ in between, make a copy
+ * and copy it back to the original area. This helps us to detect if any
+ * store continued to happen after we marked the memory PROT_READ.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/futex.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/prctl.h>
+
+static inline void dcbf(volatile unsigned int *addr)
+{
+       __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
+}
+
+static void err_msg(char *msg)
+{
+       time_t now;
+
+       time(&now);
+       printf("=================================\n");
+       printf("    Error: %s\n", msg);
+       printf("    %s", ctime(&now));
+       printf("=================================\n");
+       exit(1);
+}
+
+static char *map1;
+static char *map2;
+static pid_t rim_process_pid;
+
+/*
+ * A "rim-sequence" is defined to be the sequence of the following
+ * operations performed on a memory word:
+ *     1) FLUSH the contents of that word.
+ *     2) LOAD the contents of that word.
+ *     3) COMPARE the contents of that word with the content that was
+ *                previously stored at that word
+ *     4) STORE new content into that word.
+ *
+ * The threads in this test that perform the rim-sequence are termed
+ * as rim_threads.
+ */
+
+/*
+ * A "corruption" is defined to be the failed COMPARE operation in a
+ * rim-sequence.
+ *
+ * A rim_thread that detects a corruption informs all the other
+ * rim_threads and the mem_snapshot thread about it.
+ */
+static volatile unsigned int corruption_found;
+
+/*
+ * This defines the maximum number of rim_threads in this test.
+ *
+ * The THREAD_ID_BITS denote the number of bits required
+ * to represent the thread_ids [0..MAX_THREADS - 1].
+ * We are being a bit paranoid here and set it to 8 bits,
+ * though 6 bits suffice.
+ */
+#define MAX_THREADS            64
+#define THREAD_ID_BITS         8
+#define THREAD_ID_MASK         ((1 << THREAD_ID_BITS) - 1)
+static unsigned int rim_thread_ids[MAX_THREADS];
+static pthread_t rim_threads[MAX_THREADS];
+
+
+/*
+ * Each rim_thread works on an exclusive "chunk" of size
+ * RIM_CHUNK_SIZE.
+ *
+ * The ith rim_thread works on the ith chunk.
+ *
+ * The ith chunk begins at
+ * map1 + (i * RIM_CHUNK_SIZE)
+ */
+#define RIM_CHUNK_SIZE         1024
+#define BITS_PER_BYTE          8
+#define WORD_SIZE              (sizeof(unsigned int))
+#define WORD_BITS              (WORD_SIZE * BITS_PER_BYTE)
+#define WORDS_PER_CHUNK                (RIM_CHUNK_SIZE/WORD_SIZE)
+
+static inline char *compute_chunk_start_addr(unsigned int thread_id)
+{
+       char *chunk_start;
+
+       chunk_start = (char *)((unsigned long)map1 +
+                              (thread_id * RIM_CHUNK_SIZE));
+
+       return chunk_start;
+}
+
+/*
+ * The "word-offset" of a word-aligned address inside a chunk, is
+ * defined to be the number of words that precede the address in that
+ * chunk.
+ *
+ * WORD_OFFSET_BITS denote the number of bits required to represent
+ * the word-offsets of all the word-aligned addresses of a chunk.
+ */
+#define WORD_OFFSET_BITS       (__builtin_ctz(WORDS_PER_CHUNK))
+#define WORD_OFFSET_MASK       ((1 << WORD_OFFSET_BITS) - 1)
+
+static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
+{
+       unsigned int delta_bytes, ret;
+       delta_bytes = (unsigned long)addr - (unsigned long)start;
+
+       ret = delta_bytes/WORD_SIZE;
+
+       return ret;
+}
+
+/*
+ * A "sweep" is defined to be the sequential execution of the
+ * rim-sequence by a rim_thread on its chunk one word at a time,
+ * starting from the first word of its chunk and ending with the last
+ * word of its chunk.
+ *
+ * Each sweep of a rim_thread is uniquely identified by a sweep_id.
+ * SWEEP_ID_BITS denote the number of bits required to represent
+ * the sweep_ids of rim_threads.
+ *
+ * As to why SWEEP_ID_BITS is computed as a function of THREAD_ID_BITS,
+ * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below.
+ */
+#define SWEEP_ID_BITS          (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
+#define SWEEP_ID_MASK          ((1 << SWEEP_ID_BITS) - 1)
+
+/*
+ * A "store-pattern" is the word-pattern that is stored into a word
+ * location in the 4)STORE step of the rim-sequence.
+ *
+ * In the store-pattern, we shall encode:
+ *
+ *      - The thread-id of the rim_thread performing the store
+ *        (The most significant THREAD_ID_BITS)
+ *
+ *      - The word-offset of the address into which the store is being
+ *        performed (The next WORD_OFFSET_BITS)
+ *
+ *      - The sweep_id of the current sweep in which the store is
+ *        being performed. (The lower SWEEP_ID_BITS)
+ *
+ * Store Pattern: 32 bits
+ * |------------------|--------------------|---------------------------------|
+ * |    Thread id     |  Word offset       |         sweep_id                |
+ * |------------------|--------------------|---------------------------------|
+ *    THREAD_ID_BITS     WORD_OFFSET_BITS          SWEEP_ID_BITS
+ *
+ * In the store pattern, the (Thread-id + Word-offset) uniquely identify the
+ * address to which the store is being performed i.e,
+ *    address == map1 +
+ *              (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE)
+ *
+ * And the sweep_id in the store pattern identifies the time when the
+ * store was performed by the rim_thread.
+ *
+ * We shall use this property in the 3)COMPARE step of the
+ * rim-sequence.
+ */
+#define SWEEP_ID_SHIFT 0
+#define WORD_OFFSET_SHIFT      (SWEEP_ID_BITS)
+#define THREAD_ID_SHIFT                (WORD_OFFSET_BITS + SWEEP_ID_BITS)
+
+/*
+ * Compute the store pattern for a given thread with id @tid, at
+ * location @addr in the sweep identified by @sweep_id
+ */
+static inline unsigned int compute_store_pattern(unsigned int tid,
+                                                unsigned int *addr,
+                                                unsigned int sweep_id)
+{
+       unsigned int ret = 0;
+       char *start = compute_chunk_start_addr(tid);
+       unsigned int word_offset = compute_word_offset(start, addr);
+
+       ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
+       ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
+       ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
+       return ret;
+}
+
+/* Extract the thread-id from the given store-pattern */
+static inline unsigned int extract_tid(unsigned int pattern)
+{
+       unsigned int ret;
+
+       ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
+       return ret;
+}
+
+/* Extract the word-offset from the given store-pattern */
+static inline unsigned int extract_word_offset(unsigned int pattern)
+{
+       unsigned int ret;
+
+       ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
+
+       return ret;
+}
+
+/* Extract the sweep-id from the given store-pattern */
+static inline unsigned int extract_sweep_id(unsigned int pattern)
+{
+       unsigned int ret;
+
+       ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
+
+       return ret;
+}
+
+/************************************************************
+ *                                                          *
+ *          Logging the output of the verification          *
+ *                                                          *
+ ************************************************************/
+#define LOGDIR_NAME_SIZE 100
+static char logdir[LOGDIR_NAME_SIZE];
+
+static FILE *fp[MAX_THREADS];
+static const char logfilename[] = "Thread-%02d-Chunk";
+
+static inline void start_verification_log(unsigned int tid,
+                                         unsigned int *addr,
+                                         unsigned int cur_sweep_id,
+                                         unsigned int prev_sweep_id)
+{
+       FILE *f;
+       char logfile[30];
+       char path[LOGDIR_NAME_SIZE + 30];
+       char separator[2] = "/";
+       char *chunk_start = compute_chunk_start_addr(tid);
+       unsigned int size = RIM_CHUNK_SIZE;
+
+       sprintf(logfile, logfilename, tid);
+       strcpy(path, logdir);
+       strcat(path, separator);
+       strcat(path, logfile);
+       f = fopen(path, "w");
+
+       if (!f) {
+               err_msg("Unable to create logfile\n");
+       }
+
+       fp[tid] = f;
+
+       fprintf(f, "----------------------------------------------------------\n");
+       fprintf(f, "PID                = %d\n", rim_process_pid);
+       fprintf(f, "Thread id          = %02d\n", tid);
+       fprintf(f, "Chunk Start Addr   = 0x%016lx\n", (unsigned long)chunk_start);
+       fprintf(f, "Chunk Size         = %d\n", size);
+       fprintf(f, "Next Store Addr    = 0x%016lx\n", (unsigned long)addr);
+       fprintf(f, "Current sweep-id   = 0x%08x\n", cur_sweep_id);
+       fprintf(f, "Previous sweep-id  = 0x%08x\n", prev_sweep_id);
+       fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void log_anomaly(unsigned int tid, unsigned int *addr,
+                              unsigned int expected, unsigned int observed)
+{
+       FILE *f = fp[tid];
+
+       fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
+               tid, (unsigned long)addr, expected, observed);
+       fprintf(f, "Thread %02d: Expected Thread id   = %02d\n", tid, extract_tid(expected));
+       fprintf(f, "Thread %02d: Observed Thread id   = %02d\n", tid, extract_tid(observed));
+       fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
+       fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
+       fprintf(f, "Thread %02d: Expected sweep-id    = 0x%x\n", tid, extract_sweep_id(expected));
+       fprintf(f, "Thread %02d: Observed sweep-id    = 0x%x\n", tid, extract_sweep_id(observed));
+       fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void end_verification_log(unsigned int tid, unsigned nr_anomalies)
+{
+       FILE *f = fp[tid];
+       char logfile[30];
+       char path[LOGDIR_NAME_SIZE + 30];
+       char separator[] = "/";
+
+       fclose(f);
+
+       /* Build the log path before it is used in either branch below. */
+       sprintf(logfile, logfilename, tid);
+       strcpy(path, logdir);
+       strcat(path, separator);
+       strcat(path, logfile);
+
+       if (nr_anomalies == 0) {
+               remove(path);
+               return;
+       }
+
+       printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
+               tid, nr_anomalies, path);
+}
+
+/*
+ * When a COMPARE step of a rim-sequence fails, the rim_thread informs
+ * everyone else via the shared corruption_found variable. On seeing
+ * this, every thread verifies the
+ * content of its chunk as follows.
+ *
+ * Suppose a thread identified by @tid was about to store (but had not
+ * yet stored) to @next_store_addr in its current sweep, identified by
+ * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id.
+ *
+ * This implies that for all the addresses @addr < @next_store_addr,
+ * Thread @tid has already performed a store as part of its current
+ * sweep. Hence we expect the content of such @addr to be:
+ *    |-------------------------------------------------|
+ *    | tid   | word_offset(addr) |    cur_sweep_id     |
+ *    |-------------------------------------------------|
+ *
+ * Since Thread @tid is yet to perform stores on address
+ * @next_store_addr and above, we expect the content of such an
+ * address @addr to be:
+ *    |-------------------------------------------------|
+ *    | tid   | word_offset(addr) |    prev_sweep_id    |
+ *    |-------------------------------------------------|
+ *
+ * The verifier function @verify_chunk does this verification and logs
+ * any anomalies that it finds.
+ */
+static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
+                 unsigned int cur_sweep_id,
+                 unsigned int prev_sweep_id)
+{
+       unsigned int *iter_ptr;
+       unsigned int size = RIM_CHUNK_SIZE;
+       unsigned int expected;
+       unsigned int observed;
+       char *chunk_start = compute_chunk_start_addr(tid);
+
+       int nr_anomalies = 0;
+
+       start_verification_log(tid, next_store_addr,
+                              cur_sweep_id, prev_sweep_id);
+
+       for (iter_ptr = (unsigned int *)chunk_start;
+            (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
+            iter_ptr++) {
+               unsigned int expected_sweep_id;
+
+               if (iter_ptr < next_store_addr) {
+                       expected_sweep_id = cur_sweep_id;
+               } else {
+                       expected_sweep_id = prev_sweep_id;
+               }
+
+               expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
+
+               dcbf((volatile unsigned int *)iter_ptr); /* Flush before reading */
+               observed = *iter_ptr;
+
+               if (observed != expected) {
+                       nr_anomalies++;
+                       log_anomaly(tid, iter_ptr, expected, observed);
+               }
+       }
+
+       end_verification_log(tid, nr_anomalies);
+}
+
+static void set_pthread_cpu(pthread_t th, int cpu)
+{
+       cpu_set_t run_cpu_mask;
+       struct sched_param param;
+
+       CPU_ZERO(&run_cpu_mask);
+       CPU_SET(cpu, &run_cpu_mask);
+       pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
+
+       param.sched_priority = 1;
+       if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+               /* haven't reproduced with this setting, it kills random preemption which may be a factor */
+               fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+       }
+}
+
+static void set_mycpu(int cpu)
+{
+       cpu_set_t run_cpu_mask;
+       struct sched_param param;
+
+       CPU_ZERO(&run_cpu_mask);
+       CPU_SET(cpu, &run_cpu_mask);
+       sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
+
+       param.sched_priority = 1;
+       if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+               fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+       }
+}
+
+static volatile int segv_wait;
+
+static void segv_handler(int signo, siginfo_t *info, void *extra)
+{
+       while (segv_wait) {
+               sched_yield();
+       }
+}
+
+static void set_segv_handler(void)
+{
+       struct sigaction sa;
+
+       sa.sa_flags = SA_SIGINFO;
+       sa.sa_sigaction = segv_handler;
+
+       if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+               perror("sigaction");
+               exit(EXIT_FAILURE);
+       }
+}
+
+int timeout = 0;
+/*
+ * This function is executed by every rim_thread.
+ *
+ * This function performs sweeps over the exclusive chunks of the
+ * rim_threads executing the rim-sequence one word at a time.
+ */
+static void *rim_fn(void *arg)
+{
+       unsigned int tid = *((unsigned int *)arg);
+
+       int size = RIM_CHUNK_SIZE;
+       char *chunk_start = compute_chunk_start_addr(tid);
+
+       unsigned int prev_sweep_id;
+       unsigned int cur_sweep_id = 0;
+
+       /* word access */
+       unsigned int pattern = cur_sweep_id;
+       unsigned int *pattern_ptr = &pattern;
+       unsigned int *w_ptr, read_data;
+
+       set_segv_handler();
+
+       /*
+        * Let us initialize the chunk:
+        *
+        * Each word-aligned address addr in the chunk,
+        * is initialized to :
+        *    |-------------------------------------------------|
+        *    | tid   | word_offset(addr) |         0           |
+        *    |-------------------------------------------------|
+        */
+       for (w_ptr = (unsigned int *)chunk_start;
+            (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+            w_ptr++) {
+
+               *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+               *w_ptr = *pattern_ptr;
+       }
+
+       while (!corruption_found && !timeout) {
+               prev_sweep_id = cur_sweep_id;
+               cur_sweep_id = cur_sweep_id + 1;
+
+               for (w_ptr = (unsigned int *)chunk_start;
+                    (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+                    w_ptr++)  {
+                       unsigned int old_pattern;
+
+                       /*
+                        * Compute the pattern that we would have
+                        * stored at this location in the previous
+                        * sweep.
+                        */
+                       old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
+
+                       /*
+                        * FLUSH:Ensure that we flush the contents of
+                        *       the cache before loading
+                        */
+                       dcbf((volatile unsigned int *)w_ptr); /* Flush */
+
+                       /* LOAD: Read the value */
+                       read_data = *w_ptr;
+
+                       /*
+                        * COMPARE: Is it the same as what we had stored
+                        *          in the previous sweep ? It better be!
+                        */
+                       if (read_data != old_pattern) {
+                               /* No it isn't! Tell everyone */
+                               corruption_found = 1;
+                       }
+
+                       /*
+                        * Before performing a store, let us check if
+                        * any rim_thread has found a corruption.
+                        */
+                       if (corruption_found || timeout) {
+                               /*
+                                * Yes. Someone (including us!) has found
+                                * a corruption :(
+                                *
+                                * Let us verify that our chunk is
+                                * correct.
+                                */
+                               /* But first, let us allow the dust to settle down! */
+                               verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
+
+                               return 0;
+                       }
+
+                       /*
+                        * Compute the new pattern that we are going
+                        * to write to this location
+                        */
+                       *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+
+                       /*
+                        * STORE: Now let us write this pattern into
+                        *        the location
+                        */
+                       *w_ptr = *pattern_ptr;
+               }
+       }
+
+       return NULL;
+}
+
+
+static unsigned long start_cpu = 0;
+static unsigned long nrthreads = 4;
+
+static pthread_t mem_snapshot_thread;
+
+static void *mem_snapshot_fn(void *arg)
+{
+       int page_size = getpagesize();
+       size_t size = page_size;
+       void *tmp = malloc(size);
+
+       while (!corruption_found && !timeout) {
+               /* Stop memory migration once corruption is found */
+               segv_wait = 1;
+
+               mprotect(map1, size, PROT_READ);
+
+               /*
+                * Load from the working alias (map1). Loading from map2
+                * also fails.
+                */
+               memcpy(tmp, map1, size);
+
+               /*
+                * Stores must go via map2 which has write permissions, but
+                * the corrupted data tends to be seen in the snapshot buffer,
+                * so corruption does not appear to be introduced at the
+                * copy-back via map2 alias here.
+                */
+               memcpy(map2, tmp, size);
+               /*
+                * Before releasing other threads, must ensure the copy
+                * back to the mapping has completed.
+                */
+               asm volatile("sync" ::: "memory");
+               mprotect(map1, size, PROT_READ|PROT_WRITE);
+               asm volatile("sync" ::: "memory");
+               segv_wait = 0;
+
+               usleep(1); /* This value makes a big difference */
+       }
+
+       return 0;
+}
+
+void alrm_sighandler(int sig)
+{
+       timeout = 1;
+}
+
+int main(int argc, char *argv[])
+{
+       int c;
+       int page_size = getpagesize();
+       time_t now;
+       int i, dir_error;
+       pthread_attr_t attr;
+       key_t shm_key = (key_t) getpid();
+       int shmid, run_time = 20 * 60;
+       struct sigaction sa_alrm;
+
+       snprintf(logdir, LOGDIR_NAME_SIZE,
+                "/tmp/logdir-%u", (unsigned int)getpid());
+       while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
+               switch(c) {
+               case 'r':
+                       start_cpu = strtoul(optarg, NULL, 10);
+                       break;
+               case 'h':
+                       printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
+                       exit(0);
+                       break;
+               case 'n':
+                       nrthreads = strtoul(optarg, NULL, 10);
+                       break;
+               case 'l':
+                       strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
+                       break;
+               case 't':
+                       run_time = strtoul(optarg, NULL, 10);
+                       break;
+               default:
+                       printf("invalid option\n");
+                       exit(0);
+                       break;
+               }
+       }
+
+       if (nrthreads > MAX_THREADS)
+               nrthreads = MAX_THREADS;
+
+       shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
+       if (shmid < 0) {
+               err_msg("Failed shmget\n");
+       }
+
+       map1 = shmat(shmid, NULL, 0);
+       if (map1 == (void *) -1) {
+               err_msg("Failed shmat");
+       }
+
+       map2 = shmat(shmid, NULL, 0);
+       if (map2 == (void *) -1) {
+               err_msg("Failed shmat");
+       }
+
+       dir_error = mkdir(logdir, 0755);
+
+       if (dir_error) {
+               err_msg("Failed mkdir");
+       }
+
+       printf("start_cpu list:%lu\n", start_cpu);
+       printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
+       printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
+       printf("logdir at : %s\n", logdir);
+       printf("Timeout: %d seconds\n", run_time);
+
+       time(&now);
+       printf("=================================\n");
+       printf("     Starting Test\n");
+       printf("     %s", ctime(&now));
+       printf("=================================\n");
+
+       for (i = 0; i < nrthreads; i++) {
+               if (1 && !fork()) {
+                       prctl(PR_SET_PDEATHSIG, SIGKILL);
+                       set_mycpu(start_cpu + i);
+                       for (;;)
+                               sched_yield();
+                       exit(0);
+               }
+       }
+
+
+       sa_alrm.sa_handler = &alrm_sighandler;
+       sigemptyset(&sa_alrm.sa_mask);
+       sa_alrm.sa_flags = 0;
+
+       if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
+               err_msg("Failed signal handler registration\n");
+       }
+
+       alarm(run_time);
+
+       pthread_attr_init(&attr);
+       for (i = 0; i < nrthreads; i++) {
+               rim_thread_ids[i] = i;
+               pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
+               set_pthread_cpu(rim_threads[i], start_cpu + i);
+       }
+
+       pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
+       set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
+
+
+       pthread_join(mem_snapshot_thread, NULL);
+       for (i = 0; i < nrthreads; i++) {
+               pthread_join(rim_threads[i], NULL);
+       }
+
+       if (!timeout) {
+               time(&now);
+               printf("=================================\n");
+               printf("      Data Corruption Detected\n");
+               printf("      %s", ctime(&now));
+               printf("      See logfiles in %s\n", logdir);
+               printf("=================================\n");
+               return 1;
+       }
+       return 0;
+}
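
The store-pattern packing documented in the comments above is plain
shift-and-mask arithmetic. A standalone sketch using the same layout for the
default chunk geometry (8 thread-id bits, 8 word-offset bits for 256 4-byte
words per 1024-byte chunk, 16 sweep-id bits); the values are illustrative:

#include <assert.h>
#include <stdio.h>

#define THREAD_ID_BITS          8
#define WORD_OFFSET_BITS        8       /* __builtin_ctz(1024 / 4) */
#define SWEEP_ID_BITS           (32 - THREAD_ID_BITS - WORD_OFFSET_BITS)

static unsigned int encode(unsigned int tid, unsigned int off,
                           unsigned int sweep)
{
        return (tid << (WORD_OFFSET_BITS + SWEEP_ID_BITS)) |
               (off << SWEEP_ID_BITS) |
               (sweep & ((1u << SWEEP_ID_BITS) - 1));
}

int main(void)
{
        unsigned int p = encode(3, 17, 0xbeef);

        assert((p >> (WORD_OFFSET_BITS + SWEEP_ID_BITS)) == 3);
        assert(((p >> SWEEP_ID_BITS) & 0xff) == 17);
        assert((p & 0xffff) == 0xbeef);
        printf("pattern = %#010x\n", p);        /* prints 0x0311beef */
        return 0;
}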
index c0734ed..b15a1a3 100644 (file)
@@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
        tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
        $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \
-       tm-signal-context-force-tm
+       tm-signal-context-force-tm tm-poison
 
 top_srcdir = ../../../../..
 include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
new file mode 100644 (file)
index 0000000..9775584
--- /dev/null
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2019, Gustavo Romero, Michael Neuling, IBM Corp.
+ *
+ * This test will spawn two processes. Both will be attached to the same
+ * CPU (CPU 0). The child will be in a loop writing to FP register f31 and
+ * VMX/VEC/Altivec register vr31 a known value, called poison, then calling
+ * the sched_yield syscall to allow the parent to run on the CPU.
+ * The parent sets f31 and vr31 to 1 and in a loop checks that f31 and
+ * vr31 remain 1 as expected until a given timeout (2m). If the issue is
+ * present, the child's poison will leak into the parent's f31 or vr31
+ * registers; otherwise, it never will.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "tm.h"
+
+int tm_poison_test(void)
+{
+       int pid;
+       cpu_set_t cpuset;
+       uint64_t poison = 0xdeadbeefc0dec0fe;
+       uint64_t unknown = 0;
+       bool fail_fp = false;
+       bool fail_vr = false;
+
+       SKIP_IF(!have_htm());
+
+       /* Attach both Child and Parent to CPU 0 */
+       CPU_ZERO(&cpuset);
+       CPU_SET(0, &cpuset);
+       sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+       pid = fork();
+       if (!pid) {
+               /**
+                * child
+                */
+               while (1) {
+                       sched_yield();
+                       asm (
+                               "mtvsrd 31, %[poison];" // f31 = poison
+                               "mtvsrd 63, %[poison];" // vr31 = poison
+
+                               : : [poison] "r" (poison) : );
+               }
+       }
+
+       /**
+        * parent
+        */
+       asm (
+               /*
+                * Set r3, r4, and f31 to known value 1 before entering
+                * in transaction. They won't be written after that.
+                */
+               "       li      3, 0x1          ;"
+               "       li      4, 0x1          ;"
+               "       mtvsrd  31, 4           ;"
+
+               /*
+                * The Time Base (TB) is a 64-bit counter register that is
+                * independent of the CPU clock and which is incremented
+                * at a frequency of 512000000 Hz, i.e. every 1.953125ns.
+                * Hence 120s/0.000000001953125s = 61440000000 increments
+                * are needed to get a 2 minute timeout. Below we set that
+                * value in r5 and then use r6 to track initial TB value,
+                * updating TB values in r7 at every iteration and comparing it
+                * to r6. When r7 (current) - r6 (initial) > 61440000000 we bail
+                * out since for sure we spent already 2 minutes in the loop.
+                * SPR 268 is the TB register.
+                */
+               "       lis     5, 14           ;"
+               "       ori     5, 5, 19996     ;"
+               "       sldi    5, 5, 16        ;" // r5 = 61440000000
+
+               "       mfspr   6, 268          ;" // r6 (TB initial)
+               "1:     mfspr   7, 268          ;" // r7 (TB current)
+               "       subf    7, 6, 7         ;" // r7 - r6 > 61440000000 ?
+               "       cmpd    7, 5            ;"
+               "       bgt     3f              ;" // yes, exit
+
+               /*
+                * Main loop to check f31
+                */
+               "       tbegin.                 ;" // no, try again
+               "       beq     1b              ;" // restart if no timeout
+               "       mfvsrd  3, 31           ;" // read f31
+               "       cmpd    3, 4            ;" // f31 == 1 ?
+               "       bne     2f              ;" // broken :-(
+               "       tabort. 3               ;" // try another transaction
+               "2:     tend.                   ;" // commit transaction
+               "3:     mr    %[unknown], 3     ;" // record r3
+
+               : [unknown] "=r" (unknown)
+               :
+               : "cr0", "r3", "r4", "r5", "r6", "r7", "vs31"
+
+               );
+
+       /*
+        * On leak 'unknown' will contain 'poison' value from child,
+        * otherwise (no leak) 'unknown' will contain the same value
+        * as r3 before entering in transactional mode, i.e. 0x1.
+        */
+       fail_fp = unknown != 0x1;
+       if (fail_fp)
+               printf("Unknown value %#"PRIx64" leaked into f31!\n", unknown);
+       else
+               printf("Good, no poison or leaked value into FP registers\n");
+
+       asm (
+               /*
+                * Set r3, r4, and vr31 to known value 1 before entering
+                * in transaction. They won't be written after that.
+                */
+               "       li      3, 0x1          ;"
+               "       li      4, 0x1          ;"
+               "       mtvsrd  63, 4           ;"
+
+               "       lis     5, 14           ;"
+               "       ori     5, 5, 19996     ;"
+               "       sldi    5, 5, 16        ;" // r5 = 61440000000
+
+               "       mfspr   6, 268          ;" // r6 (TB initial)
+               "1:     mfspr   7, 268          ;" // r7 (TB current)
+               "       subf    7, 6, 7         ;" // r7 - r6 > 61440000000 ?
+               "       cmpd    7, 5            ;"
+               "       bgt     3f              ;" // yes, exit
+
+               /*
+                * Main loop to check vr31
+                */
+               "       tbegin.                 ;" // no, try again
+               "       beq     1b              ;" // restart if no timeout
+               "       mfvsrd  3, 63           ;" // read vr31
+               "       cmpd    3, 4            ;" // vr31 == 1 ?
+               "       bne     2f              ;" // broken :-(
+               "       tabort. 3               ;" // try another transaction
+               "2:     tend.                   ;" // commit transaction
+               "3:     mr    %[unknown], 3     ;" // record r3
+
+               : [unknown] "=r" (unknown)
+               :
+               : "cr0", "r3", "r4", "r5", "r6", "r7", "vs63"
+
+               );
+
+       /*
+        * On leak 'unknown' will contain 'poison' value from child,
+        * otherwise (no leak) 'unknown' will contain the same value
+        * as r3 before entering in transactional mode, i.e. 0x1.
+        */
+       fail_vr = unknown != 0x1;
+       if (fail_vr)
+               printf("Unknown value %#"PRIx64" leaked into vr31!\n", unknown);
+       else
+               printf("Good, no poison or leaked value into VEC registers\n");
+
+       kill(pid, SIGKILL);
+
+       return (fail_fp | fail_vr);
+}
+
+int main(int argc, char *argv[])
+{
+       /* Test completes in about 4m */
+       test_harness_set_timeout(250);
+       return test_harness(tm_poison_test, "tm_poison_test");
+}
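
The 2-minute Time Base budget hard-coded in both asm blocks above can be
cross-checked with ordinary integer arithmetic; a sketch reproducing the
lis/ori/sldi construction:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* TB ticks at 512 MHz, so 120 s is 120 * 512000000 ticks. */
        uint64_t from_freq = 120ULL * 512000000ULL;
        /* lis 5,14 ; ori 5,5,19996 ; sldi 5,5,16 */
        uint64_t from_asm = ((14ULL << 16) | 19996ULL) << 16;

        printf("%llu %llu\n", (unsigned long long)from_freq,
               (unsigned long long)from_asm);   /* both 61440000000 */
        return from_freq != from_asm;
}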
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
new file mode 100644 (file)
index 0000000..ba4d85f
--- /dev/null
@@ -0,0 +1 @@
+timeout=90
index 6ef7f16..7f8b5c8 100644 (file)
@@ -199,6 +199,11 @@ struct seccomp_notif_sizes {
 };
 #endif
 
+#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
+#define PTRACE_EVENTMSG_SYSCALL_ENTRY  1
+#define PTRACE_EVENTMSG_SYSCALL_EXIT   2
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
index 9dd8484..1a5db1e 100644 (file)
@@ -2,3 +2,4 @@
 include ../lib.mk
 
 TEST_PROGS := test_smoke.sh test_space.sh
+TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py
index c0534e2..485cf06 100644 (file)
@@ -37,7 +37,7 @@ int main(int argc, char **argv)
        char *file = "/dev/zero";
        char *p;
 
-       while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) {
+       while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
                switch (opt) {
                case 'm':
                        size = atoi(optarg) * MB;
@@ -71,7 +71,7 @@ int main(int argc, char **argv)
                        flags |= MAP_SHARED;
                        break;
                case 'H':
-                       flags |= MAP_HUGETLB;
+                       flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
                        break;
                default:
                        return -1;
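The flag pairing matters because MAP_HUGETLB is only valid for anonymous mappings (or hugetlbfs files); applied to the tool's default /dev/zero file descriptor, mmap() would fail with EINVAL. A minimal sketch, assuming 2 MiB huge pages are configured on the system:

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t len = 2 * 1024 * 1024;  /* one assumed 2 MiB huge page */
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

            if (p == MAP_FAILED) {
                    perror("mmap");        /* e.g. no huge pages reserved */
                    return 1;
            }
            return munmap(p, len);
    }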
index c2333c7..f45e510 100644 (file)
@@ -19,7 +19,7 @@
 
 int fd;
 const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:NL";
+static const char sopts[] = "bdehp:t:Tn:NLf:i";
 static const struct option lopts[] = {
        {"bootstatus",          no_argument, NULL, 'b'},
        {"disable",             no_argument, NULL, 'd'},
@@ -31,6 +31,8 @@ static const struct option lopts[] = {
        {"pretimeout",    required_argument, NULL, 'n'},
        {"getpretimeout",       no_argument, NULL, 'N'},
        {"gettimeleft",         no_argument, NULL, 'L'},
+       {"file",          required_argument, NULL, 'f'},
+       {"info",                no_argument, NULL, 'i'},
        {NULL,                  no_argument, NULL, 0x0}
 };
 
@@ -69,16 +71,20 @@ static void term(int sig)
 static void usage(char *progname)
 {
        printf("Usage: %s [options]\n", progname);
-       printf(" -b, --bootstatus    Get last boot status (Watchdog/POR)\n");
-       printf(" -d, --disable       Turn off the watchdog timer\n");
-       printf(" -e, --enable        Turn on the watchdog timer\n");
-       printf(" -h, --help          Print the help message\n");
-       printf(" -p, --pingrate=P    Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
-       printf(" -t, --timeout=T     Set timeout to T seconds\n");
-       printf(" -T, --gettimeout    Get the timeout\n");
-       printf(" -n, --pretimeout=T  Set the pretimeout to T seconds\n");
-       printf(" -N, --getpretimeout Get the pretimeout\n");
-       printf(" -L, --gettimeleft   Get the time left until timer expires\n");
+       printf(" -f, --file\t\tOpen watchdog device file\n");
+       printf("\t\t\tDefault is /dev/watchdog\n");
+       printf(" -i, --info\t\tShow watchdog_info\n");
+       printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n");
+       printf(" -d, --disable\t\tTurn off the watchdog timer\n");
+       printf(" -e, --enable\t\tTurn on the watchdog timer\n");
+       printf(" -h, --help\t\tPrint the help message\n");
+       printf(" -p, --pingrate=P\tSet ping rate to P seconds (default %d)\n",
+              DEFAULT_PING_RATE);
+       printf(" -t, --timeout=T\tSet timeout to T seconds\n");
+       printf(" -T, --gettimeout\tGet the timeout\n");
+       printf(" -n, --pretimeout=T\tSet the pretimeout to T seconds\n");
+       printf(" -N, --getpretimeout\tGet the pretimeout\n");
+       printf(" -L, --gettimeleft\tGet the time left until timer expires\n");
        printf("\n");
        printf("Parameters are parsed left-to-right in real-time.\n");
        printf("Example: %s -d -t 10 -p 5 -e\n", progname);
@@ -92,14 +98,21 @@ int main(int argc, char *argv[])
        int ret;
        int c;
        int oneshot = 0;
+       char *file = "/dev/watchdog";
+       struct watchdog_info info;
 
        setbuf(stdout, NULL);
 
-       fd = open("/dev/watchdog", O_WRONLY);
+       while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+               if (c == 'f')
+                       file = optarg;
+       }
+
+       fd = open(file, O_WRONLY);
 
        if (fd == -1) {
                if (errno == ENOENT)
-                       printf("Watchdog device not enabled.\n");
+                       printf("Watchdog device (%s) not found.\n", file);
                else if (errno == EACCES)
                        printf("Run watchdog as root.\n");
                else
@@ -108,6 +121,18 @@ int main(int argc, char *argv[])
                exit(-1);
        }
 
+       /*
+        * Validate that `file` is a watchdog device
+        */
+       ret = ioctl(fd, WDIOC_GETSUPPORT, &info);
+       if (ret) {
+               printf("WDIOC_GETSUPPORT error '%s'\n", strerror(errno));
+               close(fd);
+               exit(ret);
+       }
+
+       optind = 0;
+
        while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
                switch (c) {
                case 'b':
@@ -190,6 +215,21 @@ int main(int argc, char *argv[])
                        else
                                printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno));
                        break;
+               case 'f':
+                       /* Handled above */
+                       break;
+               case 'i':
+                       /*
+                        * watchdog_info was already obtained while validating
+                        * the device on open, so just print it here.
+                        */
+                       oneshot = 1;
+                       printf("watchdog_info:\n");
+                       printf(" identity:\t\t%s\n", info.identity);
+                       printf(" firmware_version:\t%u\n",
+                              info.firmware_version);
+                       printf(" options:\t\t%08x\n", info.options);
+                       break;
 
                default:
                        usage(argv[0]);
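The tool now walks argv twice: a first getopt_long() pass that honors only -f, then a full pass after rewinding. Note that optind is reset to 0, not 1: with glibc, 0 forces getopt_long() to reinitialize completely, including its argument-permutation state. A condensed sketch of the pattern:

    #include <getopt.h>
    #include <stddef.h>

    static const char *pick_file(int argc, char *argv[], const char *sopts,
                                 const struct option *lopts, const char *def)
    {
            const char *file = def;
            int c;

            while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1)
                    if (c == 'f')
                            file = optarg;

            optind = 0;     /* rewind so the caller can re-scan all options */
            return file;
    }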
index 051d7d3..927a151 100644 (file)
@@ -69,7 +69,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
        FILE *fd = NULL;
        struct udev_device *plat;
        const char *speed;
-       int ret = 0;
+       size_t ret;
 
        plat = udev_device_get_parent(sdev);
        path = udev_device_get_syspath(plat);
@@ -79,8 +79,10 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
        if (!fd)
                return -1;
        ret = fread((char *) &descr, sizeof(descr), 1, fd);
-       if (ret < 0)
+       if (ret != 1) {
+               err("Cannot read vudc device descr file: %s", strerror(errno));
                goto err;
+       }
        fclose(fd);
 
        copy_descr_attr(dev, &descr, bDeviceClass);
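The old check could never fire: fread() returns a size_t item count and is never negative, so both errors and short reads must be detected by comparing against the requested item count. A minimal sketch of the corrected pattern:

    #include <stdio.h>

    static int read_record(FILE *fp, void *buf, size_t size)
    {
            /* One fixed-size record requested: success is exactly 1 item. */
            if (fread(buf, size, 1, fp) != 1)
                    return -1;      /* short read or error; see ferror(fp) */
            return 0;
    }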
diff --git a/tools/virtio/crypto/hash.h b/tools/virtio/crypto/hash.h
new file mode 100644 (file)
index 0000000..e69de29
index f91aeb5..8f41cd6 100644 (file)
@@ -29,4 +29,6 @@ enum dma_data_direction {
 #define dma_unmap_single(...) do { } while (0)
 #define dma_unmap_page(...) do { } while (0)
 
+#define dma_max_mapping_size(...) SIZE_MAX
+
 #endif
diff --git a/tools/virtio/xen/xen.h b/tools/virtio/xen/xen.h
new file mode 100644 (file)
index 0000000..f569387
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef XEN_XEN_STUB_H
+#define XEN_XEN_STUB_H
+
+#define xen_domain() 0
+
+#endif
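tools/virtio compiles kernel sources in plain userspace, satisfying their #includes with tiny stub headers like the two above. With xen_domain() pinned to 0 and dma_max_mapping_size() pinned to SIZE_MAX, the guarded kernel paths fold away at compile time, roughly as in this sketch:

    #include <stddef.h>
    #include <stdint.h>

    #define xen_domain() 0                          /* stubbed: never a Xen guest */
    #define dma_max_mapping_size(...) SIZE_MAX      /* stubbed: no DMA limit */

    static size_t max_mapping(void)
    {
            if (xen_domain())
                    return 4096;    /* dead code under the stub headers */
            return dma_max_mapping_size(NULL);
    }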
index 6a89eb0..e6f7cb2 100644 (file)
@@ -11,6 +11,9 @@ datafile_y = initramfs_data.cpio$(suffix_y)
 datafile_d_y = .$(datafile_y).d
 AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)"
 
+# The clean rules cannot see CONFIG_INITRAMFS_COMPRESSION, so clean up after
+# all possible compression formats.
+clean-files += initramfs_data.cpio*
 
 # Generate builtin.o based on initramfs_data.o
 obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o
index 05c71ef..57b20f7 100644 (file)
@@ -29,16 +29,13 @@ header-test- += linux/android/binderfs.h
 header-test-$(CONFIG_CPU_BIG_ENDIAN) += linux/byteorder/big_endian.h
 header-test-$(CONFIG_CPU_LITTLE_ENDIAN) += linux/byteorder/little_endian.h
 header-test- += linux/coda.h
-header-test- += linux/coda_psdev.h
 header-test- += linux/elfcore.h
 header-test- += linux/errqueue.h
 header-test- += linux/fsmap.h
 header-test- += linux/hdlc/ioctl.h
 header-test- += linux/ivtv.h
-header-test- += linux/jffs2.h
 header-test- += linux/kexec.h
 header-test- += linux/matroxfb.h
-header-test- += linux/netfilter_bridge/ebtables.h
 header-test- += linux/netfilter_ipv4/ipt_LOG.h
 header-test- += linux/netfilter_ipv6/ip6t_LOG.h
 header-test- += linux/nfc.h
@@ -56,20 +53,12 @@ header-test- += linux/v4l2-mediabus.h
 header-test- += linux/v4l2-subdev.h
 header-test- += linux/videodev2.h
 header-test- += linux/vm_sockets.h
-header-test- += scsi/scsi_bsg_fc.h
-header-test- += scsi/scsi_netlink.h
-header-test- += scsi/scsi_netlink_fc.h
 header-test- += sound/asequencer.h
 header-test- += sound/asoc.h
 header-test- += sound/asound.h
 header-test- += sound/compress_offload.h
 header-test- += sound/emu10k1.h
 header-test- += sound/sfnt_info.h
-header-test- += sound/sof/eq.h
-header-test- += sound/sof/fw.h
-header-test- += sound/sof/header.h
-header-test- += sound/sof/manifest.h
-header-test- += sound/sof/trace.h
 header-test- += xen/evtchn.h
 header-test- += xen/gntdev.h
 header-test- += xen/privcmd.h
index 362a018..8731dfe 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
 #include <linux/uaccess.h>
 #include <asm/kvm_emulate.h>
 #include <kvm/arm_pmu.h>
@@ -146,8 +147,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
        if (kvm_pmu_pmc_is_chained(pmc) &&
            kvm_pmu_idx_is_high_counter(select_idx))
                counter = upper_32_bits(counter);
-
-       else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
+       else if (select_idx != ARMV8_PMU_CYCLE_IDX)
                counter = lower_32_bits(counter);
 
        return counter;
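Only the cycle counter (ARMV8_PMU_CYCLE_IDX) is architecturally 64-bit here; event counters are 32-bit, and a chained pair keeps its low half in counter N and its high half in counter N+1. A sketch of the helpers this relies on, mirroring the kernel's lower_32_bits()/upper_32_bits():

    #include <stdint.h>

    #define lower_32_bits(v) ((uint32_t)(v))
    #define upper_32_bits(v) ((uint32_t)(((uint64_t)(v)) >> 32))

    /* Reassemble a chained pair: counter N holds bits 31:0, N+1 bits 63:32. */
    static uint64_t chained_value(uint32_t lo, uint32_t hi)
    {
            return ((uint64_t)hi << 32) | lo;
    }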
@@ -193,7 +193,7 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
  */
 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 {
-       u64 counter, reg;
+       u64 counter, reg, val;
 
        pmc = kvm_pmu_get_canonical_pmc(pmc);
        if (!pmc->perf_event)
@@ -201,16 +201,19 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 
        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
 
-       if (kvm_pmu_pmc_is_chained(pmc)) {
-               reg = PMEVCNTR0_EL0 + pmc->idx;
-               __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
-               __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+       if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
+               reg = PMCCNTR_EL0;
+               val = counter;
        } else {
-               reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
-                      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
-               __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
+               reg = PMEVCNTR0_EL0 + pmc->idx;
+               val = lower_32_bits(counter);
        }
 
+       __vcpu_sys_reg(vcpu, reg) = val;
+
+       if (kvm_pmu_pmc_is_chained(pmc))
+               __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+
        kvm_pmu_release_perf_event(pmc);
 }
 
@@ -440,8 +443,25 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
                                  struct pt_regs *regs)
 {
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+       struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
        struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
        int idx = pmc->idx;
+       u64 period;
+
+       cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+       /*
+        * Reset the sample period to the architectural limit,
+        * i.e. the point where the counter overflows.
+        */
+       period = -(local64_read(&perf_event->count));
+
+       if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
+               period &= GENMASK(31, 0);
+
+       local64_set(&perf_event->hw.period_left, 0);
+       perf_event->attr.sample_period = period;
+       perf_event->hw.sample_period = period;
 
        __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
 
@@ -449,6 +469,8 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
                kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                kvm_vcpu_kick(vcpu);
        }
+
+       cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
 }
 
 /**
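The handler now stops the event, recomputes the sample period, and restarts it. For an up-counting register, the number of increments left before the next wrap is the two's complement of the current count, truncated to the counter's width, which is what the -(local64_read(...)) plus GENMASK(31, 0) pair computes. A standalone sketch of that arithmetic:

    #include <stdint.h>

    static uint64_t ticks_to_overflow(uint64_t count, int is_64bit)
    {
            uint64_t period = -count;       /* 2^64 - count (mod 2^64) */

            if (!is_64bit)
                    period &= 0xffffffffULL;        /* 2^32 - (count mod 2^32) */
            return period;
    }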
@@ -567,12 +589,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
                 * high counter.
                 */
                attr.sample_period = (-counter) & GENMASK(63, 0);
+               if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
+                       attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
+
                event = perf_event_create_kernel_counter(&attr, -1, current,
                                                         kvm_pmu_perf_overflow,
                                                         pmc + 1);
-
-               if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
-                       attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
        } else {
                /* The initial sample period (overflow count) of an event. */
                if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
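The reorder fixes a lost flag: perf_event_create_kernel_counter() copies the attribute structure at creation time, so setting PERF_ATTR_CFG1_KVM_PMU_CHAINED afterwards never reached the event. A hypothetical miniature of that copy-at-creation behavior:

    struct attr  { unsigned long config1; };
    struct event { struct attr snap; };

    static struct event created;

    static void create_event(const struct attr *a)
    {
            created.snap = *a;      /* attribute is snapshotted here */
    }

    static void buggy_order(struct attr *a)
    {
            create_event(a);        /* snapshot taken with config1 == 0 */
            a->config1 |= 1;        /* too late: created.snap is unaffected */
    }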
index 55fed77..4fd4f6d 100644 (file)
@@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending,
 #endif /* _TRACE_VGIC_H */
 
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic
+#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic
 #undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_FILE trace
 
index e6de315..d6f0696 100644 (file)
@@ -617,8 +617,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 
                stat_data->kvm = kvm;
                stat_data->offset = p->offset;
+               stat_data->mode = p->mode ? p->mode : 0644;
                kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
-               debugfs_create_file(p->name, 0644, kvm->debugfs_dentry,
+               debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry,
                                    stat_data, stat_fops_per_vm[p->kind]);
        }
        return 0;
@@ -626,8 +627,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 
 static struct kvm *kvm_create_vm(unsigned long type)
 {
-       int r, i;
        struct kvm *kvm = kvm_arch_alloc_vm();
+       int r = -ENOMEM;
+       int i;
 
        if (!kvm)
                return ERR_PTR(-ENOMEM);
@@ -639,44 +641,45 @@ static struct kvm *kvm_create_vm(unsigned long type)
        mutex_init(&kvm->lock);
        mutex_init(&kvm->irq_lock);
        mutex_init(&kvm->slots_lock);
-       refcount_set(&kvm->users_count, 1);
        INIT_LIST_HEAD(&kvm->devices);
 
-       r = kvm_arch_init_vm(kvm, type);
-       if (r)
-               goto out_err_no_disable;
-
-       r = hardware_enable_all();
-       if (r)
-               goto out_err_no_disable;
-
-#ifdef CONFIG_HAVE_KVM_IRQFD
-       INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
-#endif
-
        BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
 
-       r = -ENOMEM;
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                struct kvm_memslots *slots = kvm_alloc_memslots();
+
                if (!slots)
-                       goto out_err_no_srcu;
+                       goto out_err_no_arch_destroy_vm;
                /* Generations must be different for each address space. */
                slots->generation = i;
                rcu_assign_pointer(kvm->memslots[i], slots);
        }
 
-       if (init_srcu_struct(&kvm->srcu))
-               goto out_err_no_srcu;
-       if (init_srcu_struct(&kvm->irq_srcu))
-               goto out_err_no_irq_srcu;
        for (i = 0; i < KVM_NR_BUSES; i++) {
                rcu_assign_pointer(kvm->buses[i],
                        kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
                if (!kvm->buses[i])
-                       goto out_err;
+                       goto out_err_no_arch_destroy_vm;
        }
 
+       refcount_set(&kvm->users_count, 1);
+       r = kvm_arch_init_vm(kvm, type);
+       if (r)
+               goto out_err_no_arch_destroy_vm;
+
+       r = hardware_enable_all();
+       if (r)
+               goto out_err_no_disable;
+
+#ifdef CONFIG_HAVE_KVM_IRQFD
+       INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
+#endif
+
+       if (init_srcu_struct(&kvm->srcu))
+               goto out_err_no_srcu;
+       if (init_srcu_struct(&kvm->irq_srcu))
+               goto out_err_no_irq_srcu;
+
        r = kvm_init_mmu_notifier(kvm);
        if (r)
                goto out_err;
@@ -696,7 +699,9 @@ out_err_no_irq_srcu:
 out_err_no_srcu:
        hardware_disable_all();
 out_err_no_disable:
-       refcount_set(&kvm->users_count, 0);
+       kvm_arch_destroy_vm(kvm);
+       WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
+out_err_no_arch_destroy_vm:
        for (i = 0; i < KVM_NR_BUSES; i++)
                kfree(kvm_get_bus(kvm, i));
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
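The rework lines the error labels up as the exact reverse of the setup sequence, so each goto unwinds precisely the steps already completed; note that users_count is now initialized right before kvm_arch_init_vm() so the error path can safely drop it again. A compact sketch of the idiom:

    static int  step_a(void) { return 0; }
    static int  step_b(void) { return -1; }
    static void undo_a(void) { }

    static int setup(void)
    {
            int r;

            r = step_a();
            if (r)
                    goto out_err_no_undo_a;
            r = step_b();
            if (r)
                    goto out_err_no_undo_b;
            return 0;

    out_err_no_undo_b:      /* b never completed, only a needs undoing */
            undo_a();
    out_err_no_undo_a:      /* nothing completed, nothing to undo */
            return r;
    }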
@@ -2359,20 +2364,23 @@ out:
        kvm_arch_vcpu_unblocking(vcpu);
        block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
 
-       if (!vcpu_valid_wakeup(vcpu))
-               shrink_halt_poll_ns(vcpu);
-       else if (halt_poll_ns) {
-               if (block_ns <= vcpu->halt_poll_ns)
-                       ;
-               /* we had a long block, shrink polling */
-               else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+       if (!kvm_arch_no_poll(vcpu)) {
+               if (!vcpu_valid_wakeup(vcpu)) {
                        shrink_halt_poll_ns(vcpu);
-               /* we had a short halt and our poll time is too small */
-               else if (vcpu->halt_poll_ns < halt_poll_ns &&
-                       block_ns < halt_poll_ns)
-                       grow_halt_poll_ns(vcpu);
-       } else
-               vcpu->halt_poll_ns = 0;
+               } else if (halt_poll_ns) {
+                       if (block_ns <= vcpu->halt_poll_ns)
+                               ;
+                       /* we had a long block, shrink polling */
+                       else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+                               shrink_halt_poll_ns(vcpu);
+                       /* we had a short halt and our poll time is too small */
+                       else if (vcpu->halt_poll_ns < halt_poll_ns &&
+                               block_ns < halt_poll_ns)
+                               grow_halt_poll_ns(vcpu);
+               } else {
+                       vcpu->halt_poll_ns = 0;
+               }
+       }
 
        trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
        kvm_arch_vcpu_block_finish(vcpu);
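With the kvm_arch_no_poll() guard, architectures that opt out of halt polling no longer have their per-vCPU poll window grown or shrunk behind their back. The heuristic itself is unchanged; a hypothetical standalone rendering of it (names and the initial grow value are illustrative, the real factors come from module parameters):

    #include <stdint.h>

    static uint64_t adjust_poll_ns(uint64_t cur, uint64_t block_ns,
                                   uint64_t limit)
    {
            if (!limit)
                    return 0;       /* polling disabled entirely */
            if (block_ns <= cur)
                    return cur;     /* window already covered the halt */
            if (cur && block_ns > limit)
                    return cur / 2; /* long block: shrink polling */
            if (cur < limit && block_ns < limit)
                    return cur ? cur * 2 : 10000;   /* short halt: grow */
            return cur;
    }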
@@ -3929,7 +3937,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
        if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
                return -ENOENT;
 
-       if (simple_attr_open(inode, file, get, set, fmt)) {
+       if (simple_attr_open(inode, file, get,
+                            stat_data->mode & S_IWUGO ? set : NULL,
+                            fmt)) {
                kvm_put_kvm(stat_data->kvm);
                return -ENOMEM;
        }
@@ -4177,7 +4187,8 @@ static void kvm_init_debug(void)
 
        kvm_debugfs_num_entries = 0;
        for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
-               debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
+               int mode = p->mode ? p->mode : 0644;
+               debugfs_create_file(p->name, mode, kvm_debugfs_dir,
                                    (void *)(long)p->offset,
                                    stat_fops[p->kind]);
        }